=> Bootstrap dependency digest>=20211023: found digest-20220214 ===> Skipping vulnerability checks. WARNING: No /usr/pkg/pkgdb/pkg-vulnerabilities file found. WARNING: To fix run: `/usr/sbin/pkg_admin -K /usr/pkg/pkgdb fetch-pkg-vulnerabilities'. ===> Building for xemacs-21.5.35nb6 Resetting `src/sheap-adjust.h'. Producing `src/Emacs.ad.h' from `etc/Emacs.ad'. cp /pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/PROBLEMS etc/PROBLEMS if test -n "/pbulk/work/editors/xemacs-current/work/.tools/bin/makeinfo"; then cd /pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/man && /usr/bin/make -j 8 CC='gcc' CFLAGS='-Wall -Wno-switch -Wundef -Wsign-compare -Wno-char-subscripts -Wpacked -Wpointer-arith -Wshadow -Wmissing-declarations -Wmissing-prototypes -Wstrict-prototypes -Wdeclaration-after-statement -g -O2 -Dunix -no-pie -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include' LDFLAGS='-Wl,-zrelro -L/usr/pkg/lib -Wl,-R/usr/pkg/lib -L/usr/lib -Wl,-R/usr/lib -L/usr/X11R7/lib -Wl,-R/usr/X11R7/lib' CPPFLAGS='-DTERMINFO -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include' MAKEINFO='/pbulk/work/editors/xemacs-current/work/.tools/bin/makeinfo' info; fi --- ../info/beta.info --- --- ../info/cl.info --- --- ../info/emodules.info --- --- ../info/external-widget.info --- --- ../info/info.info --- --- ../info/internals.info --- --- ../info/lispref.info --- --- ../info/new-users-guide.info --- --- ../info/beta.info --- /pbulk/work/editors/xemacs-current/work/.tools/bin/makeinfo -o ../info/beta.info beta.texi --- ../info/emodules.info --- /pbulk/work/editors/xemacs-current/work/.tools/bin/makeinfo -o ../info/emodules.info emodules.texi --- ../info/external-widget.info --- /pbulk/work/editors/xemacs-current/work/.tools/bin/makeinfo -o ../info/external-widget.info external-widget.texi --- ../info/lispref.info --- /pbulk/work/editors/xemacs-current/work/.tools/bin/makeinfo -P lispref -o ../info/lispref.info lispref/lispref.texi --- ../info/standards.info --- --- ../info/new-users-guide.info --- /pbulk/work/editors/xemacs-current/work/.tools/bin/makeinfo -P new-users-guide -o ../info/new-users-guide.info new-users-guide/new-users-guide.texi --- ../info/internals.info --- /pbulk/work/editors/xemacs-current/work/.tools/bin/makeinfo -P internals -o ../info/internals.info internals/internals.texi --- ../info/termcap.info --- --- ../info/info.info --- /pbulk/work/editors/xemacs-current/work/.tools/bin/makeinfo -o ../info/info.info info.texi --- ../info/widget.info --- --- ../info/termcap.info --- /pbulk/work/editors/xemacs-current/work/.tools/bin/makeinfo -o ../info/termcap.info termcap.texi --- ../info/cl.info --- /pbulk/work/editors/xemacs-current/work/.tools/bin/makeinfo -o ../info/cl.info cl.texi --- ../info/internals.info --- internals/internals.texi:7396: warning: `.' or `,' must follow @xref, not `)'. --- ../info/xemacs.info --- --- ../info/xemacs-faq.info --- --- ../info/xemacs.info --- /pbulk/work/editors/xemacs-current/work/.tools/bin/makeinfo -P xemacs -o ../info/xemacs.info xemacs/xemacs.texi --- ../info/standards.info --- /pbulk/work/editors/xemacs-current/work/.tools/bin/makeinfo -o ../info/standards.info standards.texi --- ../info/widget.info --- /pbulk/work/editors/xemacs-current/work/.tools/bin/makeinfo -o ../info/widget.info widget.texi --- ../info/xemacs-faq.info --- /pbulk/work/editors/xemacs-current/work/.tools/bin/makeinfo -o ../info/xemacs-faq.info xemacs-faq.texi xemacs-faq.texi:7: warning: unrecognized encoding name `UTF-8'. --- ../info/lispref.info --- /pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/man/lispref//searching.texi:1557: warning: unlikely character ( in @var. /pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/man/lispref//searching.texi:1557: warning: unlikely character ) in @var. /pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/man/lispref//mule.texi:6: warning: unrecognized encoding name `UTF-8'. cd ./lib-src && /usr/bin/make -j 8 CC='gcc' CFLAGS='-Wall -Wno-switch -Wundef -Wsign-compare -Wno-char-subscripts -Wpacked -Wpointer-arith -Wshadow -Wmissing-declarations -Wmissing-prototypes -Wstrict-prototypes -Wdeclaration-after-statement -g -O2 -Dunix -no-pie -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include' LDFLAGS='-Wl,-zrelro -L/usr/pkg/lib -Wl,-R/usr/pkg/lib -L/usr/lib -Wl,-R/usr/lib -L/usr/X11R7/lib -Wl,-R/usr/X11R7/lib' CPPFLAGS='-DTERMINFO -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include' MAKEINFO='/pbulk/work/editors/xemacs-current/work/.tools/bin/makeinfo' all --- gnuslib.o --- --- ellcc --- --- getopt.o --- --- getopt1.o --- --- regex.o --- --- b2m --- --- fakemail --- --- profile --- --- gnuslib.o --- gcc -c -Wall -Wno-switch -Wundef -Wsign-compare -Wno-char-subscripts -Wpacked -Wpointer-arith -Wshadow -Wmissing-declarations -Wmissing-prototypes -Wstrict-prototypes -Wdeclaration-after-statement -g -O2 -Dunix -no-pie -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -I. -I../src -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/lib-src -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/src -DTERMINFO -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -DHAVE_CONFIG_H -I/usr/pkg/include -I/usr/X11R7/include -I/usr/X11R7/include /pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/lib-src/gnuslib.c --- ellcc --- gcc -Wall -Wno-switch -Wundef -Wsign-compare -Wno-char-subscripts -Wpacked -Wpointer-arith -Wshadow -Wmissing-declarations -Wmissing-prototypes -Wstrict-prototypes -Wdeclaration-after-statement -g -O2 -Dunix -no-pie -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -I. -I../src -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/lib-src -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/src -DTERMINFO -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -DHAVE_CONFIG_H -I/usr/pkg/include -I/usr/X11R7/include /pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/lib-src/ellcc.c -Wl,-zrelro -L/usr/pkg/lib -Wl,-R/usr/pkg/lib -L/usr/lib -Wl,-R/usr/lib -L/usr/X11R7/lib -Wl,-R/usr/X11R7/lib -L/usr/pkg/lib -L/usr/X11R7/lib -Wl,-R/usr/pkg/lib:/usr/X11R7/lib -lgmp -lgdbm -ltermcap -lossaudio -lintl -lm -lkvm -lutil -o ellcc --- getopt.o --- gcc -c -Wall -Wno-switch -Wundef -Wsign-compare -Wno-char-subscripts -Wpacked -Wpointer-arith -Wshadow -Wmissing-declarations -Wmissing-prototypes -Wstrict-prototypes -Wdeclaration-after-statement -g -O2 -Dunix -no-pie -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -I. -I../src -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/lib-src -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/src -DTERMINFO -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -DHAVE_CONFIG_H -I/usr/pkg/include -I/usr/X11R7/include /pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/lib-src/getopt.c --- getopt1.o --- gcc -c -Wall -Wno-switch -Wundef -Wsign-compare -Wno-char-subscripts -Wpacked -Wpointer-arith -Wshadow -Wmissing-declarations -Wmissing-prototypes -Wstrict-prototypes -Wdeclaration-after-statement -g -O2 -Dunix -no-pie -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -I. -I../src -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/lib-src -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/src -DTERMINFO -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -DHAVE_CONFIG_H -I/usr/pkg/include -I/usr/X11R7/include /pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/lib-src/getopt1.c --- regex.o --- gcc -c -Wall -Wno-switch -Wundef -Wsign-compare -Wno-char-subscripts -Wpacked -Wpointer-arith -Wshadow -Wmissing-declarations -Wmissing-prototypes -Wstrict-prototypes -Wdeclaration-after-statement -g -O2 -Dunix -no-pie -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -I. -I../src -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/lib-src -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/src -DTERMINFO -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -DHAVE_CONFIG_H -I/usr/pkg/include -I/usr/X11R7/include -DINHIBIT_STRING_HEADER /pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/src/regex.c --- b2m --- gcc -Wall -Wno-switch -Wundef -Wsign-compare -Wno-char-subscripts -Wpacked -Wpointer-arith -Wshadow -Wmissing-declarations -Wmissing-prototypes -Wstrict-prototypes -Wdeclaration-after-statement -g -O2 -Dunix -no-pie -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -I. -I../src -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/lib-src -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/src -DTERMINFO -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -DHAVE_CONFIG_H -I/usr/pkg/include -I/usr/X11R7/include /pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/lib-src/b2m.c -Wl,-zrelro -L/usr/pkg/lib -Wl,-R/usr/pkg/lib -L/usr/lib -Wl,-R/usr/lib -L/usr/X11R7/lib -Wl,-R/usr/X11R7/lib -L/usr/pkg/lib -L/usr/X11R7/lib -Wl,-R/usr/pkg/lib:/usr/X11R7/lib -lgmp -lgdbm -ltermcap -lossaudio -lintl -lm -lkvm -lutil -o b2m --- fakemail --- gcc -Wall -Wno-switch -Wundef -Wsign-compare -Wno-char-subscripts -Wpacked -Wpointer-arith -Wshadow -Wmissing-declarations -Wmissing-prototypes -Wstrict-prototypes -Wdeclaration-after-statement -g -O2 -Dunix -no-pie -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -I. -I../src -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/lib-src -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/src -DTERMINFO -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -DHAVE_CONFIG_H -I/usr/pkg/include -I/usr/X11R7/include /pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/lib-src/fakemail.c -Wl,-zrelro -L/usr/pkg/lib -Wl,-R/usr/pkg/lib -L/usr/lib -Wl,-R/usr/lib -L/usr/X11R7/lib -Wl,-R/usr/X11R7/lib -L/usr/pkg/lib -L/usr/X11R7/lib -Wl,-R/usr/pkg/lib:/usr/X11R7/lib -lgmp -lgdbm -ltermcap -lossaudio -lintl -lm -lkvm -lutil -o fakemail --- profile --- gcc -Wall -Wno-switch -Wundef -Wsign-compare -Wno-char-subscripts -Wpacked -Wpointer-arith -Wshadow -Wmissing-declarations -Wmissing-prototypes -Wstrict-prototypes -Wdeclaration-after-statement -g -O2 -Dunix -no-pie -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -I. -I../src -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/lib-src -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/src -DTERMINFO -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -DHAVE_CONFIG_H -I/usr/pkg/include -I/usr/X11R7/include /pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/lib-src/profile.c -Wl,-zrelro -L/usr/pkg/lib -Wl,-R/usr/pkg/lib -L/usr/lib -Wl,-R/usr/lib -L/usr/X11R7/lib -Wl,-R/usr/X11R7/lib -L/usr/pkg/lib -L/usr/X11R7/lib -Wl,-R/usr/pkg/lib:/usr/X11R7/lib -lgmp -lgdbm -ltermcap -lossaudio -lintl -lm -lkvm -lutil -o profile --- getopt.o --- In file included from /pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/lib-src/getopt.c:32:0: ../src/config.h:868:16: warning: "__builtin_alloca" is not defined, evaluates to 0 [-Wundef] #define alloca __builtin_alloca ^ --- ellcc --- In file included from /pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/lib-src/ellcc.c:59:0: ../src/config.h:868:16: warning: "__builtin_alloca" is not defined, evaluates to 0 [-Wundef] #define alloca __builtin_alloca ^ --- gnuslib.o --- In file included from /pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/lib-src/gnuserv.h:55:0, from /pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/lib-src/gnuslib.c:48: ../src/config.h:868:16: warning: "__builtin_alloca" is not defined, evaluates to 0 [-Wundef] #define alloca __builtin_alloca ^ --- b2m --- In file included from /pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/lib-src/b2m.c:23:0: ../src/config.h:868:16: warning: "__builtin_alloca" is not defined, evaluates to 0 [-Wundef] #define alloca __builtin_alloca ^ --- profile --- In file included from /pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/lib-src/profile.c:34:0: ../src/config.h:868:16: warning: "__builtin_alloca" is not defined, evaluates to 0 [-Wundef] #define alloca __builtin_alloca ^ --- make-docfile --- gcc -Wall -Wno-switch -Wundef -Wsign-compare -Wno-char-subscripts -Wpacked -Wpointer-arith -Wshadow -Wmissing-declarations -Wmissing-prototypes -Wstrict-prototypes -Wdeclaration-after-statement -g -O2 -Dunix -no-pie -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -I. -I../src -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/lib-src -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/src -DTERMINFO -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -DHAVE_CONFIG_H -I/usr/pkg/include -I/usr/X11R7/include /pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/lib-src/make-docfile.c -Wl,-zrelro -L/usr/pkg/lib -Wl,-R/usr/pkg/lib -L/usr/lib -Wl,-R/usr/lib -L/usr/X11R7/lib -Wl,-R/usr/X11R7/lib -L/usr/pkg/lib -L/usr/X11R7/lib -Wl,-R/usr/pkg/lib:/usr/X11R7/lib -lgmp -lgdbm -ltermcap -lossaudio -lintl -lm -lkvm -lutil -o make-docfile --- profile --- In file included from /usr/include/stdio.h:597:0, from /pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/lib-src/profile.c:35: /pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/lib-src/profile.c: In function 'get_time': /pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/lib-src/profile.c:86:25: warning: format '%lu' expects argument of type 'long unsigned int', but argument 5 has type 'time_t {aka long long int}' [-Wformat=] sprintf (time_string, "%lu.%06lu", ^ (unsigned long) TV2.tv_sec - TV1.tv_sec, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ --- digest-doc --- gcc -Demacs -Wall -Wno-switch -Wundef -Wsign-compare -Wno-char-subscripts -Wpacked -Wpointer-arith -Wshadow -Wmissing-declarations -Wmissing-prototypes -Wstrict-prototypes -Wdeclaration-after-statement -g -O2 -Dunix -no-pie -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -I. -I../src -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/lib-src -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/src -DTERMINFO -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -DHAVE_CONFIG_H -I/usr/pkg/include -I/usr/X11R7/include /pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/lib-src/digest-doc.c -Wl,-zrelro -L/usr/pkg/lib -Wl,-R/usr/pkg/lib -L/usr/lib -Wl,-R/usr/lib -L/usr/X11R7/lib -Wl,-R/usr/X11R7/lib -L/usr/pkg/lib -L/usr/X11R7/lib -Wl,-R/usr/pkg/lib:/usr/X11R7/lib -lgmp -lgdbm -ltermcap -lossaudio -lintl -lm -lkvm -lutil -o digest-doc --- regex.o --- In file included from /pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/src/regex.c:27:0: ../src/config.h:868:16: warning: "__builtin_alloca" is not defined, evaluates to 0 [-Wundef] #define alloca __builtin_alloca ^ --- make-docfile --- In file included from /pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/lib-src/make-docfile.c:42:0: ../src/config.h:868:16: warning: "__builtin_alloca" is not defined, evaluates to 0 [-Wundef] #define alloca __builtin_alloca ^ --- sorted-doc --- gcc -Wall -Wno-switch -Wundef -Wsign-compare -Wno-char-subscripts -Wpacked -Wpointer-arith -Wshadow -Wmissing-declarations -Wmissing-prototypes -Wstrict-prototypes -Wdeclaration-after-statement -g -O2 -Dunix -no-pie -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -I. -I../src -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/lib-src -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/src -DTERMINFO -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -DHAVE_CONFIG_H -I/usr/pkg/include -I/usr/X11R7/include /pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/lib-src/sorted-doc.c -Wl,-zrelro -L/usr/pkg/lib -Wl,-R/usr/pkg/lib -L/usr/lib -Wl,-R/usr/lib -L/usr/X11R7/lib -Wl,-R/usr/X11R7/lib -L/usr/pkg/lib -L/usr/X11R7/lib -Wl,-R/usr/pkg/lib:/usr/X11R7/lib -lgmp -lgdbm -ltermcap -lossaudio -lintl -lm -lkvm -lutil -o sorted-doc --- cvtmail --- gcc -Wall -Wno-switch -Wundef -Wsign-compare -Wno-char-subscripts -Wpacked -Wpointer-arith -Wshadow -Wmissing-declarations -Wmissing-prototypes -Wstrict-prototypes -Wdeclaration-after-statement -g -O2 -Dunix -no-pie -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -I. -I../src -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/lib-src -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/src -DTERMINFO -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -DHAVE_CONFIG_H -I/usr/pkg/include -I/usr/X11R7/include /pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/lib-src/cvtmail.c -Wl,-zrelro -L/usr/pkg/lib -Wl,-R/usr/pkg/lib -L/usr/lib -Wl,-R/usr/lib -L/usr/X11R7/lib -Wl,-R/usr/X11R7/lib -L/usr/pkg/lib -L/usr/X11R7/lib -Wl,-R/usr/pkg/lib:/usr/X11R7/lib -lgmp -lgdbm -ltermcap -lossaudio -lintl -lm -lkvm -lutil -o cvtmail In file included from /pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/lib-src/cvtmail.c:36:0: ../src/config.h:868:16: warning: "__builtin_alloca" is not defined, evaluates to 0 [-Wundef] #define alloca __builtin_alloca ^ --- sorted-doc --- In file included from /pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/lib-src/sorted-doc.c:28:0: ../src/config.h:868:16: warning: "__builtin_alloca" is not defined, evaluates to 0 [-Wundef] #define alloca __builtin_alloca ^ --- hexl --- gcc -Wall -Wno-switch -Wundef -Wsign-compare -Wno-char-subscripts -Wpacked -Wpointer-arith -Wshadow -Wmissing-declarations -Wmissing-prototypes -Wstrict-prototypes -Wdeclaration-after-statement -g -O2 -Dunix -no-pie -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -I. -I../src -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/lib-src -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/src -DTERMINFO -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -DHAVE_CONFIG_H -I/usr/pkg/include -I/usr/X11R7/include /pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/lib-src/hexl.c -Wl,-zrelro -L/usr/pkg/lib -Wl,-R/usr/pkg/lib -L/usr/lib -Wl,-R/usr/lib -L/usr/X11R7/lib -Wl,-R/usr/X11R7/lib -L/usr/pkg/lib -L/usr/X11R7/lib -Wl,-R/usr/pkg/lib:/usr/X11R7/lib -lgmp -lgdbm -ltermcap -lossaudio -lintl -lm -lkvm -lutil -o hexl --- mmencode --- gcc -Wall -Wno-switch -Wundef -Wsign-compare -Wno-char-subscripts -Wpacked -Wpointer-arith -Wshadow -Wmissing-declarations -Wmissing-prototypes -Wstrict-prototypes -Wdeclaration-after-statement -g -O2 -Dunix -no-pie -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -I. -I../src -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/lib-src -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/src -DTERMINFO -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -DHAVE_CONFIG_H -I/usr/pkg/include -I/usr/X11R7/include /pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/lib-src/mmencode.c -Wl,-zrelro -L/usr/pkg/lib -Wl,-R/usr/pkg/lib -L/usr/lib -Wl,-R/usr/lib -L/usr/X11R7/lib -Wl,-R/usr/X11R7/lib -L/usr/pkg/lib -L/usr/X11R7/lib -Wl,-R/usr/pkg/lib:/usr/X11R7/lib -lgmp -lgdbm -ltermcap -lossaudio -lintl -lm -lkvm -lutil -o mmencode --- make-path --- gcc -Demacs -Wall -Wno-switch -Wundef -Wsign-compare -Wno-char-subscripts -Wpacked -Wpointer-arith -Wshadow -Wmissing-declarations -Wmissing-prototypes -Wstrict-prototypes -Wdeclaration-after-statement -g -O2 -Dunix -no-pie -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -I. -I../src -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/lib-src -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/src -DTERMINFO -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -DHAVE_CONFIG_H -I/usr/pkg/include -I/usr/X11R7/include /pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/lib-src/make-path.c -Wl,-zrelro -L/usr/pkg/lib -Wl,-R/usr/pkg/lib -L/usr/lib -Wl,-R/usr/lib -L/usr/X11R7/lib -Wl,-R/usr/X11R7/lib -L/usr/pkg/lib -L/usr/X11R7/lib -Wl,-R/usr/pkg/lib:/usr/X11R7/lib -lgmp -lgdbm -ltermcap -lossaudio -lintl -lm -lkvm -lutil -o make-path --- mmencode --- In file included from /pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/lib-src/mmencode.c:17:0: ../src/config.h:868:16: warning: "__builtin_alloca" is not defined, evaluates to 0 [-Wundef] #define alloca __builtin_alloca ^ --- make-dump-id --- gcc -Wall -Wno-switch -Wundef -Wsign-compare -Wno-char-subscripts -Wpacked -Wpointer-arith -Wshadow -Wmissing-declarations -Wmissing-prototypes -Wstrict-prototypes -Wdeclaration-after-statement -g -O2 -Dunix -no-pie -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -I. -I../src -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/lib-src -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/src -DTERMINFO -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -DHAVE_CONFIG_H -I/usr/pkg/include -I/usr/X11R7/include /pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/lib-src/make-dump-id.c -Wl,-zrelro -L/usr/pkg/lib -Wl,-R/usr/pkg/lib -L/usr/lib -Wl,-R/usr/lib -L/usr/X11R7/lib -Wl,-R/usr/X11R7/lib -L/usr/pkg/lib -L/usr/X11R7/lib -Wl,-R/usr/pkg/lib:/usr/X11R7/lib -lgmp -lgdbm -ltermcap -lossaudio -lintl -lm -lkvm -lutil -o make-dump-id --- insert-data-in-exec --- gcc -Wall -Wno-switch -Wundef -Wsign-compare -Wno-char-subscripts -Wpacked -Wpointer-arith -Wshadow -Wmissing-declarations -Wmissing-prototypes -Wstrict-prototypes -Wdeclaration-after-statement -g -O2 -Dunix -no-pie -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -I. -I../src -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/lib-src -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/src -DTERMINFO -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -DHAVE_CONFIG_H -I/usr/pkg/include -I/usr/X11R7/include /pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/lib-src/insert-data-in-exec.c -Wl,-zrelro -L/usr/pkg/lib -Wl,-R/usr/pkg/lib -L/usr/lib -Wl,-R/usr/lib -L/usr/X11R7/lib -Wl,-R/usr/X11R7/lib -L/usr/pkg/lib -L/usr/X11R7/lib -Wl,-R/usr/pkg/lib:/usr/X11R7/lib -lgmp -lgdbm -ltermcap -lossaudio -lintl -lm -lkvm -lutil -o insert-data-in-exec --- hexl --- In file included from /pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/lib-src/hexl.c:28:0: ../src/config.h:868:16: warning: "__builtin_alloca" is not defined, evaluates to 0 [-Wundef] #define alloca __builtin_alloca ^ --- make-dump-id --- In file included from /pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/lib-src/make-dump-id.c:19:0: ../src/config.h:868:16: warning: "__builtin_alloca" is not defined, evaluates to 0 [-Wundef] #define alloca __builtin_alloca ^ --- gnuclient --- gcc -Wall -Wno-switch -Wundef -Wsign-compare -Wno-char-subscripts -Wpacked -Wpointer-arith -Wshadow -Wmissing-declarations -Wmissing-prototypes -Wstrict-prototypes -Wdeclaration-after-statement -g -O2 -Dunix -no-pie -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -I. -I../src -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/lib-src -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/src -DTERMINFO -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -DHAVE_CONFIG_H -I/usr/pkg/include -I/usr/X11R7/include -I/usr/X11R7/include /pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/lib-src/gnuclient.c gnuslib.o -Wl,-zrelro -L/usr/pkg/lib -Wl,-R/usr/pkg/lib -L/usr/lib -Wl,-R/usr/lib -L/usr/X11R7/lib -Wl,-R/usr/X11R7/lib -L/usr/pkg/lib -L/usr/X11R7/lib -Wl,-R/usr/pkg/lib:/usr/X11R7/lib -L/usr/X11R7/lib -R/usr/X11R7/lib -lXau -lXmu -lXt -lXext -lX11 -lSM -lICE -lgmp -lgdbm -ltermcap -lossaudio -lintl -lm -lkvm -lutil -o gnuclient --- gnuserv --- gcc -Wall -Wno-switch -Wundef -Wsign-compare -Wno-char-subscripts -Wpacked -Wpointer-arith -Wshadow -Wmissing-declarations -Wmissing-prototypes -Wstrict-prototypes -Wdeclaration-after-statement -g -O2 -Dunix -no-pie -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -I. -I../src -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/lib-src -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/src -DTERMINFO -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -DHAVE_CONFIG_H -I/usr/pkg/include -I/usr/X11R7/include -I/usr/X11R7/include /pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/lib-src/gnuserv.c gnuslib.o -Wl,-zrelro -L/usr/pkg/lib -Wl,-R/usr/pkg/lib -L/usr/lib -Wl,-R/usr/lib -L/usr/X11R7/lib -Wl,-R/usr/X11R7/lib -L/usr/pkg/lib -L/usr/X11R7/lib -Wl,-R/usr/pkg/lib:/usr/X11R7/lib -L/usr/X11R7/lib -R/usr/X11R7/lib -lXau -lXmu -lXt -lXext -lX11 -lSM -lICE -lgmp -lgdbm -ltermcap -lossaudio -lintl -lm -lkvm -lutil -o gnuserv --- gnuclient --- In file included from /pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/lib-src/gnuserv.h:55:0, from /pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/lib-src/gnuclient.c:48: ../src/config.h:868:16: warning: "__builtin_alloca" is not defined, evaluates to 0 [-Wundef] #define alloca __builtin_alloca ^ --- gnuserv --- In file included from /pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/lib-src/gnuserv.h:55:0, from /pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/lib-src/gnuserv.c:42: ../src/config.h:868:16: warning: "__builtin_alloca" is not defined, evaluates to 0 [-Wundef] #define alloca __builtin_alloca ^ --- etags --- --- ootags --- --- movemail --- --- etags --- gcc -Wall -Wno-switch -Wundef -Wsign-compare -Wno-char-subscripts -Wpacked -Wpointer-arith -Wshadow -Wmissing-declarations -Wmissing-prototypes -Wstrict-prototypes -Wdeclaration-after-statement -g -O2 -Dunix -no-pie -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -I. -I../src -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/lib-src -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/src -DTERMINFO -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -DHAVE_CONFIG_H -I/usr/pkg/include -I/usr/X11R7/include -DEMACS_NAME='"XEmacs"' -DVERSION='"21.5-b35"' /pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/lib-src/etags.c getopt.o getopt1.o regex.o -Wl,-zrelro -L/usr/pkg/lib -Wl,-R/usr/pkg/lib -L/usr/lib -Wl,-R/usr/lib -L/usr/X11R7/lib -Wl,-R/usr/X11R7/lib -L/usr/pkg/lib -L/usr/X11R7/lib -Wl,-R/usr/pkg/lib:/usr/X11R7/lib -lgmp -lgdbm -ltermcap -lossaudio -lintl -lm -lkvm -lutil -o etags --- ootags --- gcc -Wall -Wno-switch -Wundef -Wsign-compare -Wno-char-subscripts -Wpacked -Wpointer-arith -Wshadow -Wmissing-declarations -Wmissing-prototypes -Wstrict-prototypes -Wdeclaration-after-statement -g -O2 -Dunix -no-pie -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -I. -I../src -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/lib-src -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/src -DTERMINFO -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -DHAVE_CONFIG_H -I/usr/pkg/include -I/usr/X11R7/include -DVERSION='"21.5-b35"' /pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/lib-src/ootags.c getopt.o getopt1.o regex.o -Wl,-zrelro -L/usr/pkg/lib -Wl,-R/usr/pkg/lib -L/usr/lib -Wl,-R/usr/lib -L/usr/X11R7/lib -Wl,-R/usr/X11R7/lib -L/usr/pkg/lib -L/usr/X11R7/lib -Wl,-R/usr/pkg/lib:/usr/X11R7/lib -lgmp -lgdbm -ltermcap -lossaudio -lintl -lm -lkvm -lutil -o ootags --- movemail --- gcc -Wall -Wno-switch -Wundef -Wsign-compare -Wno-char-subscripts -Wpacked -Wpointer-arith -Wshadow -Wmissing-declarations -Wmissing-prototypes -Wstrict-prototypes -Wdeclaration-after-statement -g -O2 -Dunix -no-pie -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -I. -I../src -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/lib-src -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/src -DTERMINFO -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -DHAVE_CONFIG_H -I/usr/pkg/include -I/usr/X11R7/include /pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/lib-src/movemail.c /pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/lib-src/pop.c getopt.o getopt1.o regex.o -Wl,-zrelro -L/usr/pkg/lib -Wl,-R/usr/pkg/lib -L/usr/lib -Wl,-R/usr/lib -L/usr/X11R7/lib -Wl,-R/usr/X11R7/lib -L/usr/pkg/lib -L/usr/X11R7/lib -Wl,-R/usr/pkg/lib:/usr/X11R7/lib -lgmp -lgdbm -ltermcap -lossaudio -lintl -lm -lkvm -lutil -o movemail --- etags --- In file included from /pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/lib-src/etags.c:96:0: ../src/config.h:868:16: warning: "__builtin_alloca" is not defined, evaluates to 0 [-Wundef] #define alloca __builtin_alloca ^ --- ootags --- In file included from /pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/lib-src/ootags.c:51:0: ../src/config.h:868:16: warning: "__builtin_alloca" is not defined, evaluates to 0 [-Wundef] #define alloca __builtin_alloca ^ --- movemail --- In file included from /pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/lib-src/movemail.c:61:0: ../src/config.h:868:16: warning: "__builtin_alloca" is not defined, evaluates to 0 [-Wundef] #define alloca __builtin_alloca ^ --- ctags --- gcc -DCTAGS -Wall -Wno-switch -Wundef -Wsign-compare -Wno-char-subscripts -Wpacked -Wpointer-arith -Wshadow -Wmissing-declarations -Wmissing-prototypes -Wstrict-prototypes -Wdeclaration-after-statement -g -O2 -Dunix -no-pie -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -I. -I../src -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/lib-src -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/src -DTERMINFO -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -DHAVE_CONFIG_H -I/usr/pkg/include -I/usr/X11R7/include -DEMACS_NAME='"XEmacs"' -DVERSION='"21.5-b35"' /pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/lib-src/etags.c getopt.o getopt1.o regex.o -Wl,-zrelro -L/usr/pkg/lib -Wl,-R/usr/pkg/lib -L/usr/lib -Wl,-R/usr/lib -L/usr/X11R7/lib -Wl,-R/usr/X11R7/lib -L/usr/pkg/lib -L/usr/X11R7/lib -Wl,-R/usr/pkg/lib:/usr/X11R7/lib -lgmp -lgdbm -ltermcap -lossaudio -lintl -lm -lkvm -lutil -o ctags In file included from /pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/lib-src/etags.c:96:0: ../src/config.h:868:16: warning: "__builtin_alloca" is not defined, evaluates to 0 [-Wundef] #define alloca __builtin_alloca ^ cd ./lwlib && /usr/bin/make -j 8 CC='gcc' CFLAGS='-Wall -Wno-switch -Wundef -Wsign-compare -Wno-char-subscripts -Wpacked -Wpointer-arith -Wshadow -Wmissing-declarations -Wmissing-prototypes -Wstrict-prototypes -Wdeclaration-after-statement -g -O2 -Dunix -no-pie -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include' LDFLAGS='-Wl,-zrelro -L/usr/pkg/lib -Wl,-R/usr/pkg/lib -L/usr/lib -Wl,-R/usr/lib -L/usr/X11R7/lib -Wl,-R/usr/X11R7/lib' CPPFLAGS='-DTERMINFO -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include' MAKEINFO='/pbulk/work/editors/xemacs-current/work/.tools/bin/makeinfo' all --- lwlib.o --- --- lwlib-utils.o --- --- lwlib-colors.o --- --- lwlib-fonts.o --- --- lwlib-Xaw.o --- --- xlwmenu.o --- --- xlwscrollbar.o --- --- xlwtabs.o --- --- lwlib.o --- gcc -c -Wall -Wno-switch -Wundef -Wsign-compare -Wno-char-subscripts -Wpacked -Wpointer-arith -Wshadow -Wmissing-declarations -Wmissing-prototypes -Wstrict-prototypes -Wdeclaration-after-statement -g -O2 -Dunix -no-pie -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -DTERMINFO -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -I. -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/lwlib/../src -DHAVE_CONFIG_H -I/usr/pkg/include -I/usr/X11R7/include -I/usr/X11R7/include lwlib.c --- lwlib-utils.o --- gcc -c -Wall -Wno-switch -Wundef -Wsign-compare -Wno-char-subscripts -Wpacked -Wpointer-arith -Wshadow -Wmissing-declarations -Wmissing-prototypes -Wstrict-prototypes -Wdeclaration-after-statement -g -O2 -Dunix -no-pie -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -DTERMINFO -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -I. -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/lwlib/../src -DHAVE_CONFIG_H -I/usr/pkg/include -I/usr/X11R7/include -I/usr/X11R7/include lwlib-utils.c --- lwlib-colors.o --- gcc -c -Wall -Wno-switch -Wundef -Wsign-compare -Wno-char-subscripts -Wpacked -Wpointer-arith -Wshadow -Wmissing-declarations -Wmissing-prototypes -Wstrict-prototypes -Wdeclaration-after-statement -g -O2 -Dunix -no-pie -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -DTERMINFO -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -I. -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/lwlib/../src -DHAVE_CONFIG_H -I/usr/pkg/include -I/usr/X11R7/include -I/usr/X11R7/include lwlib-colors.c --- lwlib-fonts.o --- gcc -c -Wall -Wno-switch -Wundef -Wsign-compare -Wno-char-subscripts -Wpacked -Wpointer-arith -Wshadow -Wmissing-declarations -Wmissing-prototypes -Wstrict-prototypes -Wdeclaration-after-statement -g -O2 -Dunix -no-pie -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -DTERMINFO -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -I. -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/lwlib/../src -DHAVE_CONFIG_H -I/usr/pkg/include -I/usr/X11R7/include -I/usr/X11R7/include lwlib-fonts.c --- lwlib-Xaw.o --- gcc -c -Wall -Wno-switch -Wundef -Wsign-compare -Wno-char-subscripts -Wpacked -Wpointer-arith -Wshadow -Wmissing-declarations -Wmissing-prototypes -Wstrict-prototypes -Wdeclaration-after-statement -g -O2 -Dunix -no-pie -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -DTERMINFO -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -I. -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/lwlib/../src -DHAVE_CONFIG_H -I/usr/pkg/include -I/usr/X11R7/include -I/usr/X11R7/include lwlib-Xaw.c --- xlwmenu.o --- gcc -c -Wall -Wno-switch -Wundef -Wsign-compare -Wno-char-subscripts -Wpacked -Wpointer-arith -Wshadow -Wmissing-declarations -Wmissing-prototypes -Wstrict-prototypes -Wdeclaration-after-statement -g -O2 -Dunix -no-pie -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -DTERMINFO -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -I. -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/lwlib/../src -DHAVE_CONFIG_H -I/usr/pkg/include -I/usr/X11R7/include -I/usr/X11R7/include xlwmenu.c --- xlwscrollbar.o --- gcc -c -Wall -Wno-switch -Wundef -Wsign-compare -Wno-char-subscripts -Wpacked -Wpointer-arith -Wshadow -Wmissing-declarations -Wmissing-prototypes -Wstrict-prototypes -Wdeclaration-after-statement -g -O2 -Dunix -no-pie -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -DTERMINFO -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -I. -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/lwlib/../src -DHAVE_CONFIG_H -I/usr/pkg/include -I/usr/X11R7/include -I/usr/X11R7/include xlwscrollbar.c --- xlwtabs.o --- gcc -c -Wall -Wno-switch -Wundef -Wsign-compare -Wno-char-subscripts -Wpacked -Wpointer-arith -Wshadow -Wmissing-declarations -Wmissing-prototypes -Wstrict-prototypes -Wdeclaration-after-statement -g -O2 -Dunix -no-pie -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -DTERMINFO -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -I. -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/lwlib/../src -DHAVE_CONFIG_H -I/usr/pkg/include -I/usr/X11R7/include -I/usr/X11R7/include xlwtabs.c --- lwlib-utils.o --- In file included from ./config.h:25:0, from lwlib-utils.c:20: ./../src/config.h:868:16: warning: "__builtin_alloca" is not defined, evaluates to 0 [-Wundef] #define alloca __builtin_alloca ^ --- lwlib-fonts.o --- In file included from ./config.h:25:0, from lwlib-fonts.c:26: ./../src/config.h:868:16: warning: "__builtin_alloca" is not defined, evaluates to 0 [-Wundef] #define alloca __builtin_alloca ^ --- lwlib.o --- In file included from ./config.h:25:0, from lwlib.c:21: ./../src/config.h:868:16: warning: "__builtin_alloca" is not defined, evaluates to 0 [-Wundef] #define alloca __builtin_alloca ^ --- lwlib-Xaw.o --- In file included from ./config.h:25:0, from lwlib-Xaw.c:21: ./../src/config.h:868:16: warning: "__builtin_alloca" is not defined, evaluates to 0 [-Wundef] #define alloca __builtin_alloca ^ --- lwlib-colors.o --- In file included from ./config.h:25:0, from lwlib-colors.c:25: ./../src/config.h:868:16: warning: "__builtin_alloca" is not defined, evaluates to 0 [-Wundef] #define alloca __builtin_alloca ^ --- xlwmenu.o --- In file included from ./config.h:25:0, from xlwmenu.c:23: ./../src/config.h:868:16: warning: "__builtin_alloca" is not defined, evaluates to 0 [-Wundef] #define alloca __builtin_alloca ^ --- xlwscrollbar.o --- In file included from ./config.h:25:0, from xlwscrollbar.c:71: ./../src/config.h:868:16: warning: "__builtin_alloca" is not defined, evaluates to 0 [-Wundef] #define alloca __builtin_alloca ^ --- xlwgcs.o --- gcc -c -Wall -Wno-switch -Wundef -Wsign-compare -Wno-char-subscripts -Wpacked -Wpointer-arith -Wshadow -Wmissing-declarations -Wmissing-prototypes -Wstrict-prototypes -Wdeclaration-after-statement -g -O2 -Dunix -no-pie -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -DTERMINFO -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -I. -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/lwlib/../src -DHAVE_CONFIG_H -I/usr/pkg/include -I/usr/X11R7/include -I/usr/X11R7/include xlwgcs.c --- xlwradio.o --- gcc -c -Wall -Wno-switch -Wundef -Wsign-compare -Wno-char-subscripts -Wpacked -Wpointer-arith -Wshadow -Wmissing-declarations -Wmissing-prototypes -Wstrict-prototypes -Wdeclaration-after-statement -g -O2 -Dunix -no-pie -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -DTERMINFO -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -I. -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/lwlib/../src -DHAVE_CONFIG_H -I/usr/pkg/include -I/usr/X11R7/include -I/usr/X11R7/include xlwradio.c --- xlwcheckbox.o --- gcc -c -Wall -Wno-switch -Wundef -Wsign-compare -Wno-char-subscripts -Wpacked -Wpointer-arith -Wshadow -Wmissing-declarations -Wmissing-prototypes -Wstrict-prototypes -Wdeclaration-after-statement -g -O2 -Dunix -no-pie -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -DTERMINFO -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -I. -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/lwlib/../src -DHAVE_CONFIG_H -I/usr/pkg/include -I/usr/X11R7/include -I/usr/X11R7/include xlwcheckbox.c --- xlwgauge.o --- gcc -c -Wall -Wno-switch -Wundef -Wsign-compare -Wno-char-subscripts -Wpacked -Wpointer-arith -Wshadow -Wmissing-declarations -Wmissing-prototypes -Wstrict-prototypes -Wdeclaration-after-statement -g -O2 -Dunix -no-pie -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -DTERMINFO -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -I. -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/lwlib/../src -DHAVE_CONFIG_H -I/usr/pkg/include -I/usr/X11R7/include -I/usr/X11R7/include xlwgauge.c --- lwlib-Xlw.o --- gcc -c -Wall -Wno-switch -Wundef -Wsign-compare -Wno-char-subscripts -Wpacked -Wpointer-arith -Wshadow -Wmissing-declarations -Wmissing-prototypes -Wstrict-prototypes -Wdeclaration-after-statement -g -O2 -Dunix -no-pie -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -DTERMINFO -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -I. -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/lwlib/../src -DHAVE_CONFIG_H -I/usr/pkg/include -I/usr/X11R7/include -I/usr/X11R7/include lwlib-Xlw.c --- xlwgauge.o --- In file included from ./config.h:25:0, from xlwgauge.c:46: ./../src/config.h:868:16: warning: "__builtin_alloca" is not defined, evaluates to 0 [-Wundef] #define alloca __builtin_alloca ^ --- lwlib-Xlw.o --- In file included from ./config.h:25:0, from lwlib-Xlw.c:20: ./../src/config.h:868:16: warning: "__builtin_alloca" is not defined, evaluates to 0 [-Wundef] #define alloca __builtin_alloca ^ --- liblw.a --- rm -f liblw.a ar cq liblw.a lwlib.o lwlib-utils.o lwlib-colors.o lwlib-fonts.o lwlib-Xaw.o xlwmenu.o xlwscrollbar.o xlwtabs.o xlwgcs.o xlwradio.o xlwcheckbox.o xlwgauge.o lwlib-Xlw.o cd ./src && /usr/bin/make -j 8 CC='gcc' CFLAGS='-Wall -Wno-switch -Wundef -Wsign-compare -Wno-char-subscripts -Wpacked -Wpointer-arith -Wshadow -Wmissing-declarations -Wmissing-prototypes -Wstrict-prototypes -Wdeclaration-after-statement -g -O2 -Dunix -no-pie -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include' LDFLAGS='-Wl,-zrelro -L/usr/pkg/lib -Wl,-R/usr/pkg/lib -L/usr/lib -Wl,-R/usr/lib -L/usr/X11R7/lib -Wl,-R/usr/X11R7/lib' CPPFLAGS='-DTERMINFO -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include' MAKEINFO='/pbulk/work/editors/xemacs-current/work/.tools/bin/makeinfo' all --- TopLevelEmacsShell.o --- --- toolbar-xlike.o --- --- EmacsFrame.o --- --- EmacsManager.o --- --- EmacsShell.o --- --- balloon-x.o --- --- balloon_help.o --- --- console-x.o --- --- TopLevelEmacsShell.o --- gcc -c -Wall -Wno-switch -Wundef -Wsign-compare -Wno-char-subscripts -Wpacked -Wpointer-arith -Wshadow -Wmissing-declarations -Wmissing-prototypes -Wstrict-prototypes -Wdeclaration-after-statement -g -O2 -Dunix -no-pie -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -DTERMINFO -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -Demacs -I. -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/src -DHAVE_CONFIG_H -I/usr/pkg/include -I/usr/X11R7/include -I/usr/X11R7/include -DDEFINE_TOP_LEVEL_EMACS_SHELL /pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/src/EmacsShell-sub.c --- toolbar-xlike.o --- gcc -c -Wall -Wno-switch -Wundef -Wsign-compare -Wno-char-subscripts -Wpacked -Wpointer-arith -Wshadow -Wmissing-declarations -Wmissing-prototypes -Wstrict-prototypes -Wdeclaration-after-statement -g -O2 -Dunix -no-pie -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -DTERMINFO -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -Demacs -I. -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/src -DHAVE_CONFIG_H -I/usr/pkg/include -I/usr/X11R7/include -I/usr/X11R7/include toolbar-xlike.c --- EmacsFrame.o --- gcc -c -Wall -Wno-switch -Wundef -Wsign-compare -Wno-char-subscripts -Wpacked -Wpointer-arith -Wshadow -Wmissing-declarations -Wmissing-prototypes -Wstrict-prototypes -Wdeclaration-after-statement -g -O2 -Dunix -no-pie -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -DTERMINFO -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -Demacs -I. -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/src -DHAVE_CONFIG_H -I/usr/pkg/include -I/usr/X11R7/include -I/usr/X11R7/include EmacsFrame.c --- EmacsManager.o --- gcc -c -Wall -Wno-switch -Wundef -Wsign-compare -Wno-char-subscripts -Wpacked -Wpointer-arith -Wshadow -Wmissing-declarations -Wmissing-prototypes -Wstrict-prototypes -Wdeclaration-after-statement -g -O2 -Dunix -no-pie -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -DTERMINFO -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -Demacs -I. -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/src -DHAVE_CONFIG_H -I/usr/pkg/include -I/usr/X11R7/include -I/usr/X11R7/include EmacsManager.c --- EmacsShell.o --- gcc -c -Wall -Wno-switch -Wundef -Wsign-compare -Wno-char-subscripts -Wpacked -Wpointer-arith -Wshadow -Wmissing-declarations -Wmissing-prototypes -Wstrict-prototypes -Wdeclaration-after-statement -g -O2 -Dunix -no-pie -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -DTERMINFO -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -Demacs -I. -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/src -DHAVE_CONFIG_H -I/usr/pkg/include -I/usr/X11R7/include -I/usr/X11R7/include EmacsShell.c --- balloon-x.o --- gcc -c -Wall -Wno-switch -Wundef -Wsign-compare -Wno-char-subscripts -Wpacked -Wpointer-arith -Wshadow -Wmissing-declarations -Wmissing-prototypes -Wstrict-prototypes -Wdeclaration-after-statement -g -O2 -Dunix -no-pie -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -DTERMINFO -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -Demacs -I. -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/src -DHAVE_CONFIG_H -I/usr/pkg/include -I/usr/X11R7/include -I/usr/X11R7/include balloon-x.c --- balloon_help.o --- gcc -c -Wall -Wno-switch -Wundef -Wsign-compare -Wno-char-subscripts -Wpacked -Wpointer-arith -Wshadow -Wmissing-declarations -Wmissing-prototypes -Wstrict-prototypes -Wdeclaration-after-statement -g -O2 -Dunix -no-pie -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -DTERMINFO -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -Demacs -I. -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/src -DHAVE_CONFIG_H -I/usr/pkg/include -I/usr/X11R7/include -I/usr/X11R7/include balloon_help.c --- console-x.o --- gcc -c -Wall -Wno-switch -Wundef -Wsign-compare -Wno-char-subscripts -Wpacked -Wpointer-arith -Wshadow -Wmissing-declarations -Wmissing-prototypes -Wstrict-prototypes -Wdeclaration-after-statement -g -O2 -Dunix -no-pie -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -DTERMINFO -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -Demacs -I. -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/src -DHAVE_CONFIG_H -I/usr/pkg/include -I/usr/X11R7/include -I/usr/X11R7/include console-x.c --- balloon_help.o --- In file included from balloon_help.c:33:0: ./config.h:868:16: warning: "__builtin_alloca" is not defined, evaluates to 0 [-Wundef] #define alloca __builtin_alloca ^ --- balloon-x.o --- In file included from balloon-x.c:22:0: ./config.h:868:16: warning: "__builtin_alloca" is not defined, evaluates to 0 [-Wundef] #define alloca __builtin_alloca ^ --- toolbar-xlike.o --- In file included from toolbar-xlike.c:24:0: ./config.h:868:16: warning: "__builtin_alloca" is not defined, evaluates to 0 [-Wundef] #define alloca __builtin_alloca ^ --- EmacsShell.o --- In file included from EmacsShell.c:23:0: ./config.h:868:16: warning: "__builtin_alloca" is not defined, evaluates to 0 [-Wundef] #define alloca __builtin_alloca ^ --- console-x.o --- In file included from console-x.c:28:0: ./config.h:868:16: warning: "__builtin_alloca" is not defined, evaluates to 0 [-Wundef] #define alloca __builtin_alloca ^ --- TopLevelEmacsShell.o --- In file included from /pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/src/EmacsShell-sub.c:79:0: ./config.h:868:16: warning: "__builtin_alloca" is not defined, evaluates to 0 [-Wundef] #define alloca __builtin_alloca ^ --- EmacsFrame.o --- In file included from EmacsFrame.c:27:0: ./config.h:868:16: warning: "__builtin_alloca" is not defined, evaluates to 0 [-Wundef] #define alloca __builtin_alloca ^ --- device-x.o --- gcc -c -Wall -Wno-switch -Wundef -Wsign-compare -Wno-char-subscripts -Wpacked -Wpointer-arith -Wshadow -Wmissing-declarations -Wmissing-prototypes -Wstrict-prototypes -Wdeclaration-after-statement -g -O2 -Dunix -no-pie -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -DTERMINFO -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -Demacs -I. -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/src -DHAVE_CONFIG_H -I/usr/pkg/include -I/usr/X11R7/include -I/usr/X11R7/include device-x.c --- dialog-x.o --- gcc -c -Wall -Wno-switch -Wundef -Wsign-compare -Wno-char-subscripts -Wpacked -Wpointer-arith -Wshadow -Wmissing-declarations -Wmissing-prototypes -Wstrict-prototypes -Wdeclaration-after-statement -g -O2 -Dunix -no-pie -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -DTERMINFO -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -Demacs -I. -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/src -DHAVE_CONFIG_H -I/usr/pkg/include -I/usr/X11R7/include -I/usr/X11R7/include dialog-x.c --- device-x.o --- In file included from device-x.c:28:0: ./config.h:868:16: warning: "__builtin_alloca" is not defined, evaluates to 0 [-Wundef] #define alloca __builtin_alloca ^ --- dialog-x.o --- In file included from dialog-x.c:25:0: ./config.h:868:16: warning: "__builtin_alloca" is not defined, evaluates to 0 [-Wundef] #define alloca __builtin_alloca ^ --- TopLevelEmacsShell.o --- mv EmacsShell-sub.o TopLevelEmacsShell.o --- fontcolor-x.o --- gcc -c -Wall -Wno-switch -Wundef -Wsign-compare -Wno-char-subscripts -Wpacked -Wpointer-arith -Wshadow -Wmissing-declarations -Wmissing-prototypes -Wstrict-prototypes -Wdeclaration-after-statement -g -O2 -Dunix -no-pie -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -DTERMINFO -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -Demacs -I. -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/src -DHAVE_CONFIG_H -I/usr/pkg/include -I/usr/X11R7/include -I/usr/X11R7/include fontcolor-x.c --- frame-x.o --- gcc -c -Wall -Wno-switch -Wundef -Wsign-compare -Wno-char-subscripts -Wpacked -Wpointer-arith -Wshadow -Wmissing-declarations -Wmissing-prototypes -Wstrict-prototypes -Wdeclaration-after-statement -g -O2 -Dunix -no-pie -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -DTERMINFO -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -Demacs -I. -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/src -DHAVE_CONFIG_H -I/usr/pkg/include -I/usr/X11R7/include -I/usr/X11R7/include frame-x.c --- gccache-x.o --- --- fontcolor-x.o --- In file included from fontcolor-x.c:31:0: ./config.h:868:16: warning: "__builtin_alloca" is not defined, evaluates to 0 [-Wundef] #define alloca __builtin_alloca ^ --- gccache-x.o --- gcc -c -Wall -Wno-switch -Wundef -Wsign-compare -Wno-char-subscripts -Wpacked -Wpointer-arith -Wshadow -Wmissing-declarations -Wmissing-prototypes -Wstrict-prototypes -Wdeclaration-after-statement -g -O2 -Dunix -no-pie -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -DTERMINFO -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -Demacs -I. -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/src -DHAVE_CONFIG_H -I/usr/pkg/include -I/usr/X11R7/include -I/usr/X11R7/include gccache-x.c --- device-x.o --- device-x.c: In function 'x_IO_error_handler': device-x.c:1275:30: warning: format '%zu' expects argument of type 'size_t', but argument 2 has type 'long unsigned int' [-Wformat=] stderr_out (" after %zu requests (%zu known processed) with %u " ~~^ %lu device-x.c:1277:5: (EMACS_UINT) (NextRequest (disp) - 1), ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ device-x.c:1275:44: warning: format '%zu' expects argument of type 'size_t', but argument 3 has type 'long unsigned int' [-Wformat=] stderr_out (" after %zu requests (%zu known processed) with %u " ~~^ %lu device-x.c:1291:3: warning: format '%zu' expects argument of type 'size_t', but argument 7 has type 'long int' [-Wformat=] "I/O Error %d (%s) on display connection\n" ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ device-x.c:1296:10: (EMACS_INT) (NextRequest (disp) - 1), ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ device-x.c:1292:21: note: format string is defined here " \"%s\" after %zu requests (%zu known processed)\n" ~~^ %lu device-x.c:1291:3: warning: format '%zu' expects argument of type 'size_t', but argument 8 has type 'long unsigned int' [-Wformat=] "I/O Error %d (%s) on display connection\n" ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ device-x.c:1292:35: note: format string is defined here " \"%s\" after %zu requests (%zu known processed)\n" ~~^ %lu --- frame-x.o --- In file included from frame-x.c:27:0: ./config.h:868:16: warning: "__builtin_alloca" is not defined, evaluates to 0 [-Wundef] #define alloca __builtin_alloca ^ --- glyphs-x.o --- gcc -c -Wall -Wno-switch -Wundef -Wsign-compare -Wno-char-subscripts -Wpacked -Wpointer-arith -Wshadow -Wmissing-declarations -Wmissing-prototypes -Wstrict-prototypes -Wdeclaration-after-statement -g -O2 -Dunix -no-pie -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -DTERMINFO -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -Demacs -I. -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/src -DHAVE_CONFIG_H -I/usr/pkg/include -I/usr/X11R7/include -I/usr/X11R7/include glyphs-x.c --- gccache-x.o --- In file included from gccache-x.c:53:0: ./config.h:868:16: warning: "__builtin_alloca" is not defined, evaluates to 0 [-Wundef] #define alloca __builtin_alloca ^ --- fontcolor-x.o --- fontcolor-x.c: In function 'x_print_font_instance': fontcolor-x.c:411:52: warning: format '%zx' expects argument of type 'size_t', but argument 3 has type 'Font {aka long unsigned int}' [-Wformat=] write_fmt_string (printcharfun, " font id: 0x%zx,", ~~^ %lx --- glyphs-x.o --- In file included from glyphs-x.c:53:0: ./config.h:868:16: warning: "__builtin_alloca" is not defined, evaluates to 0 [-Wundef] #define alloca __builtin_alloca ^ --- gui-x.o --- gcc -c -Wall -Wno-switch -Wundef -Wsign-compare -Wno-char-subscripts -Wpacked -Wpointer-arith -Wshadow -Wmissing-declarations -Wmissing-prototypes -Wstrict-prototypes -Wdeclaration-after-statement -g -O2 -Dunix -no-pie -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -DTERMINFO -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -Demacs -I. -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/src -DHAVE_CONFIG_H -I/usr/pkg/include -I/usr/X11R7/include -I/usr/X11R7/include gui-x.c In file included from gui-x.c:26:0: ./config.h:868:16: warning: "__builtin_alloca" is not defined, evaluates to 0 [-Wundef] #define alloca __builtin_alloca ^ --- intl-x.o --- gcc -c -Wall -Wno-switch -Wundef -Wsign-compare -Wno-char-subscripts -Wpacked -Wpointer-arith -Wshadow -Wmissing-declarations -Wmissing-prototypes -Wstrict-prototypes -Wdeclaration-after-statement -g -O2 -Dunix -no-pie -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -DTERMINFO -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -Demacs -I. -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/src -DHAVE_CONFIG_H -I/usr/pkg/include -I/usr/X11R7/include -I/usr/X11R7/include intl-x.c --- menubar-x.o --- gcc -c -Wall -Wno-switch -Wundef -Wsign-compare -Wno-char-subscripts -Wpacked -Wpointer-arith -Wshadow -Wmissing-declarations -Wmissing-prototypes -Wstrict-prototypes -Wdeclaration-after-statement -g -O2 -Dunix -no-pie -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -DTERMINFO -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -Demacs -I. -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/src -DHAVE_CONFIG_H -I/usr/pkg/include -I/usr/X11R7/include -I/usr/X11R7/include menubar-x.c --- intl-x.o --- In file included from intl-x.c:20:0: ./config.h:868:16: warning: "__builtin_alloca" is not defined, evaluates to 0 [-Wundef] #define alloca __builtin_alloca ^ --- menubar-x.o --- In file included from menubar-x.c:34:0: ./config.h:868:16: warning: "__builtin_alloca" is not defined, evaluates to 0 [-Wundef] #define alloca __builtin_alloca ^ --- redisplay-x.o --- gcc -c -Wall -Wno-switch -Wundef -Wsign-compare -Wno-char-subscripts -Wpacked -Wpointer-arith -Wshadow -Wmissing-declarations -Wmissing-prototypes -Wstrict-prototypes -Wdeclaration-after-statement -g -O2 -Dunix -no-pie -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -DTERMINFO -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -Demacs -I. -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/src -DHAVE_CONFIG_H -I/usr/pkg/include -I/usr/X11R7/include -I/usr/X11R7/include redisplay-x.c --- glyphs-x.o --- glyphs-x.c: In function 'x_finalize_image_instance': glyphs-x.c:417:6: warning: dereferencing type-punned pointer will break strict-aliasing rules [-Wstrict-aliasing] XDestroyWindow (dpy, IMAGE_INSTANCE_X_SUBWINDOW_ID (p)); ^~~~~~~~~~~~~~ glyphs-x.c: In function 'x_map_subwindow': glyphs-x.c:2118:7: warning: dereferencing type-punned pointer will break strict-aliasing rules [-Wstrict-aliasing] Window subwindow = IMAGE_INSTANCE_X_SUBWINDOW_ID (p); ^~~~~~ glyphs-x.c: In function 'x_redisplay_subwindow': glyphs-x.c:2163:8: warning: dereferencing type-punned pointer will break strict-aliasing rules [-Wstrict-aliasing] IMAGE_INSTANCE_X_SUBWINDOW_ID (p), ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~ --- scrollbar-x.o --- gcc -c -Wall -Wno-switch -Wundef -Wsign-compare -Wno-char-subscripts -Wpacked -Wpointer-arith -Wshadow -Wmissing-declarations -Wmissing-prototypes -Wstrict-prototypes -Wdeclaration-after-statement -g -O2 -Dunix -no-pie -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -DTERMINFO -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -Demacs -I. -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/src -DHAVE_CONFIG_H -I/usr/pkg/include -I/usr/X11R7/include -I/usr/X11R7/include scrollbar-x.c --- frame-x.o --- frame-x.c: In function 'Fx_window_id': frame-x.c:2230:35: warning: format '%zu' expects argument of type 'size_t', but argument 2 has type 'long unsigned int' [-Wformat=] return emacs_sprintf_string ("%zu", ~~^ %lu --- redisplay-x.o --- In file included from redisplay-xlike-inc.c:34:0, from redisplay-x.c:33: ./config.h:868:16: warning: "__builtin_alloca" is not defined, evaluates to 0 [-Wundef] #define alloca __builtin_alloca ^ --- scrollbar-x.o --- In file included from scrollbar-x.c:27:0: ./config.h:868:16: warning: "__builtin_alloca" is not defined, evaluates to 0 [-Wundef] #define alloca __builtin_alloca ^ scrollbar-x.c: In function 'x_create_scrollbar_instance': scrollbar-x.c:115:32: warning: format '%zd' expects argument of type 'signed size_t', but argument 3 has type 'long int' [-Wformat=] "scrollbar_%zd", (EMACS_INT) (SCROLLBAR_X_ID (instance))); ~~^ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ %ld --- select-x.o --- gcc -c -Wall -Wno-switch -Wundef -Wsign-compare -Wno-char-subscripts -Wpacked -Wpointer-arith -Wshadow -Wmissing-declarations -Wmissing-prototypes -Wstrict-prototypes -Wdeclaration-after-statement -g -O2 -Dunix -no-pie -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -DTERMINFO -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -Demacs -I. -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/src -DHAVE_CONFIG_H -I/usr/pkg/include -I/usr/X11R7/include -I/usr/X11R7/include select-x.c In file included from select-x.c:24:0: ./config.h:868:16: warning: "__builtin_alloca" is not defined, evaluates to 0 [-Wundef] #define alloca __builtin_alloca ^ --- toolbar-x.o --- gcc -c -Wall -Wno-switch -Wundef -Wsign-compare -Wno-char-subscripts -Wpacked -Wpointer-arith -Wshadow -Wmissing-declarations -Wmissing-prototypes -Wstrict-prototypes -Wdeclaration-after-statement -g -O2 -Dunix -no-pie -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -DTERMINFO -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -Demacs -I. -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/src -DHAVE_CONFIG_H -I/usr/pkg/include -I/usr/X11R7/include -I/usr/X11R7/include toolbar-x.c --- console-tty.o --- gcc -c -Wall -Wno-switch -Wundef -Wsign-compare -Wno-char-subscripts -Wpacked -Wpointer-arith -Wshadow -Wmissing-declarations -Wmissing-prototypes -Wstrict-prototypes -Wdeclaration-after-statement -g -O2 -Dunix -no-pie -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -DTERMINFO -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -Demacs -I. -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/src -DHAVE_CONFIG_H -I/usr/pkg/include -I/usr/X11R7/include -I/usr/X11R7/include console-tty.c --- toolbar-x.o --- In file included from toolbar-x.c:26:0: ./config.h:868:16: warning: "__builtin_alloca" is not defined, evaluates to 0 [-Wundef] #define alloca __builtin_alloca ^ --- console-tty.o --- In file included from console-tty.c:25:0: ./config.h:868:16: warning: "__builtin_alloca" is not defined, evaluates to 0 [-Wundef] #define alloca __builtin_alloca ^ --- device-tty.o --- gcc -c -Wall -Wno-switch -Wundef -Wsign-compare -Wno-char-subscripts -Wpacked -Wpointer-arith -Wshadow -Wmissing-declarations -Wmissing-prototypes -Wstrict-prototypes -Wdeclaration-after-statement -g -O2 -Dunix -no-pie -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -DTERMINFO -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -Demacs -I. -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/src -DHAVE_CONFIG_H -I/usr/pkg/include -I/usr/X11R7/include -I/usr/X11R7/include device-tty.c --- event-tty.o --- gcc -c -Wall -Wno-switch -Wundef -Wsign-compare -Wno-char-subscripts -Wpacked -Wpointer-arith -Wshadow -Wmissing-declarations -Wmissing-prototypes -Wstrict-prototypes -Wdeclaration-after-statement -g -O2 -Dunix -no-pie -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -DTERMINFO -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -Demacs -I. -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/src -DHAVE_CONFIG_H -I/usr/pkg/include -I/usr/X11R7/include -I/usr/X11R7/include event-tty.c --- device-tty.o --- In file included from device-tty.c:25:0: ./config.h:868:16: warning: "__builtin_alloca" is not defined, evaluates to 0 [-Wundef] #define alloca __builtin_alloca ^ --- event-tty.o --- In file included from event-tty.c:23:0: ./config.h:868:16: warning: "__builtin_alloca" is not defined, evaluates to 0 [-Wundef] #define alloca __builtin_alloca ^ --- fontcolor-tty.o --- gcc -c -Wall -Wno-switch -Wundef -Wsign-compare -Wno-char-subscripts -Wpacked -Wpointer-arith -Wshadow -Wmissing-declarations -Wmissing-prototypes -Wstrict-prototypes -Wdeclaration-after-statement -g -O2 -Dunix -no-pie -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -DTERMINFO -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -Demacs -I. -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/src -DHAVE_CONFIG_H -I/usr/pkg/include -I/usr/X11R7/include -I/usr/X11R7/include fontcolor-tty.c --- frame-tty.o --- gcc -c -Wall -Wno-switch -Wundef -Wsign-compare -Wno-char-subscripts -Wpacked -Wpointer-arith -Wshadow -Wmissing-declarations -Wmissing-prototypes -Wstrict-prototypes -Wdeclaration-after-statement -g -O2 -Dunix -no-pie -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -DTERMINFO -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -Demacs -I. -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/src -DHAVE_CONFIG_H -I/usr/pkg/include -I/usr/X11R7/include -I/usr/X11R7/include frame-tty.c --- redisplay-tty.o --- --- fontcolor-tty.o --- In file included from fontcolor-tty.c:22:0: ./config.h:868:16: warning: "__builtin_alloca" is not defined, evaluates to 0 [-Wundef] #define alloca __builtin_alloca ^ --- redisplay-tty.o --- gcc -c -Wall -Wno-switch -Wundef -Wsign-compare -Wno-char-subscripts -Wpacked -Wpointer-arith -Wshadow -Wmissing-declarations -Wmissing-prototypes -Wstrict-prototypes -Wdeclaration-after-statement -g -O2 -Dunix -no-pie -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -DTERMINFO -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -Demacs -I. -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/src -DHAVE_CONFIG_H -I/usr/pkg/include -I/usr/X11R7/include -I/usr/X11R7/include redisplay-tty.c --- database.o --- gcc -c -Wall -Wno-switch -Wundef -Wsign-compare -Wno-char-subscripts -Wpacked -Wpointer-arith -Wshadow -Wmissing-declarations -Wmissing-prototypes -Wstrict-prototypes -Wdeclaration-after-statement -g -O2 -Dunix -no-pie -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -DTERMINFO -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -Demacs -I. -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/src -DHAVE_CONFIG_H -I/usr/pkg/include -I/usr/X11R7/include -I/usr/X11R7/include database.c --- mule-ccl.o --- gcc -c -Wall -Wno-switch -Wundef -Wsign-compare -Wno-char-subscripts -Wpacked -Wpointer-arith -Wshadow -Wmissing-declarations -Wmissing-prototypes -Wstrict-prototypes -Wdeclaration-after-statement -g -O2 -Dunix -no-pie -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -DTERMINFO -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -Demacs -I. -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/src -DHAVE_CONFIG_H -I/usr/pkg/include -I/usr/X11R7/include -I/usr/X11R7/include mule-ccl.c --- frame-tty.o --- In file included from frame-tty.c:25:0: ./config.h:868:16: warning: "__builtin_alloca" is not defined, evaluates to 0 [-Wundef] #define alloca __builtin_alloca ^ --- redisplay-tty.o --- In file included from redisplay-tty.c:30:0: ./config.h:868:16: warning: "__builtin_alloca" is not defined, evaluates to 0 [-Wundef] #define alloca __builtin_alloca ^ --- mule-ccl.o --- In file included from mule-ccl.c:23:0: ./config.h:868:16: warning: "__builtin_alloca" is not defined, evaluates to 0 [-Wundef] #define alloca __builtin_alloca ^ --- mule-charset.o --- --- database.o --- In file included from database.c:27:0: ./config.h:868:16: warning: "__builtin_alloca" is not defined, evaluates to 0 [-Wundef] #define alloca __builtin_alloca ^ --- mule-charset.o --- gcc -c -Wall -Wno-switch -Wundef -Wsign-compare -Wno-char-subscripts -Wpacked -Wpointer-arith -Wshadow -Wmissing-declarations -Wmissing-prototypes -Wstrict-prototypes -Wdeclaration-after-statement -g -O2 -Dunix -no-pie -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -DTERMINFO -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -Demacs -I. -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/src -DHAVE_CONFIG_H -I/usr/pkg/include -I/usr/X11R7/include -I/usr/X11R7/include mule-charset.c In file included from mule-charset.c:25:0: ./config.h:868:16: warning: "__builtin_alloca" is not defined, evaluates to 0 [-Wundef] #define alloca __builtin_alloca ^ --- mule-ccl.o --- mule-ccl.c: In function 'ccl_driver': mule-ccl.c:2001:48: warning: format '%zx' expects argument of type 'size_t', but argument 5 has type 'long int' [-Wformat=] "\nCCL: Invalid command %lx (ccl_code = %zx) at %zd.", ~~^ %lx mule-ccl.c:2001:56: warning: format '%zd' expects argument of type 'signed size_t', but argument 6 has type 'long int' [-Wformat=] "\nCCL: Invalid command %lx (ccl_code = %zx) at %zd.", ~~^ %ld mule-ccl.c:2007:56: warning: format '%zx' expects argument of type 'size_t', but argument 5 has type 'long int' [-Wformat=] "\nCCL: Invalid charset (command %x, ccl_code = %zx)" ~~^ %lx mule-ccl.c:2007:6: warning: format '%zd' expects argument of type 'signed size_t', but argument 6 has type 'long int' [-Wformat=] "\nCCL: Invalid charset (command %x, ccl_code = %zx)" ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ mule-ccl.c:2008:34: note: format string is defined here " at %zd.", (int) (code & 0x1F), code, this_ic); ~~^ %ld mule-ccl.c:2013:6: warning: format '%zx' expects argument of type 'size_t', but argument 5 has type 'long int' [-Wformat=] "\nCCL: Conversion error (command %x, " ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ mule-ccl.c:2014:41: note: format string is defined here "ccl_code = %zx) at %zd.", (int) (code & 0x1F), ~~^ %lx mule-ccl.c:2013:6: warning: format '%zd' expects argument of type 'signed size_t', but argument 6 has type 'long int' [-Wformat=] "\nCCL: Conversion error (command %x, " ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ mule-ccl.c:2014:49: note: format string is defined here "ccl_code = %zx) at %zd.", (int) (code & 0x1F), ~~^ %ld mule-ccl.c:2033:12: warning: format '%zd' expects argument of type 'signed size_t', but argument 4 has type 'long int' [-Wformat=] " %zd", ~~^ %ld ccl_backtrace_table[i])); ~~~~~~~~~~~~~~~~~~~~~~ --- mule-coding.o --- gcc -c -Wall -Wno-switch -Wundef -Wsign-compare -Wno-char-subscripts -Wpacked -Wpointer-arith -Wshadow -Wmissing-declarations -Wmissing-prototypes -Wstrict-prototypes -Wdeclaration-after-statement -g -O2 -Dunix -no-pie -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -DTERMINFO -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -Demacs -I. -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/src -DHAVE_CONFIG_H -I/usr/pkg/include -I/usr/X11R7/include -I/usr/X11R7/include mule-coding.c In file included from mule-coding.c:31:0: ./config.h:868:16: warning: "__builtin_alloca" is not defined, evaluates to 0 [-Wundef] #define alloca __builtin_alloca ^ --- abbrev.o --- gcc -c -Wall -Wno-switch -Wundef -Wsign-compare -Wno-char-subscripts -Wpacked -Wpointer-arith -Wshadow -Wmissing-declarations -Wmissing-prototypes -Wstrict-prototypes -Wdeclaration-after-statement -g -O2 -Dunix -no-pie -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -DTERMINFO -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -Demacs -I. -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/src -DHAVE_CONFIG_H -I/usr/pkg/include -I/usr/X11R7/include -I/usr/X11R7/include abbrev.c In file included from abbrev.c:33:0: ./config.h:868:16: warning: "__builtin_alloca" is not defined, evaluates to 0 [-Wundef] #define alloca __builtin_alloca ^ --- alloc.o --- gcc -c -Wall -Wno-switch -Wundef -Wsign-compare -Wno-char-subscripts -Wpacked -Wpointer-arith -Wshadow -Wmissing-declarations -Wmissing-prototypes -Wstrict-prototypes -Wdeclaration-after-statement -g -O2 -Dunix -no-pie -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -DTERMINFO -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -Demacs -I. -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/src -DHAVE_CONFIG_H -I/usr/pkg/include -I/usr/X11R7/include -I/usr/X11R7/include alloc.c In file included from alloc.c:40:0: ./config.h:868:16: warning: "__builtin_alloca" is not defined, evaluates to 0 [-Wundef] #define alloca __builtin_alloca ^ --- alloca.o --- gcc -c -Wall -Wno-switch -Wundef -Wsign-compare -Wno-char-subscripts -Wpacked -Wpointer-arith -Wshadow -Wmissing-declarations -Wmissing-prototypes -Wstrict-prototypes -Wdeclaration-after-statement -g -O2 -Dunix -no-pie -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -DTERMINFO -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -Demacs -I. -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/src -DHAVE_CONFIG_H -I/usr/pkg/include -I/usr/X11R7/include -I/usr/X11R7/include alloca.c --- array.o --- gcc -c -Wall -Wno-switch -Wundef -Wsign-compare -Wno-char-subscripts -Wpacked -Wpointer-arith -Wshadow -Wmissing-declarations -Wmissing-prototypes -Wstrict-prototypes -Wdeclaration-after-statement -g -O2 -Dunix -no-pie -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -DTERMINFO -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -Demacs -I. -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/src -DHAVE_CONFIG_H -I/usr/pkg/include -I/usr/X11R7/include -I/usr/X11R7/include array.c --- alloca.o --- In file included from alloca.c:33:0: ./config.h:868:16: warning: "__builtin_alloca" is not defined, evaluates to 0 [-Wundef] #define alloca __builtin_alloca ^ --- array.o --- In file included from array.c:25:0: ./config.h:868:16: warning: "__builtin_alloca" is not defined, evaluates to 0 [-Wundef] #define alloca __builtin_alloca ^ --- alloc.o --- alloc.c: In function 'old_alloc_sized_lcrecord': alloc.c:450:14: warning: format '%zd' expects argument of type 'signed size_t', but argument 3 has type 'Bytecount {aka long int}' [-Wformat=] stderr_out ("allocating %s (size %zd)\n", type, \ ^ alloc.c:540:3: note: in expansion of macro 'INCREMENT_CONS_COUNTER' INCREMENT_CONS_COUNTER (size, implementation->name); ^~~~~~~~~~~~~~~~~~~~~~ alloc.c: In function 'allocate_big_string_chars': alloc.c:450:14: warning: format '%zd' expects argument of type 'signed size_t', but argument 3 has type 'Bytecount {aka long int}' [-Wformat=] stderr_out ("allocating %s (size %zd)\n", type, \ ^ alloc.c:2599:3: note: in expansion of macro 'INCREMENT_CONS_COUNTER' INCREMENT_CONS_COUNTER (length, "string chars"); ^~~~~~~~~~~~~~~~~~~~~~ alloc.c: In function 'allocate_string_chars_struct': alloc.c:450:14: warning: format '%zd' expects argument of type 'signed size_t', but argument 3 has type 'Bytecount {aka long int}' [-Wformat=] stderr_out ("allocating %s (size %zd)\n", type, \ ^ alloc.c:2635:3: note: in expansion of macro 'INCREMENT_CONS_COUNTER' INCREMENT_CONS_COUNTER (fullsize, "string chars"); ^~~~~~~~~~~~~~~~~~~~~~ alloc.c: In function 'resize_string': alloc.c:450:14: warning: format '%zd' expects argument of type 'signed size_t', but argument 3 has type 'Bytecount {aka long int}' [-Wformat=] stderr_out ("allocating %s (size %zd)\n", type, \ ^ alloc.c:2762:4: note: in expansion of macro 'INCREMENT_CONS_COUNTER' INCREMENT_CONS_COUNTER (newfullsize, "string chars"); ^~~~~~~~~~~~~~~~~~~~~~ --- blocktype.o --- gcc -c -Wall -Wno-switch -Wundef -Wsign-compare -Wno-char-subscripts -Wpacked -Wpointer-arith -Wshadow -Wmissing-declarations -Wmissing-prototypes -Wstrict-prototypes -Wdeclaration-after-statement -g -O2 -Dunix -no-pie -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -DTERMINFO -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -Demacs -I. -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/src -DHAVE_CONFIG_H -I/usr/pkg/include -I/usr/X11R7/include -I/usr/X11R7/include blocktype.c --- buffer.o --- gcc -c -Wall -Wno-switch -Wundef -Wsign-compare -Wno-char-subscripts -Wpacked -Wpointer-arith -Wshadow -Wmissing-declarations -Wmissing-prototypes -Wstrict-prototypes -Wdeclaration-after-statement -g -O2 -Dunix -no-pie -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -DTERMINFO -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -Demacs -I. -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/src -DHAVE_CONFIG_H -I/usr/pkg/include -I/usr/X11R7/include -I/usr/X11R7/include buffer.c --- blocktype.o --- In file included from blocktype.c:61:0: ./config.h:868:16: warning: "__builtin_alloca" is not defined, evaluates to 0 [-Wundef] #define alloca __builtin_alloca ^ --- buffer.o --- In file included from buffer.c:74:0: ./config.h:868:16: warning: "__builtin_alloca" is not defined, evaluates to 0 [-Wundef] #define alloca __builtin_alloca ^ --- bytecode.o --- gcc -c -Wall -Wno-switch -Wundef -Wsign-compare -Wno-char-subscripts -Wpacked -Wpointer-arith -Wshadow -Wmissing-declarations -Wmissing-prototypes -Wstrict-prototypes -Wdeclaration-after-statement -g -O2 -Dunix -no-pie -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -DTERMINFO -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -Demacs -I. -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/src -DHAVE_CONFIG_H -I/usr/pkg/include -I/usr/X11R7/include -I/usr/X11R7/include bytecode.c --- callint.o --- gcc -c -Wall -Wno-switch -Wundef -Wsign-compare -Wno-char-subscripts -Wpacked -Wpointer-arith -Wshadow -Wmissing-declarations -Wmissing-prototypes -Wstrict-prototypes -Wdeclaration-after-statement -g -O2 -Dunix -no-pie -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -DTERMINFO -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -Demacs -I. -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/src -DHAVE_CONFIG_H -I/usr/pkg/include -I/usr/X11R7/include -I/usr/X11R7/include callint.c --- bytecode.o --- In file included from bytecode.c:50:0: ./config.h:868:16: warning: "__builtin_alloca" is not defined, evaluates to 0 [-Wundef] #define alloca __builtin_alloca ^ --- buffer.o --- buffer.c: In function 'Fgenerate_new_buffer_name': buffer.c:792:54: warning: format '%zd' expects argument of type 'signed size_t', but argument 5 has type 'long int' [-Wformat=] clen = emacs_snprintf (candidate, csize, "%s<%zd>", XSTRING_DATA (name), ~~^ %ld ++count); ~~~~~~~ --- callint.o --- In file included from callint.c:28:0: ./config.h:868:16: warning: "__builtin_alloca" is not defined, evaluates to 0 [-Wundef] #define alloca __builtin_alloca ^ --- bytecode.o --- bytecode.c: In function 'check_constants_index': bytecode.c:1996:59: warning: format '%zd' expects argument of type 'signed size_t', but argument 4 has type 'long int' [-Wformat=] "reference %d to constants array out of range 0, %zd", ~~^ %ld bytecode.c: In function 'print_compiled_function': bytecode.c:2462:43: warning: format '%zd' expects argument of type 'signed size_t', but argument 3 has type 'Charcount {aka long int}' [-Wformat=] write_fmt_string (printcharfun, "\"...(%zd)\"", ~~^ %ld --- casefiddle.o --- gcc -c -Wall -Wno-switch -Wundef -Wsign-compare -Wno-char-subscripts -Wpacked -Wpointer-arith -Wshadow -Wmissing-declarations -Wmissing-prototypes -Wstrict-prototypes -Wdeclaration-after-statement -g -O2 -Dunix -no-pie -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -DTERMINFO -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -Demacs -I. -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/src -DHAVE_CONFIG_H -I/usr/pkg/include -I/usr/X11R7/include -I/usr/X11R7/include casefiddle.c In file included from casefiddle.c:22:0: ./config.h:868:16: warning: "__builtin_alloca" is not defined, evaluates to 0 [-Wundef] #define alloca __builtin_alloca ^ --- casetab.o --- gcc -c -Wall -Wno-switch -Wundef -Wsign-compare -Wno-char-subscripts -Wpacked -Wpointer-arith -Wshadow -Wmissing-declarations -Wmissing-prototypes -Wstrict-prototypes -Wdeclaration-after-statement -g -O2 -Dunix -no-pie -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -DTERMINFO -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -Demacs -I. -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/src -DHAVE_CONFIG_H -I/usr/pkg/include -I/usr/X11R7/include -I/usr/X11R7/include casetab.c In file included from casetab.c:74:0: ./config.h:868:16: warning: "__builtin_alloca" is not defined, evaluates to 0 [-Wundef] #define alloca __builtin_alloca ^ --- chartab.o --- gcc -c -Wall -Wno-switch -Wundef -Wsign-compare -Wno-char-subscripts -Wpacked -Wpointer-arith -Wshadow -Wmissing-declarations -Wmissing-prototypes -Wstrict-prototypes -Wdeclaration-after-statement -g -O2 -Dunix -no-pie -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -DTERMINFO -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -Demacs -I. -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/src -DHAVE_CONFIG_H -I/usr/pkg/include -I/usr/X11R7/include -I/usr/X11R7/include chartab.c In file included from chartab.c:52:0: ./config.h:868:16: warning: "__builtin_alloca" is not defined, evaluates to 0 [-Wundef] #define alloca __builtin_alloca ^ --- cm.o --- gcc -c -Wall -Wno-switch -Wundef -Wsign-compare -Wno-char-subscripts -Wpacked -Wpointer-arith -Wshadow -Wmissing-declarations -Wmissing-prototypes -Wstrict-prototypes -Wdeclaration-after-statement -g -O2 -Dunix -no-pie -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -DTERMINFO -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -Demacs -I. -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/src -DHAVE_CONFIG_H -I/usr/pkg/include -I/usr/X11R7/include -I/usr/X11R7/include cm.c In file included from cm.c:25:0: ./config.h:868:16: warning: "__builtin_alloca" is not defined, evaluates to 0 [-Wundef] #define alloca __builtin_alloca ^ --- cmdloop.o --- gcc -c -Wall -Wno-switch -Wundef -Wsign-compare -Wno-char-subscripts -Wpacked -Wpointer-arith -Wshadow -Wmissing-declarations -Wmissing-prototypes -Wstrict-prototypes -Wdeclaration-after-statement -g -O2 -Dunix -no-pie -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -DTERMINFO -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -Demacs -I. -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/src -DHAVE_CONFIG_H -I/usr/pkg/include -I/usr/X11R7/include -I/usr/X11R7/include cmdloop.c In file included from cmdloop.c:29:0: ./config.h:868:16: warning: "__builtin_alloca" is not defined, evaluates to 0 [-Wundef] #define alloca __builtin_alloca ^ --- cmds.o --- gcc -c -Wall -Wno-switch -Wundef -Wsign-compare -Wno-char-subscripts -Wpacked -Wpointer-arith -Wshadow -Wmissing-declarations -Wmissing-prototypes -Wstrict-prototypes -Wdeclaration-after-statement -g -O2 -Dunix -no-pie -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -DTERMINFO -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -Demacs -I. -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/src -DHAVE_CONFIG_H -I/usr/pkg/include -I/usr/X11R7/include -I/usr/X11R7/include cmds.c In file included from cmds.c:22:0: ./config.h:868:16: warning: "__builtin_alloca" is not defined, evaluates to 0 [-Wundef] #define alloca __builtin_alloca ^ --- console-stream.o --- gcc -c -Wall -Wno-switch -Wundef -Wsign-compare -Wno-char-subscripts -Wpacked -Wpointer-arith -Wshadow -Wmissing-declarations -Wmissing-prototypes -Wstrict-prototypes -Wdeclaration-after-statement -g -O2 -Dunix -no-pie -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -DTERMINFO -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -Demacs -I. -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/src -DHAVE_CONFIG_H -I/usr/pkg/include -I/usr/X11R7/include -I/usr/X11R7/include console-stream.c --- console.o --- gcc -c -Wall -Wno-switch -Wundef -Wsign-compare -Wno-char-subscripts -Wpacked -Wpointer-arith -Wshadow -Wmissing-declarations -Wmissing-prototypes -Wstrict-prototypes -Wdeclaration-after-statement -g -O2 -Dunix -no-pie -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -DTERMINFO -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -Demacs -I. -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/src -DHAVE_CONFIG_H -I/usr/pkg/include -I/usr/X11R7/include -I/usr/X11R7/include console.c --- console-stream.o --- In file included from console-stream.c:26:0: ./config.h:868:16: warning: "__builtin_alloca" is not defined, evaluates to 0 [-Wundef] #define alloca __builtin_alloca ^ --- data.o --- gcc -c -Wall -Wno-switch -Wundef -Wsign-compare -Wno-char-subscripts -Wpacked -Wpointer-arith -Wshadow -Wmissing-declarations -Wmissing-prototypes -Wstrict-prototypes -Wdeclaration-after-statement -g -O2 -Dunix -no-pie -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -DTERMINFO -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -Demacs -I. -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/src -DHAVE_CONFIG_H -I/usr/pkg/include -I/usr/X11R7/include -I/usr/X11R7/include data.c --- console.o --- In file included from console.c:27:0: ./config.h:868:16: warning: "__builtin_alloca" is not defined, evaluates to 0 [-Wundef] #define alloca __builtin_alloca ^ --- data.o --- In file included from data.c:26:0: ./config.h:868:16: warning: "__builtin_alloca" is not defined, evaluates to 0 [-Wundef] #define alloca __builtin_alloca ^ --- debug.o --- gcc -c -Wall -Wno-switch -Wundef -Wsign-compare -Wno-char-subscripts -Wpacked -Wpointer-arith -Wshadow -Wmissing-declarations -Wmissing-prototypes -Wstrict-prototypes -Wdeclaration-after-statement -g -O2 -Dunix -no-pie -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -DTERMINFO -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -Demacs -I. -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/src -DHAVE_CONFIG_H -I/usr/pkg/include -I/usr/X11R7/include -I/usr/X11R7/include debug.c --- device.o --- gcc -c -Wall -Wno-switch -Wundef -Wsign-compare -Wno-char-subscripts -Wpacked -Wpointer-arith -Wshadow -Wmissing-declarations -Wmissing-prototypes -Wstrict-prototypes -Wdeclaration-after-statement -g -O2 -Dunix -no-pie -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -DTERMINFO -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -Demacs -I. -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/src -DHAVE_CONFIG_H -I/usr/pkg/include -I/usr/X11R7/include -I/usr/X11R7/include device.c --- debug.o --- In file included from debug.c:25:0: ./config.h:868:16: warning: "__builtin_alloca" is not defined, evaluates to 0 [-Wundef] #define alloca __builtin_alloca ^ --- device.o --- In file included from device.c:28:0: ./config.h:868:16: warning: "__builtin_alloca" is not defined, evaluates to 0 [-Wundef] #define alloca __builtin_alloca ^ --- dialog.o --- gcc -c -Wall -Wno-switch -Wundef -Wsign-compare -Wno-char-subscripts -Wpacked -Wpointer-arith -Wshadow -Wmissing-declarations -Wmissing-prototypes -Wstrict-prototypes -Wdeclaration-after-statement -g -O2 -Dunix -no-pie -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -DTERMINFO -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -Demacs -I. -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/src -DHAVE_CONFIG_H -I/usr/pkg/include -I/usr/X11R7/include -I/usr/X11R7/include dialog.c In file included from dialog.c:23:0: ./config.h:868:16: warning: "__builtin_alloca" is not defined, evaluates to 0 [-Wundef] #define alloca __builtin_alloca ^ --- dired.o --- gcc -c -Wall -Wno-switch -Wundef -Wsign-compare -Wno-char-subscripts -Wpacked -Wpointer-arith -Wshadow -Wmissing-declarations -Wmissing-prototypes -Wstrict-prototypes -Wdeclaration-after-statement -g -O2 -Dunix -no-pie -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -DTERMINFO -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -Demacs -I. -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/src -DHAVE_CONFIG_H -I/usr/pkg/include -I/usr/X11R7/include -I/usr/X11R7/include dired.c --- doc.o --- gcc -c -Wall -Wno-switch -Wundef -Wsign-compare -Wno-char-subscripts -Wpacked -Wpointer-arith -Wshadow -Wmissing-declarations -Wmissing-prototypes -Wstrict-prototypes -Wdeclaration-after-statement -g -O2 -Dunix -no-pie -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -DTERMINFO -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -Demacs -I. -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/src -DHAVE_CONFIG_H -I/usr/pkg/include -I/usr/X11R7/include -I/usr/X11R7/include doc.c --- dired.o --- In file included from dired.c:22:0: ./config.h:868:16: warning: "__builtin_alloca" is not defined, evaluates to 0 [-Wundef] #define alloca __builtin_alloca ^ --- doc.o --- In file included from doc.c:25:0: ./config.h:868:16: warning: "__builtin_alloca" is not defined, evaluates to 0 [-Wundef] #define alloca __builtin_alloca ^ --- doprnt.o --- gcc -c -Wall -Wno-switch -Wundef -Wsign-compare -Wno-char-subscripts -Wpacked -Wpointer-arith -Wshadow -Wmissing-declarations -Wmissing-prototypes -Wstrict-prototypes -Wdeclaration-after-statement -g -O2 -Dunix -no-pie -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -DTERMINFO -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -Demacs -I. -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/src -DHAVE_CONFIG_H -I/usr/pkg/include -I/usr/X11R7/include -I/usr/X11R7/include doprnt.c In file included from doprnt.c:42:0: ./config.h:868:16: warning: "__builtin_alloca" is not defined, evaluates to 0 [-Wundef] #define alloca __builtin_alloca ^ --- dumper.o --- gcc -c -Wall -Wno-switch -Wundef -Wsign-compare -Wno-char-subscripts -Wpacked -Wpointer-arith -Wshadow -Wmissing-declarations -Wmissing-prototypes -Wstrict-prototypes -Wdeclaration-after-statement -g -O2 -Dunix -no-pie -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -DTERMINFO -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -Demacs -I. -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/src -DHAVE_CONFIG_H -I/usr/pkg/include -I/usr/X11R7/include -I/usr/X11R7/include dumper.c --- doprnt.o --- In file included from doprnt.c:47:0: doprnt.c: In function 'emacs_vsnprintf': lstream.h:562:30: warning: dereferencing type-punned pointer will break strict-aliasing rules [-Wstrict-aliasing] LSTREAM_TYPE_DATA (stream, fixed_buffer) ^ lstream.h:315:12: note: in definition of macro 'LSTREAM_TYPE_DATA' ((struct type##_stream *) \ ^~~~ lstream.h:585:7: note: in expansion of macro 'FIXED_BUFFER_STREAM_DATA' FIXED_BUFFER_STREAM_DATA (&(lname##u.l))->outbuf = buf; \ ^~~~~~~~~~~~~~~~~~~~~~~~ doprnt.c:3709:7: note: in expansion of macro 'INIT_STACK_FIXED_BUFFER_OUTPUT_STREAM' INIT_STACK_FIXED_BUFFER_OUTPUT_STREAM (stream, output, size); ^ lstream.h:562:30: warning: dereferencing type-punned pointer will break strict-aliasing rules [-Wstrict-aliasing] LSTREAM_TYPE_DATA (stream, fixed_buffer) ^ lstream.h:315:12: note: in definition of macro 'LSTREAM_TYPE_DATA' ((struct type##_stream *) \ ^~~~ lstream.h:586:7: note: in expansion of macro 'FIXED_BUFFER_STREAM_DATA' FIXED_BUFFER_STREAM_DATA (&(lname##u.l))->size = bsize; \ ^~~~~~~~~~~~~~~~~~~~~~~~ doprnt.c:3709:7: note: in expansion of macro 'INIT_STACK_FIXED_BUFFER_OUTPUT_STREAM' INIT_STACK_FIXED_BUFFER_OUTPUT_STREAM (stream, output, size); ^ doprnt.c: In function 'emacs_vsnprintf_ascbyte': lstream.h:562:30: warning: dereferencing type-punned pointer will break strict-aliasing rules [-Wstrict-aliasing] LSTREAM_TYPE_DATA (stream, fixed_buffer) ^ lstream.h:315:12: note: in definition of macro 'LSTREAM_TYPE_DATA' ((struct type##_stream *) \ ^~~~ lstream.h:585:7: note: in expansion of macro 'FIXED_BUFFER_STREAM_DATA' FIXED_BUFFER_STREAM_DATA (&(lname##u.l))->outbuf = buf; \ ^~~~~~~~~~~~~~~~~~~~~~~~ doprnt.c:3793:3: note: in expansion of macro 'INIT_STACK_FIXED_BUFFER_OUTPUT_STREAM' INIT_STACK_FIXED_BUFFER_OUTPUT_STREAM (stream, (Ibyte *) output, size); ^ lstream.h:562:30: warning: dereferencing type-punned pointer will break strict-aliasing rules [-Wstrict-aliasing] LSTREAM_TYPE_DATA (stream, fixed_buffer) ^ lstream.h:315:12: note: in definition of macro 'LSTREAM_TYPE_DATA' ((struct type##_stream *) \ ^~~~ lstream.h:586:7: note: in expansion of macro 'FIXED_BUFFER_STREAM_DATA' FIXED_BUFFER_STREAM_DATA (&(lname##u.l))->size = bsize; \ ^~~~~~~~~~~~~~~~~~~~~~~~ doprnt.c:3793:3: note: in expansion of macro 'INIT_STACK_FIXED_BUFFER_OUTPUT_STREAM' INIT_STACK_FIXED_BUFFER_OUTPUT_STREAM (stream, (Ibyte *) output, size); ^ --- dumper.o --- In file included from dumper.c:35:0: ./config.h:868:16: warning: "__builtin_alloca" is not defined, evaluates to 0 [-Wundef] #define alloca __builtin_alloca ^ --- editfns.o --- --- elhash.o --- --- editfns.o --- gcc -c -Wall -Wno-switch -Wundef -Wsign-compare -Wno-char-subscripts -Wpacked -Wpointer-arith -Wshadow -Wmissing-declarations -Wmissing-prototypes -Wstrict-prototypes -Wdeclaration-after-statement -g -O2 -Dunix -no-pie -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -DTERMINFO -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -Demacs -I. -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/src -DHAVE_CONFIG_H -I/usr/pkg/include -I/usr/X11R7/include -I/usr/X11R7/include editfns.c --- emacs.o --- --- elhash.o --- gcc -c -Wall -Wno-switch -Wundef -Wsign-compare -Wno-char-subscripts -Wpacked -Wpointer-arith -Wshadow -Wmissing-declarations -Wmissing-prototypes -Wstrict-prototypes -Wdeclaration-after-statement -g -O2 -Dunix -no-pie -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -DTERMINFO -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -Demacs -I. -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/src -DHAVE_CONFIG_H -I/usr/pkg/include -I/usr/X11R7/include -I/usr/X11R7/include elhash.c --- emacs.o --- gcc -c -Wall -Wno-switch -Wundef -Wsign-compare -Wno-char-subscripts -Wpacked -Wpointer-arith -Wshadow -Wmissing-declarations -Wmissing-prototypes -Wstrict-prototypes -Wdeclaration-after-statement -g -O2 -Dunix -no-pie -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -DTERMINFO -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -Demacs -I. -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/src -DHAVE_CONFIG_H -I/usr/pkg/include -I/usr/X11R7/include -I/usr/X11R7/include emacs.c --- editfns.o --- In file included from editfns.c:27:0: ./config.h:868:16: warning: "__builtin_alloca" is not defined, evaluates to 0 [-Wundef] #define alloca __builtin_alloca ^ --- elhash.o --- In file included from elhash.c:78:0: ./config.h:868:16: warning: "__builtin_alloca" is not defined, evaluates to 0 [-Wundef] #define alloca __builtin_alloca ^ --- emacs.o --- In file included from emacs.c:424:0: ./config.h:868:16: warning: "__builtin_alloca" is not defined, evaluates to 0 [-Wundef] #define alloca __builtin_alloca ^ --- elhash.o --- elhash.c: In function 'print_hash_table': elhash.c:543:44: warning: format '%zd' expects argument of type 'signed size_t', but argument 3 has type 'Elemcount {aka long int}' [-Wformat=] write_fmt_string (printcharfun, " :size %zd", ht->count); ~~^ ~~~~~~~~~ %ld elhash.c:545:44: warning: format '%zd' expects argument of type 'signed size_t', but argument 3 has type 'Elemcount {aka long int}' [-Wformat=] write_fmt_string (printcharfun, " :size %zd/%zd", ht->count, ~~^ ~~~~~~~~~ %ld elhash.c:545:48: warning: format '%zd' expects argument of type 'signed size_t', but argument 4 has type 'Elemcount {aka long int}' [-Wformat=] write_fmt_string (printcharfun, " :size %zd/%zd", ht->count, ~~^ %ld ht->size); ~~~~~~~~ --- emacs.o --- In file included from /usr/include/stdio.h:597:0, from lisp.h:114, from emacs.c:425: emacs.c: In function 'assert_equal_failed': emacs.c:3937:20: warning: format '%zd' expects argument of type 'signed size_t', but argument 6 has type 'long int' [-Wformat=] sprintf (bigstr, "%s (%zd) should == %s (%zd) but doesn't", ^ emacs.c:3937:20: warning: format '%zd' expects argument of type 'signed size_t', but argument 8 has type 'long int' [-Wformat=] --- emodules.o --- gcc -c -Wall -Wno-switch -Wundef -Wsign-compare -Wno-char-subscripts -Wpacked -Wpointer-arith -Wshadow -Wmissing-declarations -Wmissing-prototypes -Wstrict-prototypes -Wdeclaration-after-statement -g -O2 -Dunix -no-pie -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -DTERMINFO -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -Demacs -I. -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/src -DHAVE_CONFIG_H -I/usr/pkg/include -I/usr/X11R7/include -I/usr/X11R7/include emodules.c In file included from emodules.h:34:0, from emodules.c:22: ./config.h:868:16: warning: "__builtin_alloca" is not defined, evaluates to 0 [-Wundef] #define alloca __builtin_alloca ^ --- eval.o --- gcc -c -Wall -Wno-switch -Wundef -Wsign-compare -Wno-char-subscripts -Wpacked -Wpointer-arith -Wshadow -Wmissing-declarations -Wmissing-prototypes -Wstrict-prototypes -Wdeclaration-after-statement -g -O2 -Dunix -no-pie -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -DTERMINFO -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -Demacs -I. -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/src -DHAVE_CONFIG_H -I/usr/pkg/include -I/usr/X11R7/include -I/usr/X11R7/include eval.c In file included from eval.c:136:0: ./config.h:868:16: warning: "__builtin_alloca" is not defined, evaluates to 0 [-Wundef] #define alloca __builtin_alloca ^ --- emodules.o --- emodules.c: In function 'emodules_load': emodules.c:385:80: warning: format '%zd' expects argument of type 'signed size_t', but argument 3 has type 'long int' [-Wformat=] signal_ferror (Qdll_error, "Invalid dynamic module: Unsupported version `%zd(%zd)'", *ellcc_rev, EMODULES_REVISION); ~~^ ~~~~~~~~~~ %ld emodules.c:385:84: warning: format '%zd' expects argument of type 'signed size_t', but argument 4 has type 'long int' [-Wformat=] signal_ferror (Qdll_error, "Invalid dynamic module: Unsupported version `%zd(%zd)'", *ellcc_rev, EMODULES_REVISION); ~~^ %ld --- event-stream.o --- gcc -c -Wall -Wno-switch -Wundef -Wsign-compare -Wno-char-subscripts -Wpacked -Wpointer-arith -Wshadow -Wmissing-declarations -Wmissing-prototypes -Wstrict-prototypes -Wdeclaration-after-statement -g -O2 -Dunix -no-pie -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -DTERMINFO -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -Demacs -I. -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/src -DHAVE_CONFIG_H -I/usr/pkg/include -I/usr/X11R7/include -I/usr/X11R7/include event-stream.c --- eval.o --- eval.c: In function 'print_multiple_value': eval.c:4742:56: warning: format '%zd' expects argument of type 'signed size_t', but argument 3 has type 'Elemcount {aka long int}' [-Wformat=] "#", ~~^ %ld --- event-stream.o --- In file included from event-stream.c:75:0: ./config.h:868:16: warning: "__builtin_alloca" is not defined, evaluates to 0 [-Wundef] #define alloca __builtin_alloca ^ --- event-unixoid.o --- gcc -c -Wall -Wno-switch -Wundef -Wsign-compare -Wno-char-subscripts -Wpacked -Wpointer-arith -Wshadow -Wmissing-declarations -Wmissing-prototypes -Wstrict-prototypes -Wdeclaration-after-statement -g -O2 -Dunix -no-pie -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -DTERMINFO -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -Demacs -I. -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/src -DHAVE_CONFIG_H -I/usr/pkg/include -I/usr/X11R7/include -I/usr/X11R7/include event-unixoid.c --- event-stream.o --- In file included from event-stream.c:79:0: event-stream.c: In function 'echo_key_event': lstream.h:562:30: warning: dereferencing type-punned pointer will break strict-aliasing rules [-Wstrict-aliasing] LSTREAM_TYPE_DATA (stream, fixed_buffer) ^ lstream.h:315:12: note: in definition of macro 'LSTREAM_TYPE_DATA' ((struct type##_stream *) \ ^~~~ lstream.h:585:7: note: in expansion of macro 'FIXED_BUFFER_STREAM_DATA' FIXED_BUFFER_STREAM_DATA (&(lname##u.l))->outbuf = buf; \ ^~~~~~~~~~~~~~~~~~~~~~~~ event-stream.c:649:3: note: in expansion of macro 'INIT_STACK_FIXED_BUFFER_OUTPUT_STREAM' INIT_STACK_FIXED_BUFFER_OUTPUT_STREAM ^ lstream.h:562:30: warning: dereferencing type-punned pointer will break strict-aliasing rules [-Wstrict-aliasing] LSTREAM_TYPE_DATA (stream, fixed_buffer) ^ lstream.h:315:12: note: in definition of macro 'LSTREAM_TYPE_DATA' ((struct type##_stream *) \ ^~~~ lstream.h:586:7: note: in expansion of macro 'FIXED_BUFFER_STREAM_DATA' FIXED_BUFFER_STREAM_DATA (&(lname##u.l))->size = bsize; \ ^~~~~~~~~~~~~~~~~~~~~~~~ event-stream.c:649:3: note: in expansion of macro 'INIT_STACK_FIXED_BUFFER_OUTPUT_STREAM' INIT_STACK_FIXED_BUFFER_OUTPUT_STREAM ^ --- event-unixoid.o --- In file included from event-unixoid.c:27:0: ./config.h:868:16: warning: "__builtin_alloca" is not defined, evaluates to 0 [-Wundef] #define alloca __builtin_alloca ^ --- events.o --- gcc -c -Wall -Wno-switch -Wundef -Wsign-compare -Wno-char-subscripts -Wpacked -Wpointer-arith -Wshadow -Wmissing-declarations -Wmissing-prototypes -Wstrict-prototypes -Wdeclaration-after-statement -g -O2 -Dunix -no-pie -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -DTERMINFO -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -Demacs -I. -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/src -DHAVE_CONFIG_H -I/usr/pkg/include -I/usr/X11R7/include -I/usr/X11R7/include events.c In file included from events.c:25:0: ./config.h:868:16: warning: "__builtin_alloca" is not defined, evaluates to 0 [-Wundef] #define alloca __builtin_alloca ^ events.c: In function 'print_event': events.c:284:52: warning: format '%zd' expects argument of type 'signed size_t', but argument 3 has type 'long int' [-Wformat=] write_fmt_string (printcharfun, "#pos < 0 ? soe->pos : ~~~~~~~~~~~~~~~~~~~~~~~~~ object_memxpos_to_bytexpos (obj, soe->pos), ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ extents.c:1038:42: warning: format '%zd' expects argument of type 'signed size_t', but argument 3 has type 'Memxpos {aka long int}' [-Wformat=] stderr_out ("SOE pos is %zd (memxpos %zd)\n", ~~^ %ld extents.c:1041:8: soe->pos); ~~~~~~~~ extents.c: In function 'soe_move': extents.c:1144:36: warning: format '%zd' expects argument of type 'signed size_t', but argument 2 has type 'long int' [-Wformat=] stderr_out ("Moving SOE from %zd (memxpos %zd) to %zd (memxpos %zd)\n", ~~^ %ld soe->pos < 0 ? soe->pos : ~~~~~~~~~~~~~~~~~~~~~~~~~ object_memxpos_to_bytexpos (obj, soe->pos), soe->pos, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ extents.c:1144:49: warning: format '%zd' expects argument of type 'signed size_t', but argument 3 has type 'Memxpos {aka long int}' [-Wformat=] stderr_out ("Moving SOE from %zd (memxpos %zd) to %zd (memxpos %zd)\n", ~~^ %ld extents.c:1146:47: object_memxpos_to_bytexpos (obj, soe->pos), soe->pos, ~~~~~~~~ extents.c:1144:57: warning: format '%zd' expects argument of type 'signed size_t', but argument 4 has type 'Bytexpos {aka long int}' [-Wformat=] stderr_out ("Moving SOE from %zd (memxpos %zd) to %zd (memxpos %zd)\n", ~~^ %ld extents.c:1147:3: object_memxpos_to_bytexpos (obj, pos), pos); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ extents.c:1144:70: warning: format '%zd' expects argument of type 'signed size_t', but argument 5 has type 'Memxpos {aka long int}' [-Wformat=] stderr_out ("Moving SOE from %zd (memxpos %zd) to %zd (memxpos %zd)\n", ~~^ %ld --- file-coding.o --- gcc -c -Wall -Wno-switch -Wundef -Wsign-compare -Wno-char-subscripts -Wpacked -Wpointer-arith -Wshadow -Wmissing-declarations -Wmissing-prototypes -Wstrict-prototypes -Wdeclaration-after-statement -g -O2 -Dunix -no-pie -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -DTERMINFO -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -Demacs -I. -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/src -DHAVE_CONFIG_H -I/usr/pkg/include -I/usr/X11R7/include -I/usr/X11R7/include file-coding.c --- extents.o --- extents.c: In function 'print_extent_1': extents.c:2798:40: warning: format '%zd' expects argument of type 'signed size_t', but argument 4 has type 'long int' [-Wformat=] "%zd, %zd", ~~^ %ld extents.c:2798:45: warning: format '%zd' expects argument of type 'signed size_t', but argument 5 has type 'long int' [-Wformat=] "%zd, %zd", ~~^ %ld --- fileio.o --- gcc -c -Wall -Wno-switch -Wundef -Wsign-compare -Wno-char-subscripts -Wpacked -Wpointer-arith -Wshadow -Wmissing-declarations -Wmissing-prototypes -Wstrict-prototypes -Wdeclaration-after-statement -g -O2 -Dunix -no-pie -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -DTERMINFO -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -Demacs -I. -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/src -DHAVE_CONFIG_H -I/usr/pkg/include -I/usr/X11R7/include -I/usr/X11R7/include fileio.c --- file-coding.o --- In file included from file-coding.c:70:0: ./config.h:868:16: warning: "__builtin_alloca" is not defined, evaluates to 0 [-Wundef] #define alloca __builtin_alloca ^ --- fileio.o --- In file included from fileio.c:30:0: ./config.h:868:16: warning: "__builtin_alloca" is not defined, evaluates to 0 [-Wundef] #define alloca __builtin_alloca ^ --- file-coding.o --- In file included from file-coding.c:76:0: file-coding.c: In function 'coding_stream_coding_system': file-coding.h:1146:63: warning: dereferencing type-punned pointer will break strict-aliasing rules [-Wstrict-aliasing] #define CODING_STREAM_DATA(stream) LSTREAM_TYPE_DATA (stream, coding) ^ lstream.h:315:12: note: in definition of macro 'LSTREAM_TYPE_DATA' ((struct type##_stream *) \ ^~~~ file-coding.c:2558:10: note: in expansion of macro 'CODING_STREAM_DATA' return CODING_STREAM_DATA (stream)->codesys; ^~~~~~~~~~~~~~~~~~ file-coding.c: In function 'detect_coding_type': file-coding.c:4464:52: warning: format '%zd' expects argument of type 'signed size_t', but argument 2 has type 'Bytecount {aka long int}' [-Wformat=] debug_out ("detect_coding_type: processing %zd bytes\n", n); ~~^ %ld --- filelock.o --- gcc -c -Wall -Wno-switch -Wundef -Wsign-compare -Wno-char-subscripts -Wpacked -Wpointer-arith -Wshadow -Wmissing-declarations -Wmissing-prototypes -Wstrict-prototypes -Wdeclaration-after-statement -g -O2 -Dunix -no-pie -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -DTERMINFO -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -Demacs -I. -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/src -DHAVE_CONFIG_H -I/usr/pkg/include -I/usr/X11R7/include -I/usr/X11R7/include filelock.c In file included from filelock.c:21:0: ./config.h:868:16: warning: "__builtin_alloca" is not defined, evaluates to 0 [-Wundef] #define alloca __builtin_alloca ^ --- filemode.o --- gcc -c -Wall -Wno-switch -Wundef -Wsign-compare -Wno-char-subscripts -Wpacked -Wpointer-arith -Wshadow -Wmissing-declarations -Wmissing-prototypes -Wstrict-prototypes -Wdeclaration-after-statement -g -O2 -Dunix -no-pie -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -DTERMINFO -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -Demacs -I. -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/src -DHAVE_CONFIG_H -I/usr/pkg/include -I/usr/X11R7/include -I/usr/X11R7/include filemode.c --- floatfns.o --- --- filemode.o --- In file included from filemode.c:21:0: ./config.h:868:16: warning: "__builtin_alloca" is not defined, evaluates to 0 [-Wundef] #define alloca __builtin_alloca ^ --- floatfns.o --- gcc -c -Wall -Wno-switch -Wundef -Wsign-compare -Wno-char-subscripts -Wpacked -Wpointer-arith -Wshadow -Wmissing-declarations -Wmissing-prototypes -Wstrict-prototypes -Wdeclaration-after-statement -g -O2 -Dunix -no-pie -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -DTERMINFO -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -Demacs -I. -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/src -DHAVE_CONFIG_H -I/usr/pkg/include -I/usr/X11R7/include -I/usr/X11R7/include floatfns.c In file included from floatfns.c:45:0: ./config.h:868:16: warning: "__builtin_alloca" is not defined, evaluates to 0 [-Wundef] #define alloca __builtin_alloca ^ --- fns.o --- gcc -c -Wall -Wno-switch -Wundef -Wsign-compare -Wno-char-subscripts -Wpacked -Wpointer-arith -Wshadow -Wmissing-declarations -Wmissing-prototypes -Wstrict-prototypes -Wdeclaration-after-statement -g -O2 -Dunix -no-pie -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -DTERMINFO -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -Demacs -I. -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/src -DHAVE_CONFIG_H -I/usr/pkg/include -I/usr/X11R7/include -I/usr/X11R7/include fns.c --- font-lock.o --- gcc -c -Wall -Wno-switch -Wundef -Wsign-compare -Wno-char-subscripts -Wpacked -Wpointer-arith -Wshadow -Wmissing-declarations -Wmissing-prototypes -Wstrict-prototypes -Wdeclaration-after-statement -g -O2 -Dunix -no-pie -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -DTERMINFO -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -Demacs -I. -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/src -DHAVE_CONFIG_H -I/usr/pkg/include -I/usr/X11R7/include -I/usr/X11R7/include font-lock.c --- fns.o --- In file included from fns.c:28:0: ./config.h:868:16: warning: "__builtin_alloca" is not defined, evaluates to 0 [-Wundef] #define alloca __builtin_alloca ^ --- font-lock.o --- In file included from font-lock.c:45:0: ./config.h:868:16: warning: "__builtin_alloca" is not defined, evaluates to 0 [-Wundef] #define alloca __builtin_alloca ^ --- fontcolor.o --- gcc -c -Wall -Wno-switch -Wundef -Wsign-compare -Wno-char-subscripts -Wpacked -Wpointer-arith -Wshadow -Wmissing-declarations -Wmissing-prototypes -Wstrict-prototypes -Wdeclaration-after-statement -g -O2 -Dunix -no-pie -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -DTERMINFO -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -Demacs -I. -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/src -DHAVE_CONFIG_H -I/usr/pkg/include -I/usr/X11R7/include -I/usr/X11R7/include fontcolor.c --- frame.o --- gcc -c -Wall -Wno-switch -Wundef -Wsign-compare -Wno-char-subscripts -Wpacked -Wpointer-arith -Wshadow -Wmissing-declarations -Wmissing-prototypes -Wstrict-prototypes -Wdeclaration-after-statement -g -O2 -Dunix -no-pie -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -DTERMINFO -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -Demacs -I. -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/src -DHAVE_CONFIG_H -I/usr/pkg/include -I/usr/X11R7/include -I/usr/X11R7/include frame.c --- fontcolor.o --- In file included from fontcolor.c:24:0: ./config.h:868:16: warning: "__builtin_alloca" is not defined, evaluates to 0 [-Wundef] #define alloca __builtin_alloca ^ --- frame.o --- In file included from frame.c:365:0: ./config.h:868:16: warning: "__builtin_alloca" is not defined, evaluates to 0 [-Wundef] #define alloca __builtin_alloca ^ --- gc.o --- gcc -c -Wall -Wno-switch -Wundef -Wsign-compare -Wno-char-subscripts -Wpacked -Wpointer-arith -Wshadow -Wmissing-declarations -Wmissing-prototypes -Wstrict-prototypes -Wdeclaration-after-statement -g -O2 -Dunix -no-pie -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -DTERMINFO -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -Demacs -I. -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/src -DHAVE_CONFIG_H -I/usr/pkg/include -I/usr/X11R7/include -I/usr/X11R7/include gc.c --- general.o --- gcc -c -Wall -Wno-switch -Wundef -Wsign-compare -Wno-char-subscripts -Wpacked -Wpointer-arith -Wshadow -Wmissing-declarations -Wmissing-prototypes -Wstrict-prototypes -Wdeclaration-after-statement -g -O2 -Dunix -no-pie -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -DTERMINFO -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -Demacs -I. -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/src -DHAVE_CONFIG_H -I/usr/pkg/include -I/usr/X11R7/include -I/usr/X11R7/include general.c --- gc.o --- In file included from gc.c:46:0: ./config.h:868:16: warning: "__builtin_alloca" is not defined, evaluates to 0 [-Wundef] #define alloca __builtin_alloca ^ --- general.o --- In file included from general.c:25:0: ./config.h:868:16: warning: "__builtin_alloca" is not defined, evaluates to 0 [-Wundef] #define alloca __builtin_alloca ^ --- glyphs-eimage.o --- gcc -c -Wall -Wno-switch -Wundef -Wsign-compare -Wno-char-subscripts -Wpacked -Wpointer-arith -Wshadow -Wmissing-declarations -Wmissing-prototypes -Wstrict-prototypes -Wdeclaration-after-statement -g -O2 -Dunix -no-pie -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -DTERMINFO -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -Demacs -I. -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/src -DHAVE_CONFIG_H -I/usr/pkg/include -I/usr/X11R7/include -I/usr/X11R7/include glyphs-eimage.c --- gc.o --- gc.c: In function 'lispdesc_indirect_count_1': gc.c:212:69: warning: format '%zd' expects argument of type 'signed size_t', but argument 4 has type 'long int' [-Wformat=] stderr_out ("Unsupported count type : %d (line = %d, code = %zd)\n", ~~^ %ld --- glyphs-eimage.o --- In file included from glyphs-eimage.c:41:0: ./config.h:868:16: warning: "__builtin_alloca" is not defined, evaluates to 0 [-Wundef] #define alloca __builtin_alloca ^ --- glyphs-shared.o --- gcc -c -Wall -Wno-switch -Wundef -Wsign-compare -Wno-char-subscripts -Wpacked -Wpointer-arith -Wshadow -Wmissing-declarations -Wmissing-prototypes -Wstrict-prototypes -Wdeclaration-after-statement -g -O2 -Dunix -no-pie -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -DTERMINFO -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -Demacs -I. -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/src -DHAVE_CONFIG_H -I/usr/pkg/include -I/usr/X11R7/include -I/usr/X11R7/include glyphs-shared.c In file included from glyphs-shared.c:30:0: ./config.h:868:16: warning: "__builtin_alloca" is not defined, evaluates to 0 [-Wundef] #define alloca __builtin_alloca ^ --- glyphs-widget.o --- gcc -c -Wall -Wno-switch -Wundef -Wsign-compare -Wno-char-subscripts -Wpacked -Wpointer-arith -Wshadow -Wmissing-declarations -Wmissing-prototypes -Wstrict-prototypes -Wdeclaration-after-statement -g -O2 -Dunix -no-pie -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -DTERMINFO -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -Demacs -I. -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/src -DHAVE_CONFIG_H -I/usr/pkg/include -I/usr/X11R7/include -I/usr/X11R7/include glyphs-widget.c In file included from glyphs-widget.c:24:0: ./config.h:868:16: warning: "__builtin_alloca" is not defined, evaluates to 0 [-Wundef] #define alloca __builtin_alloca ^ --- glyphs.o --- gcc -c -Wall -Wno-switch -Wundef -Wsign-compare -Wno-char-subscripts -Wpacked -Wpointer-arith -Wshadow -Wmissing-declarations -Wmissing-prototypes -Wstrict-prototypes -Wdeclaration-after-statement -g -O2 -Dunix -no-pie -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -DTERMINFO -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -Demacs -I. -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/src -DHAVE_CONFIG_H -I/usr/pkg/include -I/usr/X11R7/include -I/usr/X11R7/include glyphs.c --- gui.o --- gcc -c -Wall -Wno-switch -Wundef -Wsign-compare -Wno-char-subscripts -Wpacked -Wpointer-arith -Wshadow -Wmissing-declarations -Wmissing-prototypes -Wstrict-prototypes -Wdeclaration-after-statement -g -O2 -Dunix -no-pie -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -DTERMINFO -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -Demacs -I. -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/src -DHAVE_CONFIG_H -I/usr/pkg/include -I/usr/X11R7/include -I/usr/X11R7/include gui.c --- gutter.o --- gcc -c -Wall -Wno-switch -Wundef -Wsign-compare -Wno-char-subscripts -Wpacked -Wpointer-arith -Wshadow -Wmissing-declarations -Wmissing-prototypes -Wstrict-prototypes -Wdeclaration-after-statement -g -O2 -Dunix -no-pie -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -DTERMINFO -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -Demacs -I. -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/src -DHAVE_CONFIG_H -I/usr/pkg/include -I/usr/X11R7/include -I/usr/X11R7/include gutter.c --- glyphs.o --- In file included from glyphs.c:45:0: ./config.h:868:16: warning: "__builtin_alloca" is not defined, evaluates to 0 [-Wundef] #define alloca __builtin_alloca ^ --- gui.o --- In file included from gui.c:26:0: ./config.h:868:16: warning: "__builtin_alloca" is not defined, evaluates to 0 [-Wundef] #define alloca __builtin_alloca ^ --- gutter.o --- In file included from gutter.c:25:0: ./config.h:868:16: warning: "__builtin_alloca" is not defined, evaluates to 0 [-Wundef] #define alloca __builtin_alloca ^ --- glyphs.o --- glyphs.c: In function 'print_image_instance': glyphs.c:970:41: warning: format '%zd' expects argument of type 'signed size_t', but argument 3 has type 'long int' [-Wformat=] write_fmt_string (printcharfun, "%zd", ~~^ %ld glyphs.c:976:41: warning: format '%zd' expects argument of type 'signed size_t', but argument 3 has type 'long int' [-Wformat=] write_fmt_string (printcharfun, "%zd", ~~^ %ld --- imgproc.o --- gcc -c -Wall -Wno-switch -Wundef -Wsign-compare -Wno-char-subscripts -Wpacked -Wpointer-arith -Wshadow -Wmissing-declarations -Wmissing-prototypes -Wstrict-prototypes -Wdeclaration-after-statement -g -O2 -Dunix -no-pie -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -DTERMINFO -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -Demacs -I. -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/src -DHAVE_CONFIG_H -I/usr/pkg/include -I/usr/X11R7/include -I/usr/X11R7/include imgproc.c In file included from imgproc.c:42:0: ./config.h:868:16: warning: "__builtin_alloca" is not defined, evaluates to 0 [-Wundef] #define alloca __builtin_alloca ^ --- indent.o --- gcc -c -Wall -Wno-switch -Wundef -Wsign-compare -Wno-char-subscripts -Wpacked -Wpointer-arith -Wshadow -Wmissing-declarations -Wmissing-prototypes -Wstrict-prototypes -Wdeclaration-after-statement -g -O2 -Dunix -no-pie -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -DTERMINFO -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -Demacs -I. -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/src -DHAVE_CONFIG_H -I/usr/pkg/include -I/usr/X11R7/include -I/usr/X11R7/include indent.c In file included from indent.c:27:0: ./config.h:868:16: warning: "__builtin_alloca" is not defined, evaluates to 0 [-Wundef] #define alloca __builtin_alloca ^ --- inline.o --- gcc -c -Wall -Wno-switch -Wundef -Wsign-compare -Wno-char-subscripts -Wpacked -Wpointer-arith -Wshadow -Wmissing-declarations -Wmissing-prototypes -Wstrict-prototypes -Wdeclaration-after-statement -g -O2 -Dunix -no-pie -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -DTERMINFO -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -Demacs -I. -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/src -DHAVE_CONFIG_H -I/usr/pkg/include -I/usr/X11R7/include -I/usr/X11R7/include inline.c In file included from inline.c:42:0: ./config.h:868:16: warning: "__builtin_alloca" is not defined, evaluates to 0 [-Wundef] #define alloca __builtin_alloca ^ --- input-method-xlib.o --- gcc -c -Wall -Wno-switch -Wundef -Wsign-compare -Wno-char-subscripts -Wpacked -Wpointer-arith -Wshadow -Wmissing-declarations -Wmissing-prototypes -Wstrict-prototypes -Wdeclaration-after-statement -g -O2 -Dunix -no-pie -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -DTERMINFO -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -Demacs -I. -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/src -DHAVE_CONFIG_H -I/usr/pkg/include -I/usr/X11R7/include -I/usr/X11R7/include input-method-xlib.c In file included from input-method-xlib.c:70:0: ./config.h:868:16: warning: "__builtin_alloca" is not defined, evaluates to 0 [-Wundef] #define alloca __builtin_alloca ^ --- insdel.o --- gcc -c -Wall -Wno-switch -Wundef -Wsign-compare -Wno-char-subscripts -Wpacked -Wpointer-arith -Wshadow -Wmissing-declarations -Wmissing-prototypes -Wstrict-prototypes -Wdeclaration-after-statement -g -O2 -Dunix -no-pie -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -DTERMINFO -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -Demacs -I. -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/src -DHAVE_CONFIG_H -I/usr/pkg/include -I/usr/X11R7/include -I/usr/X11R7/include insdel.c In file included from insdel.c:34:0: ./config.h:868:16: warning: "__builtin_alloca" is not defined, evaluates to 0 [-Wundef] #define alloca __builtin_alloca ^ --- intl.o --- gcc -c -Wall -Wno-switch -Wundef -Wsign-compare -Wno-char-subscripts -Wpacked -Wpointer-arith -Wshadow -Wmissing-declarations -Wmissing-prototypes -Wstrict-prototypes -Wdeclaration-after-statement -g -O2 -Dunix -no-pie -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -DTERMINFO -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -Demacs -I. -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/src -DHAVE_CONFIG_H -I/usr/pkg/include -I/usr/X11R7/include -I/usr/X11R7/include intl.c In file included from intl.c:22:0: ./config.h:868:16: warning: "__builtin_alloca" is not defined, evaluates to 0 [-Wundef] #define alloca __builtin_alloca ^ --- keymap.o --- gcc -c -Wall -Wno-switch -Wundef -Wsign-compare -Wno-char-subscripts -Wpacked -Wpointer-arith -Wshadow -Wmissing-declarations -Wmissing-prototypes -Wstrict-prototypes -Wdeclaration-after-statement -g -O2 -Dunix -no-pie -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -DTERMINFO -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -Demacs -I. -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/src -DHAVE_CONFIG_H -I/usr/pkg/include -I/usr/X11R7/include -I/usr/X11R7/include keymap.c --- lastfile.o --- gcc -c -Wall -Wno-switch -Wundef -Wsign-compare -Wno-char-subscripts -Wpacked -Wpointer-arith -Wshadow -Wmissing-declarations -Wmissing-prototypes -Wstrict-prototypes -Wdeclaration-after-statement -g -O2 -Dunix -no-pie -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -DTERMINFO -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -Demacs -I. -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/src -DHAVE_CONFIG_H -I/usr/pkg/include -I/usr/X11R7/include -I/usr/X11R7/include lastfile.c --- keymap.o --- In file included from keymap.c:27:0: ./config.h:868:16: warning: "__builtin_alloca" is not defined, evaluates to 0 [-Wundef] #define alloca __builtin_alloca ^ keymap.c: In function 'print_keymap': keymap.c:302:43: warning: format '%zd' expects argument of type 'signed size_t', but argument 3 has type 'long int' [-Wformat=] write_fmt_string (printcharfun, "size %zd 0x%x>", ~~^ %ld --- line-number.o --- gcc -c -Wall -Wno-switch -Wundef -Wsign-compare -Wno-char-subscripts -Wpacked -Wpointer-arith -Wshadow -Wmissing-declarations -Wmissing-prototypes -Wstrict-prototypes -Wdeclaration-after-statement -g -O2 -Dunix -no-pie -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -DTERMINFO -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -Demacs -I. -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/src -DHAVE_CONFIG_H -I/usr/pkg/include -I/usr/X11R7/include -I/usr/X11R7/include line-number.c --- keymap.o --- In file included from keymap.c:31:0: keymap.c: In function 'where_is_to_Ibyte': lstream.h:562:30: warning: dereferencing type-punned pointer will break strict-aliasing rules [-Wstrict-aliasing] LSTREAM_TYPE_DATA (stream, fixed_buffer) ^ lstream.h:315:12: note: in definition of macro 'LSTREAM_TYPE_DATA' ((struct type##_stream *) \ ^~~~ lstream.h:585:7: note: in expansion of macro 'FIXED_BUFFER_STREAM_DATA' FIXED_BUFFER_STREAM_DATA (&(lname##u.l))->outbuf = buf; \ ^~~~~~~~~~~~~~~~~~~~~~~~ keymap.c:3895:3: note: in expansion of macro 'INIT_STACK_FIXED_BUFFER_OUTPUT_STREAM' INIT_STACK_FIXED_BUFFER_OUTPUT_STREAM (stream, buffer, bufsize); ^ lstream.h:562:30: warning: dereferencing type-punned pointer will break strict-aliasing rules [-Wstrict-aliasing] LSTREAM_TYPE_DATA (stream, fixed_buffer) ^ lstream.h:315:12: note: in definition of macro 'LSTREAM_TYPE_DATA' ((struct type##_stream *) \ ^~~~ lstream.h:586:7: note: in expansion of macro 'FIXED_BUFFER_STREAM_DATA' FIXED_BUFFER_STREAM_DATA (&(lname##u.l))->size = bsize; \ ^~~~~~~~~~~~~~~~~~~~~~~~ keymap.c:3895:3: note: in expansion of macro 'INIT_STACK_FIXED_BUFFER_OUTPUT_STREAM' INIT_STACK_FIXED_BUFFER_OUTPUT_STREAM (stream, buffer, bufsize); ^ --- linuxplay.o --- gcc -c -Wall -Wno-switch -Wundef -Wsign-compare -Wno-char-subscripts -Wpacked -Wpointer-arith -Wshadow -Wmissing-declarations -Wmissing-prototypes -Wstrict-prototypes -Wdeclaration-after-statement -g -O2 -Dunix -no-pie -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -DTERMINFO -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -Demacs -I. -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/src -DHAVE_CONFIG_H -I/usr/pkg/include -I/usr/X11R7/include -I/usr/X11R7/include linuxplay.c --- line-number.o --- In file included from line-number.c:51:0: ./config.h:868:16: warning: "__builtin_alloca" is not defined, evaluates to 0 [-Wundef] #define alloca __builtin_alloca ^ --- linuxplay.o --- In file included from linuxplay.c:58:0: ./config.h:868:16: warning: "__builtin_alloca" is not defined, evaluates to 0 [-Wundef] #define alloca __builtin_alloca ^ --- lread.o --- gcc -c -Wall -Wno-switch -Wundef -Wsign-compare -Wno-char-subscripts -Wpacked -Wpointer-arith -Wshadow -Wmissing-declarations -Wmissing-prototypes -Wstrict-prototypes -Wdeclaration-after-statement -g -O2 -Dunix -no-pie -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -DTERMINFO -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -Demacs -I. -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/src -DHAVE_CONFIG_H -I/usr/pkg/include -I/usr/X11R7/include -I/usr/X11R7/include lread.c In file included from lread.c:25:0: ./config.h:868:16: warning: "__builtin_alloca" is not defined, evaluates to 0 [-Wundef] #define alloca __builtin_alloca ^ --- lstream.o --- gcc -c -Wall -Wno-switch -Wundef -Wsign-compare -Wno-char-subscripts -Wpacked -Wpointer-arith -Wshadow -Wmissing-declarations -Wmissing-prototypes -Wstrict-prototypes -Wdeclaration-after-statement -g -O2 -Dunix -no-pie -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -DTERMINFO -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -Demacs -I. -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/src -DHAVE_CONFIG_H -I/usr/pkg/include -I/usr/X11R7/include -I/usr/X11R7/include lstream.c In file included from lstream.c:25:0: ./config.h:868:16: warning: "__builtin_alloca" is not defined, evaluates to 0 [-Wundef] #define alloca __builtin_alloca ^ --- macros.o --- gcc -c -Wall -Wno-switch -Wundef -Wsign-compare -Wno-char-subscripts -Wpacked -Wpointer-arith -Wshadow -Wmissing-declarations -Wmissing-prototypes -Wstrict-prototypes -Wdeclaration-after-statement -g -O2 -Dunix -no-pie -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -DTERMINFO -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -Demacs -I. -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/src -DHAVE_CONFIG_H -I/usr/pkg/include -I/usr/X11R7/include -I/usr/X11R7/include macros.c --- lstream.o --- In file included from lstream.c:31:0: lstream.c: In function 'stdio_rewinder': lstream.c:1076:62: warning: dereferencing type-punned pointer will break strict-aliasing rules [-Wstrict-aliasing] #define STDIO_STREAM_DATA(stream) LSTREAM_TYPE_DATA (stream, stdio) ^ lstream.h:315:12: note: in definition of macro 'LSTREAM_TYPE_DATA' ((struct type##_stream *) \ ^~~~ lstream.c:1153:11: note: in expansion of macro 'STDIO_STREAM_DATA' rewind (STDIO_STREAM_DATA (stream)->file); ^~~~~~~~~~~~~~~~~ lstream.c: In function 'filedesc_rewinder': lstream.c:1206:65: warning: dereferencing type-punned pointer will break strict-aliasing rules [-Wstrict-aliasing] #define FILEDESC_STREAM_DATA(stream) LSTREAM_TYPE_DATA (stream, filedesc) ^ lstream.h:315:12: note: in definition of macro 'LSTREAM_TYPE_DATA' ((struct type##_stream *) \ ^~~~ lstream.c:1464:14: note: in expansion of macro 'FILEDESC_STREAM_DATA' lseek (FILEDESC_STREAM_DATA (stream)->fd, str->starting_pos, ^~~~~~~~~~~~~~~~~~~~ lstream.c: In function 'fixed_buffer_rewinder': lstream.h:562:30: warning: dereferencing type-punned pointer will break strict-aliasing rules [-Wstrict-aliasing] LSTREAM_TYPE_DATA (stream, fixed_buffer) ^ lstream.h:315:12: note: in definition of macro 'LSTREAM_TYPE_DATA' ((struct type##_stream *) \ ^~~~ lstream.c:1694:3: note: in expansion of macro 'FIXED_BUFFER_STREAM_DATA' FIXED_BUFFER_STREAM_DATA (stream)->offset = 0; ^~~~~~~~~~~~~~~~~~~~~~~~ lstream.c: In function 'fixed_buffer_input_stream_ptr': lstream.h:562:30: warning: dereferencing type-punned pointer will break strict-aliasing rules [-Wstrict-aliasing] LSTREAM_TYPE_DATA (stream, fixed_buffer) ^ lstream.h:315:12: note: in definition of macro 'LSTREAM_TYPE_DATA' ((struct type##_stream *) \ ^~~~ lstream.c:1702:10: note: in expansion of macro 'FIXED_BUFFER_STREAM_DATA' return FIXED_BUFFER_STREAM_DATA (stream)->inbuf; ^~~~~~~~~~~~~~~~~~~~~~~~ lstream.c: In function 'fixed_buffer_output_stream_ptr': lstream.h:562:30: warning: dereferencing type-punned pointer will break strict-aliasing rules [-Wstrict-aliasing] LSTREAM_TYPE_DATA (stream, fixed_buffer) ^ lstream.h:315:12: note: in definition of macro 'LSTREAM_TYPE_DATA' ((struct type##_stream *) \ ^~~~ lstream.c:1709:10: note: in expansion of macro 'FIXED_BUFFER_STREAM_DATA' return FIXED_BUFFER_STREAM_DATA (stream)->outbuf; ^~~~~~~~~~~~~~~~~~~~~~~~ lstream.c: In function 'make_resizing_buffer_output_stream': lstream.c:1715:30: warning: dereferencing type-punned pointer will break strict-aliasing rules [-Wstrict-aliasing] LSTREAM_TYPE_DATA (stream, resizing_buffer) ^ lstream.h:315:12: note: in definition of macro 'LSTREAM_TYPE_DATA' ((struct type##_stream *) \ ^~~~ lstream.c:1738:3: note: in expansion of macro 'RESIZING_BUFFER_STREAM_DATA' RESIZING_BUFFER_STREAM_DATA (stream)->extent_info ^~~~~~~~~~~~~~~~~~~~~~~~~~~ lstream.c: In function 'resizing_buffer_rewinder': lstream.c:1715:30: warning: dereferencing type-punned pointer will break strict-aliasing rules [-Wstrict-aliasing] LSTREAM_TYPE_DATA (stream, resizing_buffer) ^ lstream.h:315:12: note: in definition of macro 'LSTREAM_TYPE_DATA' ((struct type##_stream *) \ ^~~~ lstream.c:1775:3: note: in expansion of macro 'RESIZING_BUFFER_STREAM_DATA' RESIZING_BUFFER_STREAM_DATA (stream)->stored = 0; ^~~~~~~~~~~~~~~~~~~~~~~~~~~ In file included from lisp.h:1841:0, from lstream.c:26: lstream.c: In function 'resizing_buffer_extent_info': lstream.c:1715:30: warning: dereferencing type-punned pointer will break strict-aliasing rules [-Wstrict-aliasing] LSTREAM_TYPE_DATA (stream, resizing_buffer) ^ lrecord.h:1616:25: note: in definition of macro 'XRECORD' error_check_##c_name (x, __FILE__, __LINE__) ^ lstream.c:1795:10: note: in expansion of macro 'XEXTENT_INFO' return XEXTENT_INFO (RESIZING_BUFFER_STREAM_DATA (stream)->extent_info); ^~~~~~~~~~~~ lstream.c:1715:3: note: in expansion of macro 'LSTREAM_TYPE_DATA' LSTREAM_TYPE_DATA (stream, resizing_buffer) ^~~~~~~~~~~~~~~~~ lstream.c:1795:24: note: in expansion of macro 'RESIZING_BUFFER_STREAM_DATA' return XEXTENT_INFO (RESIZING_BUFFER_STREAM_DATA (stream)->extent_info); ^~~~~~~~~~~~~~~~~~~~~~~~~~~ In file included from lstream.c:31:0: lstream.c: In function 'resizing_buffer_stream_ptr': lstream.c:1715:30: warning: dereferencing type-punned pointer will break strict-aliasing rules [-Wstrict-aliasing] LSTREAM_TYPE_DATA (stream, resizing_buffer) ^ lstream.h:315:12: note: in definition of macro 'LSTREAM_TYPE_DATA' ((struct type##_stream *) \ ^~~~ lstream.c:1801:10: note: in expansion of macro 'RESIZING_BUFFER_STREAM_DATA' return RESIZING_BUFFER_STREAM_DATA (stream)->buf; ^~~~~~~~~~~~~~~~~~~~~~~~~~~ lstream.c: In function 'resizing_buffer_to_lisp_string': lstream.c:1715:30: warning: dereferencing type-punned pointer will break strict-aliasing rules [-Wstrict-aliasing] LSTREAM_TYPE_DATA (stream, resizing_buffer) ^ lstream.h:315:12: note: in definition of macro 'LSTREAM_TYPE_DATA' ((struct type##_stream *) \ ^~~~ lstream.c:1810:25: note: in expansion of macro 'RESIZING_BUFFER_STREAM_DATA' result = make_string (RESIZING_BUFFER_STREAM_DATA (stream)->buf, ^~~~~~~~~~~~~~~~~~~~~~~~~~~ lstream.c: In function 'make_dynarr_output_stream': lstream.c:1874:30: warning: dereferencing type-punned pointer will break strict-aliasing rules [-Wstrict-aliasing] LSTREAM_TYPE_DATA (stream, dynarr) ^ lstream.h:315:12: note: in definition of macro 'LSTREAM_TYPE_DATA' ((struct type##_stream *) \ ^~~~ lstream.c:1888:3: note: in expansion of macro 'DYNARR_STREAM_DATA' DYNARR_STREAM_DATA (XLSTREAM (obj))->dyn = dyn; ^~~~~~~~~~~~~~~~~~ In file included from lisp.h:1885:0, from lstream.c:26: lstream.c: In function 'dynarr_rewinder': lstream.c:1874:30: warning: dereferencing type-punned pointer will break strict-aliasing rules [-Wstrict-aliasing] LSTREAM_TYPE_DATA (stream, dynarr) ^ array.h:458:45: note: in definition of macro 'Dynarr_reset' #define Dynarr_reset(d) Dynarr_set_lengthr (d, 0) ^ lstream.c:1874:3: note: in expansion of macro 'LSTREAM_TYPE_DATA' LSTREAM_TYPE_DATA (stream, dynarr) ^~~~~~~~~~~~~~~~~ lstream.c:1904:17: note: in expansion of macro 'DYNARR_STREAM_DATA' Dynarr_reset (DYNARR_STREAM_DATA (stream)->dyn); ^~~~~~~~~~~~~~~~~~ In file included from lstream.c:31:0: lstream.c: In function 'lisp_buffer_stream_startpos': lstream.c:1921:30: warning: dereferencing type-punned pointer will break strict-aliasing rules [-Wstrict-aliasing] LSTREAM_TYPE_DATA (stream, lisp_buffer) ^ lstream.h:315:12: note: in definition of macro 'LSTREAM_TYPE_DATA' ((struct type##_stream *) \ ^~~~ lstream.c:2122:27: note: in expansion of macro 'LISP_BUFFER_STREAM_DATA' return marker_position (LISP_BUFFER_STREAM_DATA (stream)->start); ^~~~~~~~~~~~~~~~~~~~~~~ --- macros.o --- In file included from macros.c:30:0: ./config.h:868:16: warning: "__builtin_alloca" is not defined, evaluates to 0 [-Wundef] #define alloca __builtin_alloca ^ --- marker.o --- gcc -c -Wall -Wno-switch -Wundef -Wsign-compare -Wno-char-subscripts -Wpacked -Wpointer-arith -Wshadow -Wmissing-declarations -Wmissing-prototypes -Wstrict-prototypes -Wdeclaration-after-statement -g -O2 -Dunix -no-pie -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -DTERMINFO -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -Demacs -I. -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/src -DHAVE_CONFIG_H -I/usr/pkg/include -I/usr/X11R7/include -I/usr/X11R7/include marker.c In file included from marker.c:32:0: ./config.h:868:16: warning: "__builtin_alloca" is not defined, evaluates to 0 [-Wundef] #define alloca __builtin_alloca ^ --- md5.o --- --- menubar.o --- --- md5.o --- gcc -c -Wall -Wno-switch -Wundef -Wsign-compare -Wno-char-subscripts -Wpacked -Wpointer-arith -Wshadow -Wmissing-declarations -Wmissing-prototypes -Wstrict-prototypes -Wdeclaration-after-statement -g -O2 -Dunix -no-pie -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -DTERMINFO -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -Demacs -I. -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/src -DHAVE_CONFIG_H -I/usr/pkg/include -I/usr/X11R7/include -I/usr/X11R7/include md5.c --- menubar.o --- gcc -c -Wall -Wno-switch -Wundef -Wsign-compare -Wno-char-subscripts -Wpacked -Wpointer-arith -Wshadow -Wmissing-declarations -Wmissing-prototypes -Wstrict-prototypes -Wdeclaration-after-statement -g -O2 -Dunix -no-pie -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -DTERMINFO -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -Demacs -I. -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/src -DHAVE_CONFIG_H -I/usr/pkg/include -I/usr/X11R7/include -I/usr/X11R7/include menubar.c --- md5.o --- In file included from md5.c:26:0: ./config.h:868:16: warning: "__builtin_alloca" is not defined, evaluates to 0 [-Wundef] #define alloca __builtin_alloca ^ --- menubar.o --- In file included from menubar.c:31:0: ./config.h:868:16: warning: "__builtin_alloca" is not defined, evaluates to 0 [-Wundef] #define alloca __builtin_alloca ^ --- marker.o --- marker.c: In function 'print_marker': marker.c:51:45: warning: format '%zd' expects argument of type 'signed size_t', but argument 3 has type 'Charbpos {aka long int}' [-Wformat=] write_fmt_string (printcharfun, "at %zd in ", ~~^ %ld marker_position (obj)); ~~~~~~~~~~~~~~~~~~~~~ --- minibuf.o --- gcc -c -Wall -Wno-switch -Wundef -Wsign-compare -Wno-char-subscripts -Wpacked -Wpointer-arith -Wshadow -Wmissing-declarations -Wmissing-prototypes -Wstrict-prototypes -Wdeclaration-after-statement -g -O2 -Dunix -no-pie -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -DTERMINFO -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -Demacs -I. -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/src -DHAVE_CONFIG_H -I/usr/pkg/include -I/usr/X11R7/include -I/usr/X11R7/include minibuf.c --- miscplay.o --- gcc -c -Wall -Wno-switch -Wundef -Wsign-compare -Wno-char-subscripts -Wpacked -Wpointer-arith -Wshadow -Wmissing-declarations -Wmissing-prototypes -Wstrict-prototypes -Wdeclaration-after-statement -g -O2 -Dunix -no-pie -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -DTERMINFO -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -Demacs -I. -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/src -DHAVE_CONFIG_H -I/usr/pkg/include -I/usr/X11R7/include -I/usr/X11R7/include miscplay.c --- minibuf.o --- In file included from minibuf.c:27:0: ./config.h:868:16: warning: "__builtin_alloca" is not defined, evaluates to 0 [-Wundef] #define alloca __builtin_alloca ^ --- number-gmp.o --- gcc -c -Wall -Wno-switch -Wundef -Wsign-compare -Wno-char-subscripts -Wpacked -Wpointer-arith -Wshadow -Wmissing-declarations -Wmissing-prototypes -Wstrict-prototypes -Wdeclaration-after-statement -g -O2 -Dunix -no-pie -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -DTERMINFO -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -Demacs -I. -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/src -DHAVE_CONFIG_H -I/usr/pkg/include -I/usr/X11R7/include -I/usr/X11R7/include number-gmp.c --- miscplay.o --- In file included from miscplay.c:23:0: ./config.h:868:16: warning: "__builtin_alloca" is not defined, evaluates to 0 [-Wundef] #define alloca __builtin_alloca ^ --- number-gmp.o --- In file included from number-gmp.c:21:0: ./config.h:868:16: warning: "__builtin_alloca" is not defined, evaluates to 0 [-Wundef] #define alloca __builtin_alloca ^ --- number.o --- gcc -c -Wall -Wno-switch -Wundef -Wsign-compare -Wno-char-subscripts -Wpacked -Wpointer-arith -Wshadow -Wmissing-declarations -Wmissing-prototypes -Wstrict-prototypes -Wdeclaration-after-statement -g -O2 -Dunix -no-pie -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -DTERMINFO -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -Demacs -I. -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/src -DHAVE_CONFIG_H -I/usr/pkg/include -I/usr/X11R7/include -I/usr/X11R7/include number.c --- opaque.o --- --- number-gmp.o --- number-gmp.c: In function 'bigfloat_to_string': number-gmp.c:182:37: warning: format '%zd' expects argument of type 'signed size_t', but argument 4 has type 'long int' [-Wformat=] "E%zd", expt); ~~^ %ld --- opaque.o --- gcc -c -Wall -Wno-switch -Wundef -Wsign-compare -Wno-char-subscripts -Wpacked -Wpointer-arith -Wshadow -Wmissing-declarations -Wmissing-prototypes -Wstrict-prototypes -Wdeclaration-after-statement -g -O2 -Dunix -no-pie -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -DTERMINFO -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -Demacs -I. -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/src -DHAVE_CONFIG_H -I/usr/pkg/include -I/usr/X11R7/include -I/usr/X11R7/include opaque.c --- number.o --- In file included from number.c:22:0: ./config.h:868:16: warning: "__builtin_alloca" is not defined, evaluates to 0 [-Wundef] #define alloca __builtin_alloca ^ --- opaque.o --- In file included from opaque.c:35:0: ./config.h:868:16: warning: "__builtin_alloca" is not defined, evaluates to 0 [-Wundef] #define alloca __builtin_alloca ^ --- print.o --- gcc -c -Wall -Wno-switch -Wundef -Wsign-compare -Wno-char-subscripts -Wpacked -Wpointer-arith -Wshadow -Wmissing-declarations -Wmissing-prototypes -Wstrict-prototypes -Wdeclaration-after-statement -g -O2 -Dunix -no-pie -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -DTERMINFO -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -Demacs -I. -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/src -DHAVE_CONFIG_H -I/usr/pkg/include -I/usr/X11R7/include -I/usr/X11R7/include print.c --- opaque.o --- opaque.c: In function 'print_opaque': opaque.c:50:55: warning: format '%zu' expects argument of type 'size_t', but argument 3 has type 'long unsigned int' [-Wformat=] "#", ~~^ %lu (EMACS_UINT)(p->size), LISP_OBJECT_UID (obj)); ~~~~~~~~~~~~~~~~~~~~~ opaque.c: In function 'hash_opaque': opaque.c:105:5: warning: dereferencing type-punned pointer will break strict-aliasing rules [-Wstrict-aliasing] return *((Hashcode *) XOPAQUE_DATA (obj)); ^~~~~~ --- print.o --- In file included from print.c:33:0: ./config.h:868:16: warning: "__builtin_alloca" is not defined, evaluates to 0 [-Wundef] #define alloca __builtin_alloca ^ --- process-unix.o --- gcc -c -Wall -Wno-switch -Wundef -Wsign-compare -Wno-char-subscripts -Wpacked -Wpointer-arith -Wshadow -Wmissing-declarations -Wmissing-prototypes -Wstrict-prototypes -Wdeclaration-after-statement -g -O2 -Dunix -no-pie -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -DTERMINFO -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -Demacs -I. -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/src -DHAVE_CONFIG_H -I/usr/pkg/include -I/usr/X11R7/include -I/usr/X11R7/include process-unix.c --- process.o --- gcc -c -Wall -Wno-switch -Wundef -Wsign-compare -Wno-char-subscripts -Wpacked -Wpointer-arith -Wshadow -Wmissing-declarations -Wmissing-prototypes -Wstrict-prototypes -Wdeclaration-after-statement -g -O2 -Dunix -no-pie -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -DTERMINFO -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -Demacs -I. -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/src -DHAVE_CONFIG_H -I/usr/pkg/include -I/usr/X11R7/include -I/usr/X11R7/include process.c --- process-unix.o --- In file included from process-unix.c:31:0: ./config.h:868:16: warning: "__builtin_alloca" is not defined, evaluates to 0 [-Wundef] #define alloca __builtin_alloca ^ --- profile.o --- --- process.o --- In file included from process.c:34:0: ./config.h:868:16: warning: "__builtin_alloca" is not defined, evaluates to 0 [-Wundef] #define alloca __builtin_alloca ^ --- profile.o --- gcc -c -Wall -Wno-switch -Wundef -Wsign-compare -Wno-char-subscripts -Wpacked -Wpointer-arith -Wshadow -Wmissing-declarations -Wmissing-prototypes -Wstrict-prototypes -Wdeclaration-after-statement -g -O2 -Dunix -no-pie -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -DTERMINFO -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -Demacs -I. -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/src -DHAVE_CONFIG_H -I/usr/pkg/include -I/usr/X11R7/include -I/usr/X11R7/include profile.c --- print.o --- print.c: In function 'printing_major_badness': print.c:1644:63: warning: format '%zd' expects argument of type 'signed size_t', but argument 6 has type 'long int' [-Wformat=] emacs_snprintf (buf, sizeof (buf), "%s type %d object %zd", ~~^ %ld badness_string, type, (EMACS_INT) val); ~~~~~~~~~~~~~~~ print.c: In function 'print_gensym_or_circle': print.c:1894:47: warning: format '%zd' expects argument of type 'signed size_t', but argument 3 has type 'long int' [-Wformat=] write_fmt_string (printcharfun, "#%zd#", ~~^ %ld (XFIXNUM (seen) & PRINT_NUMBER_ORDINAL_MASK) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ >> PRINT_NUMBER_ORDINAL_SHIFT); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ print.c:1903:47: warning: format '%zd' expects argument of type 'signed size_t', but argument 3 has type 'long int' [-Wformat=] write_fmt_string (printcharfun, "#%zd=", ~~^ %ld (XFIXNUM (seen) & PRINT_NUMBER_ORDINAL_MASK) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ >> PRINT_NUMBER_ORDINAL_SHIFT); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ print.c: In function 'debug_p4': print.c:2813:21: warning: format '%zd' expects argument of type 'signed size_t', but argument 2 has type 'long int' [-Wformat=] debug_out ("%zd", XFIXNUM (obj)); ~~^ %ld print.c:2827:28: warning: format '%zx' expects argument of type 'size_t', but argument 3 has type 'long int' [-Wformat=] debug_out ("#<%s addr=0x%zx uid=0x%zx>", ~~^ %lx print.c:2829:6: (EMACS_INT) header, ~~~~~~~~~~~~~~~~~~ print.c:2827:38: warning: format '%zx' expects argument of type 'size_t', but argument 4 has type 'long int' [-Wformat=] debug_out ("#<%s addr=0x%zx uid=0x%zx>", ~~^ %lx print.c:2830:6: (EMACS_INT) ((struct lrecord_header *) header)->uid); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ --- ralloc.o --- --- profile.o --- In file included from profile.c:20:0: ./config.h:868:16: warning: "__builtin_alloca" is not defined, evaluates to 0 [-Wundef] #define alloca __builtin_alloca ^ --- ralloc.o --- gcc -c -Wall -Wno-switch -Wundef -Wsign-compare -Wno-char-subscripts -Wpacked -Wpointer-arith -Wshadow -Wmissing-declarations -Wmissing-prototypes -Wstrict-prototypes -Wdeclaration-after-statement -g -O2 -Dunix -no-pie -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -DTERMINFO -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -Demacs -I. -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/src -DHAVE_CONFIG_H -I/usr/pkg/include -I/usr/X11R7/include -I/usr/X11R7/include ralloc.c --- process-unix.o --- process-unix.c: In function 'unix_init_process_io_handles': process-unix.c:842:52: warning: format '%zd' expects argument of type 'signed size_t', but argument 4 has type 'long int' [-Wformat=] "failed setting pipe (fd %zd) to nonblocking mode", ~~^ %ld (EMACS_INT) in); ~~~~~~~~~~~~~~ --- rangetab.o --- --- process-unix.o --- process-unix.c: In function 'unix_open_network_stream': process-unix.c:1901:36: warning: format '%zd' expects argument of type 'signed size_t', but argument 4 has type 'long int' [-Wformat=] "%zd", XFIXNUM (service)); ~~^ %ld --- rangetab.o --- gcc -c -Wall -Wno-switch -Wundef -Wsign-compare -Wno-char-subscripts -Wpacked -Wpointer-arith -Wshadow -Wmissing-declarations -Wmissing-prototypes -Wstrict-prototypes -Wdeclaration-after-statement -g -O2 -Dunix -no-pie -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -DTERMINFO -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -Demacs -I. -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/src -DHAVE_CONFIG_H -I/usr/pkg/include -I/usr/X11R7/include -I/usr/X11R7/include rangetab.c --- ralloc.o --- In file included from ralloc.c:29:0: ./config.h:868:16: warning: "__builtin_alloca" is not defined, evaluates to 0 [-Wundef] #define alloca __builtin_alloca ^ --- rangetab.o --- In file included from rangetab.c:24:0: ./config.h:868:16: warning: "__builtin_alloca" is not defined, evaluates to 0 [-Wundef] #define alloca __builtin_alloca ^ rangetab.c: In function 'print_range_table': rangetab.c:125:44: warning: format '%zd' expects argument of type 'signed size_t', but argument 4 has type 'long int' [-Wformat=] write_fmt_string (printcharfun, "%c%zd %zd%c ", ~~^ %ld rangetab.c:127:4: (Bytecount) (rte.first - so), ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ rangetab.c:125:48: warning: format '%zd' expects argument of type 'signed size_t', but argument 5 has type 'long int' [-Wformat=] write_fmt_string (printcharfun, "%c%zd %zd%c ", ~~^ %ld rangetab.c:128:4: (Bytecount) (rte.last - ec), ~~~~~~~~~~~~~~~~~~~~~~~~~~~ --- realpath.o --- gcc -c -Wall -Wno-switch -Wundef -Wsign-compare -Wno-char-subscripts -Wpacked -Wpointer-arith -Wshadow -Wmissing-declarations -Wmissing-prototypes -Wstrict-prototypes -Wdeclaration-after-statement -g -O2 -Dunix -no-pie -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -DTERMINFO -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -Demacs -I. -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/src -DHAVE_CONFIG_H -I/usr/pkg/include -I/usr/X11R7/include -I/usr/X11R7/include realpath.c In file included from realpath.c:29:0: ./config.h:868:16: warning: "__builtin_alloca" is not defined, evaluates to 0 [-Wundef] #define alloca __builtin_alloca ^ --- redisplay-output.o --- gcc -c -Wall -Wno-switch -Wundef -Wsign-compare -Wno-char-subscripts -Wpacked -Wpointer-arith -Wshadow -Wmissing-declarations -Wmissing-prototypes -Wstrict-prototypes -Wdeclaration-after-statement -g -O2 -Dunix -no-pie -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -DTERMINFO -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -Demacs -I. -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/src -DHAVE_CONFIG_H -I/usr/pkg/include -I/usr/X11R7/include -I/usr/X11R7/include redisplay-output.c In file included from redisplay-output.c:32:0: ./config.h:868:16: warning: "__builtin_alloca" is not defined, evaluates to 0 [-Wundef] #define alloca __builtin_alloca ^ --- redisplay.o --- gcc -c -Wall -Wno-switch -Wundef -Wsign-compare -Wno-char-subscripts -Wpacked -Wpointer-arith -Wshadow -Wmissing-declarations -Wmissing-prototypes -Wstrict-prototypes -Wdeclaration-after-statement -g -O2 -Dunix -no-pie -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -DTERMINFO -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -Demacs -I. -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/src -DHAVE_CONFIG_H -I/usr/pkg/include -I/usr/X11R7/include -I/usr/X11R7/include redisplay.c --- regex.o --- --- scrollbar.o --- --- regex.o --- gcc -c -Wall -Wno-switch -Wundef -Wsign-compare -Wno-char-subscripts -Wpacked -Wpointer-arith -Wshadow -Wmissing-declarations -Wmissing-prototypes -Wstrict-prototypes -Wdeclaration-after-statement -g -O2 -Dunix -no-pie -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -DTERMINFO -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -Demacs -I. -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/src -DHAVE_CONFIG_H -I/usr/pkg/include -I/usr/X11R7/include -I/usr/X11R7/include regex.c --- search.o --- --- redisplay.o --- In file included from redisplay.c:46:0: ./config.h:868:16: warning: "__builtin_alloca" is not defined, evaluates to 0 [-Wundef] #define alloca __builtin_alloca ^ --- search.o --- gcc -c -Wall -Wno-switch -Wundef -Wsign-compare -Wno-char-subscripts -Wpacked -Wpointer-arith -Wshadow -Wmissing-declarations -Wmissing-prototypes -Wstrict-prototypes -Wdeclaration-after-statement -g -O2 -Dunix -no-pie -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -DTERMINFO -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -Demacs -I. -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/src -DHAVE_CONFIG_H -I/usr/pkg/include -I/usr/X11R7/include -I/usr/X11R7/include search.c --- scrollbar.o --- gcc -c -Wall -Wno-switch -Wundef -Wsign-compare -Wno-char-subscripts -Wpacked -Wpointer-arith -Wshadow -Wmissing-declarations -Wmissing-prototypes -Wstrict-prototypes -Wdeclaration-after-statement -g -O2 -Dunix -no-pie -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -DTERMINFO -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -Demacs -I. -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/src -DHAVE_CONFIG_H -I/usr/pkg/include -I/usr/X11R7/include -I/usr/X11R7/include scrollbar.c --- select.o --- gcc -c -Wall -Wno-switch -Wundef -Wsign-compare -Wno-char-subscripts -Wpacked -Wpointer-arith -Wshadow -Wmissing-declarations -Wmissing-prototypes -Wstrict-prototypes -Wdeclaration-after-statement -g -O2 -Dunix -no-pie -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -DTERMINFO -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -Demacs -I. -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/src -DHAVE_CONFIG_H -I/usr/pkg/include -I/usr/X11R7/include -I/usr/X11R7/include select.c --- regex.o --- In file included from regex.c:27:0: ./config.h:868:16: warning: "__builtin_alloca" is not defined, evaluates to 0 [-Wundef] #define alloca __builtin_alloca ^ --- search.o --- In file included from search.c:27:0: ./config.h:868:16: warning: "__builtin_alloca" is not defined, evaluates to 0 [-Wundef] #define alloca __builtin_alloca ^ --- scrollbar.o --- In file included from scrollbar.c:27:0: ./config.h:868:16: warning: "__builtin_alloca" is not defined, evaluates to 0 [-Wundef] #define alloca __builtin_alloca ^ --- select.o --- In file included from select.c:22:0: ./config.h:868:16: warning: "__builtin_alloca" is not defined, evaluates to 0 [-Wundef] #define alloca __builtin_alloca ^ --- regex.o --- regex.c: In function 'print_partial_compiled_pattern': regex.c:851:18: warning: format '%zd' expects argument of type 'signed size_t', but argument 2 has type 'long int' [-Wformat=] printf ("%zd:\t", (Bytecount)(p - start)); ~~^ ~~~~~~~~~~~~~~~~~~~~~~ %ld regex.c:953:19: warning: format '%zx' expects argument of type 'size_t', but argument 2 has type 'long int' [-Wformat=] printf ("(0x%zx)", (Bytecount)first); ~~^ %lx regex.c:960:23: warning: format '%zx' expects argument of type 'size_t', but argument 2 has type 'long int' [-Wformat=] printf ("(0x%zx)", (Bytecount)last); ~~^ %lx regex.c:979:37: warning: format '%zd' expects argument of type 'signed size_t', but argument 2 has type 'long int' [-Wformat=] printf ("/on_failure_jump to %zd", (Bytecount)(p + mcnt - start)); ~~^ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ %ld regex.c:984:49: warning: format '%zd' expects argument of type 'signed size_t', but argument 2 has type 'long int' [-Wformat=] printf ("/on_failure_keep_string_jump to %zd", ~~^ %ld (Bytecount)(p + mcnt - start)); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regex.c:990:40: warning: format '%zd' expects argument of type 'signed size_t', but argument 2 has type 'long int' [-Wformat=] printf ("/dummy_failure_jump to %zd", ~~^ %ld (Bytecount)(p + mcnt - start)); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regex.c:1000:36: warning: format '%zd' expects argument of type 'signed size_t', but argument 2 has type 'long int' [-Wformat=] printf ("/maybe_pop_jump to %zd", (Bytecount)(p + mcnt - start)); ~~^ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ %ld regex.c:1005:38: warning: format '%zd' expects argument of type 'signed size_t', but argument 2 has type 'long int' [-Wformat=] printf ("/pop_failure_jump to %zd", (Bytecount)(p + mcnt - start)); ~~^ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ %ld regex.c:1010:35: warning: format '%zd' expects argument of type 'signed size_t', but argument 2 has type 'long int' [-Wformat=] printf ("/jump_past_alt to %zd", (Bytecount)(p + mcnt - start)); ~~^ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ %ld regex.c:1015:26: warning: format '%zd' expects argument of type 'signed size_t', but argument 2 has type 'long int' [-Wformat=] printf ("/jump to %zd", (Bytecount)(p + mcnt - start)); ~~^ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ %ld regex.c:1021:29: warning: format '%zd' expects argument of type 'signed size_t', but argument 2 has type 'long int' [-Wformat=] printf ("/succeed_n to %zd, %d times", ~~^ %ld (Bytecount)(p + mcnt - start), mcnt2); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regex.c:1028:26: warning: format '%zd' expects argument of type 'signed size_t', but argument 2 has type 'long int' [-Wformat=] printf ("/jump_n to %zd, %d times", ~~^ %ld (Bytecount)(p + mcnt - start), mcnt2); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regex.c:1035:39: warning: format '%zd' expects argument of type 'signed size_t', but argument 2 has type 'long int' [-Wformat=] printf ("/set_number_at location %zd to %d", ~~^ %ld (Bytecount)(p + mcnt - start), mcnt2); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regex.c:1119:14: warning: format '%zd' expects argument of type 'signed size_t', but argument 2 has type 'long int' [-Wformat=] printf ("%zd:\tend of pattern.\n", (Bytecount)(p - start)); ~~^ ~~~~~~~~~~~~~~~~~~~~~~ %ld regex.c: In function 'print_compiled_pattern': regex.c:1129:14: warning: format '%zd' expects argument of type 'signed size_t', but argument 2 has type 'Bytecount {aka long int}' [-Wformat=] printf ("%zd bytes used/%zd bytes allocated.\n", bufp->used, ~~^ ~~~~~~~~~~ %ld regex.c:1129:29: warning: format '%zd' expects argument of type 'signed size_t', but argument 3 has type 'Bytecount {aka long int}' [-Wformat=] printf ("%zd bytes used/%zd bytes allocated.\n", bufp->used, ~~^ %ld bufp->allocated); ~~~~~~~~~~~~~~~ regex.c:1138:23: warning: format '%zd' expects argument of type 'signed size_t', but argument 2 has type 'long int' [-Wformat=] printf ("re_nsub: %zd\t", (Bytecount)bufp->re_nsub); ~~^ ~~~~~~~~~~~~~~~~~~~~~~~~ %ld regex.c:1139:26: warning: format '%zd' expects argument of type 'signed size_t', but argument 2 has type 'long int' [-Wformat=] printf ("re_ngroups: %zd\t", (Bytecount)bufp->re_ngroups); ~~^ ~~~~~~~~~~~~~~~~~~~~~~~~~~~ %ld --- redisplay.o --- In file included from redisplay.c:50:0: redisplay.c: In function 'add_disp_table_entry_runes_1': lstream.h:562:30: warning: dereferencing type-punned pointer will break strict-aliasing rules [-Wstrict-aliasing] LSTREAM_TYPE_DATA (stream, fixed_buffer) ^ lstream.h:315:12: note: in definition of macro 'LSTREAM_TYPE_DATA' ((struct type##_stream *) \ ^~~~ lstream.h:585:7: note: in expansion of macro 'FIXED_BUFFER_STREAM_DATA' FIXED_BUFFER_STREAM_DATA (&(lname##u.l))->outbuf = buf; \ ^~~~~~~~~~~~~~~~~~~~~~~~ redisplay.c:1552:11: note: in expansion of macro 'INIT_STACK_FIXED_BUFFER_OUTPUT_STREAM' INIT_STACK_FIXED_BUFFER_OUTPUT_STREAM (format_buf_lispobj, ^ lstream.h:562:30: warning: dereferencing type-punned pointer will break strict-aliasing rules [-Wstrict-aliasing] LSTREAM_TYPE_DATA (stream, fixed_buffer) ^ lstream.h:315:12: note: in definition of macro 'LSTREAM_TYPE_DATA' ((struct type##_stream *) \ ^~~~ lstream.h:586:7: note: in expansion of macro 'FIXED_BUFFER_STREAM_DATA' FIXED_BUFFER_STREAM_DATA (&(lname##u.l))->size = bsize; \ ^~~~~~~~~~~~~~~~~~~~~~~~ redisplay.c:1552:11: note: in expansion of macro 'INIT_STACK_FIXED_BUFFER_OUTPUT_STREAM' INIT_STACK_FIXED_BUFFER_OUTPUT_STREAM (format_buf_lispobj, ^ lstream.h:562:30: warning: dereferencing type-punned pointer will break strict-aliasing rules [-Wstrict-aliasing] LSTREAM_TYPE_DATA (stream, fixed_buffer) ^ lstream.h:315:12: note: in definition of macro 'LSTREAM_TYPE_DATA' ((struct type##_stream *) \ ^~~~ lstream.h:585:7: note: in expansion of macro 'FIXED_BUFFER_STREAM_DATA' FIXED_BUFFER_STREAM_DATA (&(lname##u.l))->outbuf = buf; \ ^~~~~~~~~~~~~~~~~~~~~~~~ redisplay.c:1566:15: note: in expansion of macro 'INIT_STACK_FIXED_BUFFER_OUTPUT_STREAM' INIT_STACK_FIXED_BUFFER_OUTPUT_STREAM (format_buf_lispobj, ^ lstream.h:562:30: warning: dereferencing type-punned pointer will break strict-aliasing rules [-Wstrict-aliasing] LSTREAM_TYPE_DATA (stream, fixed_buffer) ^ lstream.h:315:12: note: in definition of macro 'LSTREAM_TYPE_DATA' ((struct type##_stream *) \ ^~~~ lstream.h:586:7: note: in expansion of macro 'FIXED_BUFFER_STREAM_DATA' FIXED_BUFFER_STREAM_DATA (&(lname##u.l))->size = bsize; \ ^~~~~~~~~~~~~~~~~~~~~~~~ redisplay.c:1566:15: note: in expansion of macro 'INIT_STACK_FIXED_BUFFER_OUTPUT_STREAM' INIT_STACK_FIXED_BUFFER_OUTPUT_STREAM (format_buf_lispobj, ^ --- regex.o --- regex.c: In function 're_match_2_internal': regex.c:5796:27: warning: format '%zx' expects argument of type 'size_t', but argument 2 has type 'long int' [-Wformat=] DEBUG_MATCH_PRINT2 ("\n0x%zx: ", (Bytecount) p); ^ ~~~~~~~~~~~~~ regex.c:789:50: note: in definition of macro 'DEBUG_MATCH_PRINT2' if (debug_regexps & RE_DEBUG_MATCHING) printf (x1, x2) ^~ regex.c:6202:26: warning: format '%zd' expects argument of type 'signed size_t', but argument 2 has type 'long int' [-Wformat=] DEBUG_MATCH_PRINT2 (" old_regstart: %zd\n", ^ regex.c:789:50: note: in definition of macro 'DEBUG_MATCH_PRINT2' if (debug_regexps & RE_DEBUG_MATCHING) printf (x1, x2) ^~ regex.c:6206:26: warning: format '%zd' expects argument of type 'signed size_t', but argument 2 has type 'long int' [-Wformat=] DEBUG_MATCH_PRINT2 (" regstart: %zd\n", ^ regex.c:789:50: note: in definition of macro 'DEBUG_MATCH_PRINT2' if (debug_regexps & RE_DEBUG_MATCHING) printf (x1, x2) ^~ regex.c:6249:26: warning: format '%zd' expects argument of type 'signed size_t', but argument 2 has type 'long int' [-Wformat=] DEBUG_MATCH_PRINT2 (" old_regend: %zd\n", ^ regex.c:789:50: note: in definition of macro 'DEBUG_MATCH_PRINT2' if (debug_regexps & RE_DEBUG_MATCHING) printf (x1, x2) ^~ regex.c:6252:26: warning: format '%zd' expects argument of type 'signed size_t', but argument 2 has type 'long int' [-Wformat=] DEBUG_MATCH_PRINT2 (" regend: %zd\n", ^ regex.c:789:50: note: in definition of macro 'DEBUG_MATCH_PRINT2' if (debug_regexps & RE_DEBUG_MATCHING) printf (x1, x2) ^~ regex.c:1731:26: warning: format '%zd' expects argument of type 'signed size_t', but argument 2 has type 'long int' [-Wformat=] DEBUG_FAIL_PRINT2 (" Before push, next avail: %zd\n", \ ^ (Bytecount) (fail_stack).avail); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~ regex.c:774:55: note: in definition of macro 'DEBUG_FAIL_PRINT2' if (debug_regexps & RE_DEBUG_FAILURE_POINT) printf (x1, x2) ^~ regex.c:6370:7: note: in expansion of macro 'PUSH_FAILURE_POINT' PUSH_FAILURE_POINT (p1 + mcnt, d, -2); ^~~~~~~~~~~~~~~~~~ regex.c:1733:26: warning: format '%zd' expects argument of type 'signed size_t', but argument 2 has type 'long int' [-Wformat=] DEBUG_FAIL_PRINT2 (" size: %zd\n", \ ^ (Bytecount) (fail_stack).size); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~ regex.c:774:55: note: in definition of macro 'DEBUG_FAIL_PRINT2' if (debug_regexps & RE_DEBUG_FAILURE_POINT) printf (x1, x2) ^~ regex.c:6370:7: note: in expansion of macro 'PUSH_FAILURE_POINT' PUSH_FAILURE_POINT (p1 + mcnt, d, -2); ^~~~~~~~~~~~~~~~~~ regex.c:1737:26: warning: format '%zd' expects argument of type 'signed size_t', but argument 2 has type 'long int' [-Wformat=] DEBUG_FAIL_PRINT2 (" available: %zd\n", \ ^ regex.c:774:55: note: in definition of macro 'DEBUG_FAIL_PRINT2' if (debug_regexps & RE_DEBUG_FAILURE_POINT) printf (x1, x2) ^~ regex.c:6370:7: note: in expansion of macro 'PUSH_FAILURE_POINT' PUSH_FAILURE_POINT (p1 + mcnt, d, -2); ^~~~~~~~~~~~~~~~~~ regex.c:1756:23: warning: format '%zd' expects argument of type 'signed size_t', but argument 2 has type 'long int' [-Wformat=] DEBUG_FAIL_PRINT2 ("\n Doubled stack; size now: %zd\n", \ ^ (Bytecount) (fail_stack).size); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~ regex.c:774:55: note: in definition of macro 'DEBUG_FAIL_PRINT2' if (debug_regexps & RE_DEBUG_FAILURE_POINT) printf (x1, x2) ^~ regex.c:6370:7: note: in expansion of macro 'PUSH_FAILURE_POINT' PUSH_FAILURE_POINT (p1 + mcnt, d, -2); ^~~~~~~~~~~~~~~~~~ regex.c:1758:23: warning: format '%zd' expects argument of type 'signed size_t', but argument 2 has type 'long int' [-Wformat=] DEBUG_FAIL_PRINT2 (" slots available: %zd\n", \ ^ regex.c:774:55: note: in definition of macro 'DEBUG_FAIL_PRINT2' if (debug_regexps & RE_DEBUG_FAILURE_POINT) printf (x1, x2) ^~ regex.c:6370:7: note: in expansion of macro 'PUSH_FAILURE_POINT' PUSH_FAILURE_POINT (p1 + mcnt, d, -2); ^~~~~~~~~~~~~~~~~~ regex.c:1777:23: warning: format '%zx' expects argument of type 'size_t', but argument 2 has type 'long int' [-Wformat=] DEBUG_FAIL_PRINT2 (" start: 0x%zx\n", \ ^ (Bytecount) regstart[this_reg]); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regex.c:774:55: note: in definition of macro 'DEBUG_FAIL_PRINT2' if (debug_regexps & RE_DEBUG_FAILURE_POINT) printf (x1, x2) ^~ regex.c:6370:7: note: in expansion of macro 'PUSH_FAILURE_POINT' PUSH_FAILURE_POINT (p1 + mcnt, d, -2); ^~~~~~~~~~~~~~~~~~ regex.c:1779:23: warning: format '%zx' expects argument of type 'size_t', but argument 2 has type 'long int' [-Wformat=] DEBUG_FAIL_PRINT2 (" end: 0x%zx\n", \ ^ (Bytecount) regend[this_reg]); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regex.c:774:55: note: in definition of macro 'DEBUG_FAIL_PRINT2' if (debug_regexps & RE_DEBUG_FAILURE_POINT) printf (x1, x2) ^~ regex.c:6370:7: note: in expansion of macro 'PUSH_FAILURE_POINT' PUSH_FAILURE_POINT (p1 + mcnt, d, -2); ^~~~~~~~~~~~~~~~~~ regex.c:1781:23: warning: format '%zx' expects argument of type 'size_t', but argument 2 has type 'long int' [-Wformat=] DEBUG_FAIL_PRINT2 (" info: 0x%zx\n ", \ ^ * (long *) (®_info[this_reg])); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regex.c:774:55: note: in definition of macro 'DEBUG_FAIL_PRINT2' if (debug_regexps & RE_DEBUG_FAILURE_POINT) printf (x1, x2) ^~ regex.c:6370:7: note: in expansion of macro 'PUSH_FAILURE_POINT' PUSH_FAILURE_POINT (p1 + mcnt, d, -2); ^~~~~~~~~~~~~~~~~~ regex.c:1814:26: warning: format '%zx' expects argument of type 'size_t', but argument 2 has type 'long int' [-Wformat=] DEBUG_FAIL_PRINT2 (" Pushing pattern 0x%zx: \n", \ ^ (Bytecount) pattern_place); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_FAIL_PRINT_COMPILED_PATTERN (bufp, pattern_place, pend); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_FAIL_PRINT2 (" Pushing string 0x%zx: `", \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (Bytecount) string_place); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_FAIL_PRINT_DOUBLE_STRING (string_place, string1, size1, \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ string2, size2); \ ~~~~~~~~~~~~~~~~~~~~ DEBUG_FAIL_PRINT1 ("'\n"); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_FAIL_PRINT2 (" Pushing failure id: %u\n", failure_id); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } \ ~~~~~~~~~~~ } while (0) ~~~~~~~~~~~ /* This is the number of items that are pushed and popped on the stack ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for each register. */ ~~~~~~~~~~~~~~~~~~~~~~ #define NUM_REG_ITEMS 3 ~~~~~~~~~~~~~~~~~~~~~~~~ /* Individual items aside from the registers. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef DEBUG ~~~~~~~~~~~~ #define NUM_NONREG_ITEMS 5 /* Includes failure point id. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #else ~~~~~ #define NUM_NONREG_ITEMS 4 ~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ /* We push at most this many items on the stack. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* We used to use (num_regs - 1), which is the number of registers ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ this regexp will save; but that was changed to 5 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ to avoid stack overflow for a regexp with lots of parens. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define MAX_FAILURE_ITEMS (5 * NUM_REG_ITEMS + NUM_NONREG_ITEMS) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* We actually push this many items. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define NUM_FAILURE_ITEMS \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ((highest_active_reg - lowest_active_reg + 1) * NUM_REG_ITEMS \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + NUM_NONREG_ITEMS) ~~~~~~~~~~~~~~~~~~~ /* How many items can still be added to the stack without overflowing it. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define REMAINING_AVAIL_SLOTS ((fail_stack).size - (fail_stack).avail) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Pops what PUSH_FAIL_STACK pushes. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ We restore into the following parameters, all of which should be lvalues: ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ STR -- the saved data position. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PAT -- the saved pattern position. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ LOW_REG, HIGH_REG -- the highest and lowest active registers. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ REGSTART, REGEND -- arrays of string positions. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ REG_INFO -- array of information about each subexpression. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Also assumes the variables `fail_stack' and (if debugging), `bufp', ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `pend', `string1', `size1', `string2', and `size2'. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define POP_FAILURE_POINT(str, pat, low_reg, high_reg, \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regstart, regend, reg_info) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ do { \ ~~~~~~~~~~~~~~ DEBUG_STATEMENT (int ffailure_id;) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int this_reg; \ ~~~~~~~~~~~~~~~~~~~~~~ const unsigned char *string_temp; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ \ ~ /* Remove failure points and point to how many regs pushed. */ \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ assert (fail_stack.avail >= NUM_NONREG_ITEMS); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ \ ~ if (DEBUG_RUNTIME_FLAGS & RE_DEBUG_FAILURE_POINT) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { \ ~~~~~~~~~~~ DEBUG_FAIL_PRINT1 ("POP_FAILURE_POINT:\n"); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_FAIL_PRINT2 (" Before pop, next avail: %zd\n", \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (Bytecount) fail_stack.avail); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_FAIL_PRINT2 (" size: %zd\n", \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (Bytecount) fail_stack.size); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } \ ~~~~~~~~~~~ \ ~ DEBUG_STATEMENT (ffailure_id = POP_FAILURE_INT()); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ \ ~ /* If the saved string location is NULL, it came from an \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ on_failure_keep_string_jump opcode, and we want to throw away the \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ saved NULL, thus retaining our current position in the string. */ \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ string_temp = POP_FAILURE_RELOCATABLE (); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (string_temp != NULL) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ str = string_temp; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~ \ ~ pat = (unsigned char *) POP_FAILURE_POINTER (); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ \ ~ /* Restore register info. */ \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ high_reg = POP_FAILURE_INT (); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ low_reg = POP_FAILURE_INT (); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ \ ~ if (DEBUG_RUNTIME_FLAGS & RE_DEBUG_FAILURE_POINT) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { \ ~~~~~~~~~~~ DEBUG_FAIL_PRINT2 (" Popping failure id: %d\n", ffailure_id); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_FAIL_PRINT2 (" Popping string 0x%zx: `", (Bytecount) str); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_FAIL_PRINT_DOUBLE_STRING (str, string1, size1, \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ string2, size2); \ ~~~~~~~~~~~~~~~~~~~~ DEBUG_FAIL_PRINT1 ("'\n"); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_FAIL_PRINT2 (" Popping pattern 0x%zx: ", (Bytecount) pat); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_FAIL_PRINT_COMPILED_PATTERN (bufp, pat, pend); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_FAIL_PRINT2 (" Popping high active reg: %d\n", high_reg); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_FAIL_PRINT2 (" Popping low active reg: %d\n", low_reg); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } \ ~~~~~~~~~~~ \ ~ for (this_reg = high_reg; this_reg >= low_reg; this_reg--) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { \ ~~~~~~~~~~~ reg_info[this_reg].word = POP_FAILURE_ELT (); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regend[this_reg] = POP_FAILURE_RELOCATABLE (); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regstart[this_reg] = POP_FAILURE_RELOCATABLE (); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ \ ~ if (DEBUG_RUNTIME_FLAGS & RE_DEBUG_FAILURE_POINT) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { \ ~~~~~~~~~~ DEBUG_FAIL_PRINT2 (" Popping reg: %d\n", this_reg); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_FAIL_PRINT2 (" info: 0x%zx\n", \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ * (Bytecount *) ®_info[this_reg]); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_FAIL_PRINT2 (" end: 0x%zx\n", \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (Bytecount) regend[this_reg]); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_FAIL_PRINT2 (" start: 0x%zx\n", \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (Bytecount) regstart[this_reg]); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } \ ~~~~~~~~~~ } \ ~~~~~~~~~~~ \ ~ set_regs_matched_done = 0; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_STATEMENT (nfailure_points_popped++); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } while (0) /* POP_FAILURE_POINT */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ~ /* Structure for per-register (a.k.a. per-group) information. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Other register information, such as the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ starting and ending positions (which are addresses), and the list of ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ inner groups (which is a bits list) are maintained in separate ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ variables. ~~~~~~~~~~ We are making a (strictly speaking) nonportable assumption here: that ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the compiler will pack our bit fields into something that fits into ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the type of `word', i.e., is something that fits into one item on the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ failure stack. */ ~~~~~~~~~~~~~~~~~~ typedef union ~~~~~~~~~~~~~ { ~ fail_stack_elt_t word; ~~~~~~~~~~~~~~~~~~~~~~ struct ~~~~~~ { ~ /* This field is one if this group can match the empty string, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ zero if not. If not yet determined, `MATCH_NULL_UNSET_VALUE'. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define MATCH_NULL_UNSET_VALUE 3 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ unsigned int match_null_string_p : 2; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ unsigned int is_active : 1; ~~~~~~~~~~~~~~~~~~~~~~~~~~~ unsigned int matched_something : 1; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ unsigned int ever_matched_something : 1; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } bits; ~~~~~~~ } register_info_type; ~~~~~~~~~~~~~~~~~~~~~ #define REG_MATCH_NULL_STRING_P(R) ((R).bits.match_null_string_p) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define IS_ACTIVE(R) ((R).bits.is_active) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define MATCHED_SOMETHING(R) ((R).bits.matched_something) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define EVER_MATCHED_SOMETHING(R) ((R).bits.ever_matched_something) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Call this when have matched a real character; it sets `matched' flags ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for the subexpressions which we are currently inside. Also records ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ that those subexprs have matched. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define SET_REGS_MATCHED() \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ do \ ~~~~~~~~~~~~ { \ ~~~~~~~~~~~ if (!set_regs_matched_done) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { \ ~~~~~~~~~~ int r; \ ~~~~~~~~~~~~~~ set_regs_matched_done = 1; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (r = lowest_active_reg; r <= highest_active_reg; r++) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { \ ~~~~~~~~~~ MATCHED_SOMETHING (reg_info[r]) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ = EVER_MATCHED_SOMETHING (reg_info[r]) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ = 1; \ ~~~~~~~~~~~~ } \ ~~~~~~~~~~ } \ ~~~~~~~~~~ } \ ~~~~~~~~~~~ while (0) ~~~~~~~~~ ~ /* Subroutine declarations and macros for regex_compile. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Fetch the next character in the uncompiled pattern---translating it ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if necessary. */ ~~~~~~~~~~~~~~~~~ #define PATFETCH(c) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~ do { \ ~~~~~~~~~~~~~~ PATFETCH_RAW (c); \ ~~~~~~~~~~~~~~~~~~~~~~~~~ c = RE_TRANSLATE (c); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } while (0) ~~~~~~~~~~~ /* Fetch the next character in the uncompiled pattern, with no ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ translation. */ ~~~~~~~~~~~~~~~~ #define PATFETCH_RAW(c) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ do {if (p == pend) return REG_EEND; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ assert (p < pend); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~ c = itext_ichar (p); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ INC_IBYTEPTR (p); \ ~~~~~~~~~~~~~~~~~~~~~~~~~ } while (0) ~~~~~~~~~~~ /* Go backwards one character in the pattern. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define PATUNFETCH DEC_IBYTEPTR (p) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* If `translate' is non-null, return translate[D], else just D. We ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ cast the subscript to translate because some data is declared as ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `char *', to avoid warnings when a string constant is passed. But ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ when we use a character as a subscript we must make it unsigned. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define RE_TRANSLATE(d) \ ~~~~~~~~~~~~~~~~~~~~~~~~~ (TRANSLATE_P (translate) ? RE_TRANSLATE_1 (d) : (d)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Macros for outputting the compiled pattern into `buffer'. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* If the buffer isn't allocated when it comes in, use this. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define INIT_BUF_SIZE 32 ~~~~~~~~~~~~~~~~~~~~~~~~~ /* Make sure we have at least N more bytes of space in buffer. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define GET_BUFFER_SPACE(n) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ while (buf_end - bufp->buffer + (n) > (ptrdiff_t) bufp->allocated) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ EXTEND_BUFFER () ~~~~~~~~~~~~~~~~ /* Make sure we have one more byte of buffer space and then add C to it. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define BUF_PUSH(c) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~ do { \ ~~~~~~~~~~~~~~ GET_BUFFER_SPACE (1); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ *buf_end++ = (unsigned char) (c); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } while (0) ~~~~~~~~~~~ /* Ensure we have two more bytes of buffer space and then append C1 and C2. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define BUF_PUSH_2(c1, c2) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ do { \ ~~~~~~~~~~~~~~ GET_BUFFER_SPACE (2); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ *buf_end++ = (unsigned char) (c1); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ *buf_end++ = (unsigned char) (c2); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } while (0) ~~~~~~~~~~~ /* As with BUF_PUSH_2, except for three bytes. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define BUF_PUSH_3(c1, c2, c3) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ do { \ ~~~~~~~~~~~~~~ GET_BUFFER_SPACE (3); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ *buf_end++ = (unsigned char) (c1); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ *buf_end++ = (unsigned char) (c2); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ *buf_end++ = (unsigned char) (c3); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } while (0) ~~~~~~~~~~~ /* Store a jump with opcode OP at LOC to location TO. We store a ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ relative address offset by the three bytes the jump itself occupies. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define STORE_JUMP(op, loc, to) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ store_op1 (op, loc, (to) - (loc) - 3) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Likewise, for a two-argument jump. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define STORE_JUMP2(op, loc, to, arg) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ store_op2 (op, loc, (to) - (loc) - 3, arg) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Like `STORE_JUMP', but for inserting. Assume `buf_end' is the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ buffer end. */ ~~~~~~~~~~~~~~~ #define INSERT_JUMP(op, loc, to) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ insert_op1 (op, loc, (to) - (loc) - 3, buf_end) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Like `STORE_JUMP2', but for inserting. Assume `buf_end' is the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ buffer end. */ ~~~~~~~~~~~~~~~ #define INSERT_JUMP2(op, loc, to, arg) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ insert_op2 (op, loc, (to) - (loc) - 3, arg, buf_end) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Extend the buffer by twice its current size via realloc and ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ reset the pointers that pointed into the old block to point to the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ correct places in the new one. If extending the buffer results in it ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ being larger than RE_MAX_BUF_SIZE, then flag memory exhausted. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define EXTEND_BUFFER() \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ do { \ ~~~~~~~~~~~~~~~ re_char *old_buffer = bufp->buffer; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (bufp->allocated == RE_MAX_BUF_SIZE) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return REG_ESIZE; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->allocated <<= 1; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (bufp->allocated > RE_MAX_BUF_SIZE) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->allocated = RE_MAX_BUF_SIZE; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->buffer = \ ~~~~~~~~~~~~~~~~~~~~~~~ (unsigned char *) xrealloc (bufp->buffer, bufp->allocated); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (bufp->buffer == NULL) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return REG_ESPACE; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~ /* If the buffer moved, move all the pointers into it. */ \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (old_buffer != bufp->buffer) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { \ ~~~~~~~~~~~~ buf_end = (buf_end - old_buffer) + bufp->buffer; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ begalt = (begalt - old_buffer) + bufp->buffer; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (fixup_alt_jump) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~ fixup_alt_jump = (fixup_alt_jump - old_buffer) + bufp->buffer; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (laststart) \ ~~~~~~~~~~~~~~~~~~~~~~~ laststart = (laststart - old_buffer) + bufp->buffer; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (pending_exact) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~ pending_exact = (pending_exact - old_buffer) + bufp->buffer; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } \ ~~~~~~~~~~~~ } while (0) ~~~~~~~~~~~ #define INIT_REG_TRANSLATE_SIZE 5 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Macros for the compile stack. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Since offsets can go either forwards or backwards, this type needs to be ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ able to hold values from -(RE_MAX_BUF_SIZE - 1) to RE_MAX_BUF_SIZE - 1. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ typedef int pattern_offset_t; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ typedef struct ~~~~~~~~~~~~~~ { ~ pattern_offset_t begalt_offset; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ pattern_offset_t fixup_alt_jump; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ pattern_offset_t inner_group_offset; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ pattern_offset_t laststart_offset; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regnum_t regnum; ~~~~~~~~~~~~~~~~ } compile_stack_elt_t; ~~~~~~~~~~~~~~~~~~~~~~ typedef struct ~~~~~~~~~~~~~~ { ~ compile_stack_elt_t *stack; ~~~~~~~~~~~~~~~~~~~~~~~~~~~ int size; ~~~~~~~~~ int avail; /* Offset of next open position. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } compile_stack_type; ~~~~~~~~~~~~~~~~~~~~~ #define INIT_COMPILE_STACK_SIZE 32 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define COMPILE_STACK_EMPTY (compile_stack.avail == 0) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define COMPILE_STACK_FULL (compile_stack.avail == compile_stack.size) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* The next available element. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define COMPILE_STACK_TOP (compile_stack.stack[compile_stack.avail]) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Set the bit for character C in a bit vector. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define SET_LIST_BIT(c) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (buf_end[((unsigned char) (c)) / BYTEWIDTH] \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ |= 1 << (((unsigned char) c) % BYTEWIDTH)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef MULE ~~~~~~~~~~~ /* Set the "bit" for character C in a range table. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define SET_RANGETAB_BIT(c) put_range_table (rtab, c, c, Qt) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ #ifdef emacs ~~~~~~~~~~~~ /* Parse the longest number we can, but don't produce a bignum, that can't ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ correspond to anything we're interested in and would needlessly complicate ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ code. Also avoid the silent overflow issues of the non-emacs code below. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ If the string at P is not exhausted, leave P pointing at the next ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (probable-)non-digit byte encountered. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define GET_UNSIGNED_NUMBER(num) do \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Ibyte *_gus_numend = NULL; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Lisp_Object _gus_numno; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* most-positive-fixnum on 32 bit XEmacs is 10 decimal digits, \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ nine will keep us in fixnum territory no matter our \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ architecture */ \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Bytecount limit = min (pend - p, 9); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ \ ~ /* Require that any digits are ASCII. We already require that \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the user type ASCII in order to type {,(,|, etc, and there is \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the potential for security holes in the future if we allow \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ non-ASCII digits to specify groups in regexps and other \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ code that parses regexps is not aware of this. */ \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ _gus_numno = parse_integer (p, &_gus_numend, limit, 10, 1, \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Vdigit_fixnum_ascii); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (FIXNUMP (_gus_numno) && XREALFIXNUM (_gus_numno) >= 0) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ num = XREALFIXNUM (_gus_numno); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ p = _gus_numend; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } while (0) ~~~~~~~~~~~ #else ~~~~~ /* Get the next unsigned number in the uncompiled pattern. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define GET_UNSIGNED_NUMBER(num) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { if (p != pend) \ ~~~~~~~~~~~~~~~~~~~~~~~~ { \ ~~~~~~~~~~~ int _gun_do_unfetch = 1; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PATFETCH (c); \ ~~~~~~~~~~~~~~~~~~~~~~ while (ISDIGIT (c)) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~ { \ ~~~~~~~~~~~ if (num < 0) \ ~~~~~~~~~~~~~~~~~~~~ num = 0; \ ~~~~~~~~~~~~~~~~ num = num * 10 + c - '0'; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (p == pend) \ ~~~~~~~~~~~~~~~~~~~~~~ { \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ _gun_do_unfetch = 0; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; \ ~~~~~~~~~~~~~~~ } \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PATFETCH (c); \ ~~~~~~~~~~~~~~~~~~~~ } \ ~~~~~~~~~~~ if (_gun_do_unfetch) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Make sure P points to the next non-digit character. */ \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PATUNFETCH; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ #endif ~~~~~~ /* Map a string to the char class it names (if any). BEG points to the string ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ to be parsed and LIMIT is the length, in bytes, of that string. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ XEmacs; this only handles the NAME part of the [:NAME:] specification of a ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ character class name. The GNU emacs version of this function attempts to ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ handle the string from [: onwards, and is called re_wctype_parse. Our ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ approach means the function doesn't need to be called with every character ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ class encountered. ~~~~~~~~~~~~~~~~~~ LENGTH would be a Bytecount if this function didn't need to be compiled ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ also for executables that don't include lisp.h ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Return RECC_ERROR if STRP doesn't match a known character class. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_wctype_t ~~~~~~~~~~~ re_wctype (const unsigned char *beg, int limit) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ /* Sort tests in the length=five case by frequency the classes to minimize ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ number of times we fail the comparison. The frequencies of character class ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ names used in Emacs sources as of 2016-07-27: ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ $ find \( -name \*.c -o -name \*.el \) -exec grep -h '\[:[a-z]*:]' {} + | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ sed 's/]/]\n/g' |grep -o '\[:[a-z]*:]' |sort |uniq -c |sort -nr ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 213 [:alnum:] ~~~~~~~~~~~~~ 104 [:alpha:] ~~~~~~~~~~~~~ 62 [:space:] ~~~~~~~~~~~~ 39 [:digit:] ~~~~~~~~~~~~ 36 [:blank:] ~~~~~~~~~~~~ 26 [:word:] ~~~~~~~~~~~ 26 [:upper:] ~~~~~~~~~~~~ 21 [:lower:] ~~~~~~~~~~~~ 10 [:xdigit:] ~~~~~~~~~~~~~ 10 [:punct:] ~~~~~~~~~~~~ 10 [:ascii:] ~~~~~~~~~~~~ 4 [:nonascii:] ~~~~~~~~~~~~~~ 4 [:graph:] ~~~~~~~~~~~ 2 [:print:] ~~~~~~~~~~~ 2 [:cntrl:] ~~~~~~~~~~~ 1 [:ff:] ~~~~~~~~ If you update this list, consider also updating chain of or'ed conditions ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ in execute_charset function. XEmacs; our equivalent is the condition ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ checking class_bits in the charset_mule and charset_mule_not opcodes. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ ~~ switch (limit) { ~~~~~~~~~~~~~~~~ case 4: ~~~~~~~ if (!memcmp (beg, "word", 4)) return RECC_WORD; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case 5: ~~~~~~~ if (!memcmp (beg, "alnum", 5)) return RECC_ALNUM; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!memcmp (beg, "alpha", 5)) return RECC_ALPHA; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!memcmp (beg, "space", 5)) return RECC_SPACE; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!memcmp (beg, "digit", 5)) return RECC_DIGIT; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!memcmp (beg, "blank", 5)) return RECC_BLANK; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!memcmp (beg, "upper", 5)) return RECC_UPPER; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!memcmp (beg, "lower", 5)) return RECC_LOWER; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!memcmp (beg, "punct", 5)) return RECC_PUNCT; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!memcmp (beg, "ascii", 5)) return RECC_ASCII; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!memcmp (beg, "graph", 5)) return RECC_GRAPH; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!memcmp (beg, "print", 5)) return RECC_PRINT; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!memcmp (beg, "cntrl", 5)) return RECC_CNTRL; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case 6: ~~~~~~~ if (!memcmp (beg, "xdigit", 6)) return RECC_XDIGIT; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case 7: ~~~~~~~ if (!memcmp (beg, "unibyte", 7)) return RECC_UNIBYTE; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case 8: ~~~~~~~ if (!memcmp (beg, "nonascii", 8)) return RECC_NONASCII; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case 9: ~~~~~~~ if (!memcmp (beg, "multibyte", 9)) return RECC_MULTIBYTE; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ } ~ return RECC_ERROR; ~~~~~~~~~~~~~~~~~~ } ~ /* True if CH is in the char class CC. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int ~~~ re_iswctype (int ch, re_wctype_t cc ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RE_ISWCTYPE_ARG_DECL) ~~~~~~~~~~~~~~~~~~~~~ { ~ switch (cc) ~~~~~~~~~~~ { ~ case RECC_ALNUM: return ISALNUM (ch) != 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_ALPHA: return ISALPHA (ch) != 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_BLANK: return ISBLANK (ch) != 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_CNTRL: return ISCNTRL (ch) != 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_DIGIT: return ISDIGIT (ch) != 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_GRAPH: return ISGRAPH (ch) != 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_PRINT: return ISPRINT (ch) != 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_PUNCT: return ISPUNCT (ch) != 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_SPACE: return ISSPACE (ch) != 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef emacs ~~~~~~~~~~~~ case RECC_UPPER: ~~~~~~~~~~~~~~~~ return NILP (lispbuf->case_fold_search) ? ISUPPER (ch) != 0 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ : !NOCASEP (lispbuf, ch); ~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_LOWER: ~~~~~~~~~~~~~~~~ return NILP (lispbuf->case_fold_search) ? ISLOWER (ch) != 0 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ : !NOCASEP (lispbuf, ch); ~~~~~~~~~~~~~~~~~~~~~~~~~ #else ~~~~~ case RECC_UPPER: return ISUPPER (ch) != 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_LOWER: return ISLOWER (ch) != 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ case RECC_XDIGIT: return ISXDIGIT (ch) != 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_ASCII: return ISASCII (ch) != 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_NONASCII: case RECC_MULTIBYTE: return !ISASCII (ch); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_UNIBYTE: return ISUNIBYTE (ch) != 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_WORD: return ISWORD (ch) != 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_ERROR: return false; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ assert (0); ~~~~~~~~~~~ return -1; ~~~~~~~~~~ } ~ #ifdef MULE ~~~~~~~~~~~ static re_bool ~~~~~~~~~~~~~~ re_wctype_can_match_non_ascii (re_wctype_t cc) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ switch (cc) ~~~~~~~~~~~ { ~ case RECC_ASCII: ~~~~~~~~~~~~~~~~ case RECC_UNIBYTE: ~~~~~~~~~~~~~~~~~~ case RECC_CNTRL: ~~~~~~~~~~~~~~~~ case RECC_DIGIT: ~~~~~~~~~~~~~~~~ case RECC_XDIGIT: ~~~~~~~~~~~~~~~~~ case RECC_BLANK: ~~~~~~~~~~~~~~~~ return false; ~~~~~~~~~~~~~ default: ~~~~~~~~ return true; ~~~~~~~~~~~~ } ~ } ~ #endif /* MULE */ ~~~~~~~~~~~~~~~~~ #ifdef emacs ~~~~~~~~~~~~ /* Return a bit-pattern to use in the range-table bits to match multibyte ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ chars of class CC. */ ~~~~~~~~~~~~~~~~~~~~~~ static unsigned char ~~~~~~~~~~~~~~~~~~~~ re_wctype_to_bit (re_wctype_t cc) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ switch (cc) ~~~~~~~~~~~ { ~ case RECC_PRINT: case RECC_GRAPH: ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_ALPHA: return BIT_ALPHA; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_ALNUM: case RECC_WORD: return BIT_WORD; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_LOWER: return BIT_LOWER; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_UPPER: return BIT_UPPER; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_PUNCT: return BIT_PUNCT; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_SPACE: return BIT_SPACE; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_MULTIBYTE: case RECC_NONASCII: ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_ASCII: case RECC_DIGIT: case RECC_XDIGIT: case RECC_CNTRL: ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_BLANK: case RECC_UNIBYTE: case RECC_ERROR: return 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ default: ~~~~~~~~ ABORT (); ~~~~~~~~~ return 0; ~~~~~~~~~ } ~ } ~ #endif /* emacs */ ~~~~~~~~~~~~~~~~~~ ~ static void store_op1 (re_opcode_t op, unsigned char *loc, int arg); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ static void store_op2 (re_opcode_t op, unsigned char *loc, int arg1, int arg2); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ static void insert_op1 (re_opcode_t op, unsigned char *loc, int arg, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ unsigned char *end); ~~~~~~~~~~~~~~~~~~~~ static void insert_op2 (re_opcode_t op, unsigned char *loc, int arg1, int arg2, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ unsigned char *end); ~~~~~~~~~~~~~~~~~~~~ static re_bool at_begline_loc_p (re_char *pattern, re_char *p, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ reg_syntax_t syntax); ~~~~~~~~~~~~~~~~~~~~~ static re_bool at_endline_loc_p (re_char *p, re_char *pend, int syntax); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ static re_bool group_in_compile_stack (compile_stack_type compile_stack, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regnum_t regnum); ~~~~~~~~~~~~~~~~~ static reg_errcode_t compile_range (re_char **p_ptr, re_char *pend, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RE_TRANSLATE_TYPE translate, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ reg_syntax_t syntax, ~~~~~~~~~~~~~~~~~~~~ unsigned char *b); ~~~~~~~~~~~~~~~~~~ #ifdef MULE ~~~~~~~~~~~ static reg_errcode_t compile_extended_range (re_char **p_ptr, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_char *pend, ~~~~~~~~~~~~~~ RE_TRANSLATE_TYPE translate, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ reg_syntax_t syntax, ~~~~~~~~~~~~~~~~~~~~ Lisp_Object rtab); ~~~~~~~~~~~~~~~~~~ #endif /* MULE */ ~~~~~~~~~~~~~~~~~ #ifdef emacs ~~~~~~~~~~~~ reg_errcode_t compile_char_class (re_wctype_t cc, Lisp_Object rtab, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Bitbyte *flags_out); ~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ static re_bool group_match_null_string_p (re_char **p, re_char *end, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ register_info_type *reg_info); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ static re_bool alt_match_null_string_p (re_char *p, re_char *end, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ register_info_type *reg_info); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ static re_bool common_op_match_null_string_p (re_char **p, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_char *end, ~~~~~~~~~~~~~ register_info_type *reg_info); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ static int bcmp_translate (re_char *s1, re_char *s2, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ REGISTER int len, RE_TRANSLATE_TYPE translate ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef emacs ~~~~~~~~~~~~ , Internal_Format fmt, Lisp_Object lispobj ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ ); ~~ static int re_match_2_internal (struct re_pattern_buffer *bufp, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_char *string1, int size1, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_char *string2, int size2, int pos, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ struct re_registers *regs, int stop ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RE_LISP_CONTEXT_ARGS_DECL); ~~~~~~~~~~~~~~~~~~~~~~~~~~~ ~ #ifndef MATCH_MAY_ALLOCATE ~~~~~~~~~~~~~~~~~~~~~~~~~~ /* If we cannot allocate large objects within re_match_2_internal, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ we make the fail stack and register vectors global. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ The fail stack, we grow to the maximum size when a regexp ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ is compiled. ~~~~~~~~~~~~ The register vectors, we adjust in size each time we ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ compile a regexp, according to the number of registers it needs. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ static fail_stack_type fail_stack; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Size with which the following vectors are currently allocated. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ That is so we can make them bigger as needed, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ but never make them smaller. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ static int regs_allocated_size; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ static re_char ** regstart, ** regend; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ static re_char ** old_regstart, ** old_regend; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ static re_char **best_regstart, **best_regend; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ static register_info_type *reg_info; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ static re_char **reg_dummy; ~~~~~~~~~~~~~~~~~~~~~~~~~~~ static register_info_type *reg_info_dummy; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Make the register vectors big enough for NUM_REGS registers, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ but don't make them smaller. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ static ~~~~~~ regex_grow_registers (int num_regs) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ if (num_regs > regs_allocated_size) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ RETALLOC (regstart, num_regs, re_char *); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RETALLOC (regend, num_regs, re_char *); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RETALLOC (old_regstart, num_regs, re_char *); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RETALLOC (old_regend, num_regs, re_char *); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RETALLOC (best_regstart, num_regs, re_char *); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RETALLOC (best_regend, num_regs, re_char *); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RETALLOC (reg_info, num_regs, register_info_type); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RETALLOC (reg_dummy, num_regs, re_char *); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RETALLOC (reg_info_dummy, num_regs, register_info_type); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regs_allocated_size = num_regs; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ } ~ #endif /* not MATCH_MAY_ALLOCATE */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ~ /* `regex_compile' compiles PATTERN (of length SIZE) according to SYNTAX. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Returns one of error codes defined in `regex.h', or zero for success. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Assumes the `allocated' (and perhaps `buffer') and `translate' ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fields are set in BUFP on entry. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ If it succeeds, results are put in BUFP (if it returns an error, the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ contents of BUFP are undefined): ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `buffer' is the compiled pattern; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `syntax' is set to SYNTAX; ~~~~~~~~~~~~~~~~~~~~~~~~~~ `used' is set to the length of the compiled pattern; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `fastmap_accurate' is zero; ~~~~~~~~~~~~~~~~~~~~~~~~~~~ `re_ngroups' is the number of groups/subexpressions (including shy ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ groups) in PATTERN; ~~~~~~~~~~~~~~~~~~~ `re_nsub' is the number of non-shy groups in PATTERN; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `not_bol' and `not_eol' are zero; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ The `fastmap' and `newline_anchor' fields are neither ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ examined nor set. */ ~~~~~~~~~~~~~~~~~~~~~ /* Return, freeing storage we allocated. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define FREE_STACK_RETURN(value) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ do \ ~~~~~~~~~~ { \ ~~~~~~~~~ xfree (compile_stack.stack); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return value; \ ~~~~~~~~~~~~~~~~~~~~ } while (0) ~~~~~~~~~~~ static reg_errcode_t ~~~~~~~~~~~~~~~~~~~~ regex_compile (re_char *pattern, int size, reg_syntax_t syntax, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ struct re_pattern_buffer *bufp) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ /* We fetch characters from PATTERN here. We declare these as int ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (or possibly long) so that chars above 127 can be used as ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ array indices. The macros that fetch a character from the pattern ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ make sure to coerce to unsigned char before assigning, so we won't ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ get bitten by negative numbers here. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* XEmacs change: used to be unsigned char. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ REGISTER EMACS_INT c, c1; ~~~~~~~~~~~~~~~~~~~~~~~~~ /* A random temporary spot in PATTERN. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_char *p1; ~~~~~~~~~~~~ /* Points to the end of the buffer, where we should append. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ REGISTER unsigned char *buf_end; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Keeps track of unclosed groups. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ compile_stack_type compile_stack; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Points to the current (ending) position in the pattern. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_char *p = pattern; ~~~~~~~~~~~~~~~~~~~~~ re_char *pend = pattern + size; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* How to translate the characters in the pattern. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RE_TRANSLATE_TYPE translate = bufp->translate; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Address of the count-byte of the most recently inserted `exactn' ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ command. This makes it possible to tell if a new exact-match ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ character can be added to that command or if the character requires ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ a new `exactn' command. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~ unsigned char *pending_exact = 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Address of start of the most recently finished expression. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ This tells, e.g., postfix * where to find the start of its ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ operand. Reset at the beginning of groups and alternatives. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ unsigned char *laststart = 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Address of beginning of regexp, or inside of last group. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ unsigned char *begalt; ~~~~~~~~~~~~~~~~~~~~~~ /* Place in the uncompiled pattern (i.e., the {) to ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ which to go back if the interval is invalid. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_char *beg_interval; ~~~~~~~~~~~~~~~~~~~~~~ /* Address of the place where a forward jump should go to the end of ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the containing expression. Each alternative of an `or' -- except the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ last -- ends with a forward jump of this sort. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ unsigned char *fixup_alt_jump = 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Counts open-groups as they are encountered. Remembered for the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ matching close-group on the compile stack, so the same register ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ number is put in the stop_memory as the start_memory. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regnum_t regnum = 0; ~~~~~~~~~~~~~~~~~~~~ #ifdef DEBUG ~~~~~~~~~~~~ if (debug_regexps & RE_DEBUG_COMPILATION) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ int debug_count; ~~~~~~~~~~~~~~~~ DEBUG_PRINT1 ("\nCompiling pattern: "); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (debug_count = 0; debug_count < size; debug_count++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ putchar (pattern[debug_count]); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ putchar ('\n'); ~~~~~~~~~~~~~~~ } ~ #endif /* DEBUG */ ~~~~~~~~~~~~~~~~~~ /* Initialize the compile stack. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ compile_stack.stack = TALLOC (INIT_COMPILE_STACK_SIZE, compile_stack_elt_t); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (compile_stack.stack == NULL) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return REG_ESPACE; ~~~~~~~~~~~~~~~~~~ compile_stack.size = INIT_COMPILE_STACK_SIZE; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ compile_stack.avail = 0; ~~~~~~~~~~~~~~~~~~~~~~~~ /* Initialize the pattern buffer. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->syntax = syntax; ~~~~~~~~~~~~~~~~~~~~~~ bufp->fastmap_accurate = 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->not_bol = bufp->not_eol = 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Set `used' to zero, so that if we return an error, the pattern ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ printer (for debugging) will think there's no pattern. We reset it ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ at the end. */ ~~~~~~~~~~~~~~~ bufp->used = 0; ~~~~~~~~~~~~~~~ /* Always count groups, whether or not bufp->no_sub is set. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->re_nsub = 0; ~~~~~~~~~~~~~~~~~~ bufp->re_ngroups = 0; ~~~~~~~~~~~~~~~~~~~~~ bufp->warned_about_incompatible_back_references = 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (bufp->external_to_internal_register == 0) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ bufp->external_to_internal_register_size = INIT_REG_TRANSLATE_SIZE; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RETALLOC (bufp->external_to_internal_register, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->external_to_internal_register_size, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int); ~~~~~ } ~ { ~ int i; ~~~~~~ bufp->external_to_internal_register[0] = 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (i = 1; i < bufp->external_to_internal_register_size; i++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->external_to_internal_register[i] = (int) 0xDEADBEEF; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ #if !defined (emacs) && !defined (SYNTAX_TABLE) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Initialize the syntax table. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ init_syntax_once (); ~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ if (bufp->allocated == 0) ~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ if (bufp->buffer) ~~~~~~~~~~~~~~~~~ { /* If zero allocated, but buffer is non-null, try to realloc ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ enough space. This loses if buffer's address is bogus, but ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ that is the user's responsibility. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RETALLOC (bufp->buffer, INIT_BUF_SIZE, unsigned char); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ else ~~~~ { /* Caller did not allocate a buffer. Do it for them. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->buffer = TALLOC (INIT_BUF_SIZE, unsigned char); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ if (!bufp->buffer) FREE_STACK_RETURN (REG_ESPACE); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->allocated = INIT_BUF_SIZE; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ begalt = buf_end = bufp->buffer; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Loop through the uncompiled pattern until we're at the end. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ while (p != pend) ~~~~~~~~~~~~~~~~~ { ~ PATFETCH (c); ~~~~~~~~~~~~~ switch (c) ~~~~~~~~~~ { ~ case '^': ~~~~~~~~~ { ~ if ( /* If at start of pattern, it's an operator. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ p == pattern + 1 ~~~~~~~~~~~~~~~~ /* If context independent, it's an operator. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || syntax & RE_CONTEXT_INDEP_ANCHORS ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Otherwise, depends on what's come before. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || at_begline_loc_p (pattern, p, syntax)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ BUF_PUSH (begline); ~~~~~~~~~~~~~~~~~~~ else ~~~~ goto normal_char; ~~~~~~~~~~~~~~~~~ } ~ break; ~~~~~~ case '$': ~~~~~~~~~ { ~ if ( /* If at end of pattern, it's an operator. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ p == pend ~~~~~~~~~ /* If context independent, it's an operator. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || syntax & RE_CONTEXT_INDEP_ANCHORS ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Otherwise, depends on what's next. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || at_endline_loc_p (p, pend, syntax)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ BUF_PUSH (endline); ~~~~~~~~~~~~~~~~~~~ else ~~~~ goto normal_char; ~~~~~~~~~~~~~~~~~ } ~ break; ~~~~~~ case '+': ~~~~~~~~~ case '?': ~~~~~~~~~ if ((syntax & RE_BK_PLUS_QM) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || (syntax & RE_LIMITED_OPS)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto normal_char; ~~~~~~~~~~~~~~~~~ handle_plus: ~~~~~~~~~~~~ case '*': ~~~~~~~~~ /* If there is no previous pattern... */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!laststart) ~~~~~~~~~~~~~~~ { ~ if (syntax & RE_CONTEXT_INVALID_OPS) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ FREE_STACK_RETURN (REG_BADRPT); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ else if (!(syntax & RE_CONTEXT_INDEP_OPS)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto normal_char; ~~~~~~~~~~~~~~~~~ } ~ { ~ /* true means zero/many matches are allowed. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_bool zero_times_ok = c != '+'; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_bool many_times_ok = c != '?'; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* true means match shortest string possible. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_bool minimal = false; ~~~~~~~~~~~~~~~~~~~~~~~~ /* If there is a sequence of repetition chars, collapse it ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ down to just one (the right one). We can't combine ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ interval operators with these because of, e.g., `a{2}*', ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ which should only match an even number of `a's. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ while (p != pend) ~~~~~~~~~~~~~~~~~ { ~ PATFETCH (c); ~~~~~~~~~~~~~ if (c == '*' || (!(syntax & RE_BK_PLUS_QM) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ && (c == '+' || c == '?'))) ~~~~~~~~~~~~~~~~~~~~~~~~~~~ ; ~ else if (syntax & RE_BK_PLUS_QM && c == '\\') ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ if (p == pend) FREE_STACK_RETURN (REG_EESCAPE); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PATFETCH (c1); ~~~~~~~~~~~~~~ if (!(c1 == '+' || c1 == '?')) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ PATUNFETCH; ~~~~~~~~~~~ PATUNFETCH; ~~~~~~~~~~~ break; ~~~~~~ } ~ c = c1; ~~~~~~~ } ~ else ~~~~ { ~ PATUNFETCH; ~~~~~~~~~~~ break; ~~~~~~ } ~ /* If we get here, we found another repeat character. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!(syntax & RE_NO_MINIMAL_MATCHING)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ /* "*?" and "+?" and "??" are okay (and mean match ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ minimally), but other sequences (such as "*??" and ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ "+++") are rejected (reserved for future use). */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (minimal || c != '?') ~~~~~~~~~~~~~~~~~~~~~~~~ FREE_STACK_RETURN (REG_BADRPT); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ minimal = true; ~~~~~~~~~~~~~~~ } ~ else ~~~~ { ~ zero_times_ok |= c != '+'; ~~~~~~~~~~~~~~~~~~~~~~~~~~ many_times_ok |= c != '?'; ~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ } ~ /* Star, etc. applied to an empty pattern is equivalent ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ to an empty pattern. */ ~~~~~~~~~~~~~~~~~~~~~~~~ if (!laststart) ~~~~~~~~~~~~~~~ break; ~~~~~~ /* Now we know whether zero matches is allowed ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ and whether two or more matches is allowed ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ and whether we want minimal or maximal matching. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (minimal) ~~~~~~~~~~~~ { ~ if (!many_times_ok) ~~~~~~~~~~~~~~~~~~~ { ~ /* "a??" becomes: ~~~~~~~~~~~~~~~~~ 0: /on_failure_jump to 6 ~~~~~~~~~~~~~~~~~~~~~~~~ 3: /jump to 9 ~~~~~~~~~~~~~ 6: /exactn/1/A ~~~~~~~~~~~~~~ 9: end of pattern. ~~~~~~~~~~~~~~~~~~ */ ~~ GET_BUFFER_SPACE (6); ~~~~~~~~~~~~~~~~~~~~~ INSERT_JUMP (jump, laststart, buf_end + 3); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ buf_end += 3; ~~~~~~~~~~~~~ INSERT_JUMP (on_failure_jump, laststart, laststart + 6); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ buf_end += 3; ~~~~~~~~~~~~~ } ~ else if (zero_times_ok) ~~~~~~~~~~~~~~~~~~~~~~~ { ~ /* "a*?" becomes: ~~~~~~~~~~~~~~~~~ 0: /jump to 6 ~~~~~~~~~~~~~ 3: /exactn/1/A ~~~~~~~~~~~~~~ 6: /on_failure_jump to 3 ~~~~~~~~~~~~~~~~~~~~~~~~ 9: end of pattern. ~~~~~~~~~~~~~~~~~~ */ ~~ GET_BUFFER_SPACE (6); ~~~~~~~~~~~~~~~~~~~~~ INSERT_JUMP (jump, laststart, buf_end + 3); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ buf_end += 3; ~~~~~~~~~~~~~ STORE_JUMP (on_failure_jump, buf_end, laststart + 3); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ buf_end += 3; ~~~~~~~~~~~~~ } ~ else ~~~~ { ~ /* "a+?" becomes: ~~~~~~~~~~~~~~~~~ 0: /exactn/1/A ~~~~~~~~~~~~~~ 3: /on_failure_jump to 0 ~~~~~~~~~~~~~~~~~~~~~~~~ 6: end of pattern. ~~~~~~~~~~~~~~~~~~ */ ~~ GET_BUFFER_SPACE (3); ~~~~~~~~~~~~~~~~~~~~~ STORE_JUMP (on_failure_jump, buf_end, laststart); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ buf_end += 3; ~~~~~~~~~~~~~ } ~ } ~ else ~~~~ { ~ /* Are we optimizing this jump? */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_bool keep_string_p = false; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (many_times_ok) ~~~~~~~~~~~~~~~~~~ { /* More than one repetition is allowed, so put in ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ at the end a backward relative jump from ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `buf_end' to before the next jump we're going ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ to put in below (which jumps from laststart to ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ after this jump). ~~~~~~~~~~~~~~~~~ But if we are at the `*' in the exact sequence `.*\n', ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ insert an unconditional jump backwards to the ., ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ instead of the beginning of the loop. This way we only ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ push a failure point once, instead of every time ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ through the loop. */ ~~~~~~~~~~~~~~~~~~~~~ assert (p - 1 > pattern); ~~~~~~~~~~~~~~~~~~~~~~~~~ /* Allocate the space for the jump. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ GET_BUFFER_SPACE (3); ~~~~~~~~~~~~~~~~~~~~~ /* We know we are not at the first character of the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ pattern, because laststart was nonzero. And we've ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ already incremented `p', by the way, to be the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ character after the `*'. Do we have to do something ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ analogous here for null bytes, because of ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RE_DOT_NOT_NULL? */ ~~~~~~~~~~~~~~~~~~~ if (*(p - 2) == '.' ~~~~~~~~~~~~~~~~~~~ && zero_times_ok ~~~~~~~~~~~~~~~~ && p < pend && *p == '\n' ~~~~~~~~~~~~~~~~~~~~~~~~~ && !(syntax & RE_DOT_NEWLINE)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { /* We have .*\n. */ ~~~~~~~~~~~~~~~~~~~~~~ STORE_JUMP (jump, buf_end, laststart); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ keep_string_p = true; ~~~~~~~~~~~~~~~~~~~~~ } ~ else ~~~~ /* Anything else. */ ~~~~~~~~~~~~~~~~~~~~~ STORE_JUMP (maybe_pop_jump, buf_end, laststart - 3); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* We've added more stuff to the buffer. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ buf_end += 3; ~~~~~~~~~~~~~ } ~ /* On failure, jump from laststart to buf_end + 3, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ which will be the end of the buffer after this jump ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ is inserted. */ ~~~~~~~~~~~~~~~~ GET_BUFFER_SPACE (3); ~~~~~~~~~~~~~~~~~~~~~ INSERT_JUMP (keep_string_p ? on_failure_keep_string_jump ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ : on_failure_jump, ~~~~~~~~~~~~~~~~~~ laststart, buf_end + 3); ~~~~~~~~~~~~~~~~~~~~~~~~ buf_end += 3; ~~~~~~~~~~~~~ if (!zero_times_ok) ~~~~~~~~~~~~~~~~~~~ { ~ /* At least one repetition is required, so insert a ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `dummy_failure_jump' before the initial ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `on_failure_jump' instruction of the loop. This ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ effects a skip over that instruction the first time ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ we hit that loop. */ ~~~~~~~~~~~~~~~~~~~~~ GET_BUFFER_SPACE (3); ~~~~~~~~~~~~~~~~~~~~~ INSERT_JUMP (dummy_failure_jump, laststart, laststart + 6); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ buf_end += 3; ~~~~~~~~~~~~~ } ~ } ~ pending_exact = 0; ~~~~~~~~~~~~~~~~~~ } ~ break; ~~~~~~ case '.': ~~~~~~~~~ laststart = buf_end; ~~~~~~~~~~~~~~~~~~~~ BUF_PUSH (anychar); ~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ #ifdef MULE ~~~~~~~~~~~ #define MAYBE_START_OVER_WITH_EXTENDED(ch) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (ch >= 0x80) do \ ~~~~~~~~~~~~~~~~~~~~~~ { \ ~~~~~~~ goto start_over_with_extended; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } while (0) ~~~~~~~~~~~ #else ~~~~~ #define MAYBE_START_OVER_WITH_EXTENDED(ch) (void)(ch) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ case '[': ~~~~~~~~~ { ~ /* XEmacs change: this whole section */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_bool had_char_class = false; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (p == pend) FREE_STACK_RETURN (REG_EBRACK); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Ensure that we have enough space to push a charset: the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ opcode, the length count, and the bitset; 34 bytes in all. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ GET_BUFFER_SPACE (34); ~~~~~~~~~~~~~~~~~~~~~~ laststart = buf_end; ~~~~~~~~~~~~~~~~~~~~ /* We test `*p == '^' twice, instead of using an if ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ statement, so we only need one BUF_PUSH. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ BUF_PUSH (*p == '^' ? charset_not : charset); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (*p == '^') ~~~~~~~~~~~~~~ p++; ~~~~ /* Remember the first position in the bracket expression. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ p1 = p; ~~~~~~~ /* Push the number of bytes in the bitmap. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ BUF_PUSH ((1 << BYTEWIDTH) / BYTEWIDTH); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Clear the whole map. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~ memset (buf_end, 0, (1 << BYTEWIDTH) / BYTEWIDTH); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* charset_not matches newline according to a syntax bit. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if ((re_opcode_t) buf_end[-2] == charset_not ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ && (syntax & RE_HAT_LISTS_NOT_NEWLINE)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ SET_LIST_BIT ('\n'); ~~~~~~~~~~~~~~~~~~~~ /* Read in characters and ranges, setting map bits. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (;;) ~~~~~~~~ { ~ if (p == pend) FREE_STACK_RETURN (REG_EBRACK); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PATFETCH (c); ~~~~~~~~~~~~~ /* Frumble-bumble, we may have found some extended chars. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Need to start over, process everything using the general ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ extended-char mechanism, and need to use charset_mule and ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ charset_mule_not instead of charset and charset_not. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ MAYBE_START_OVER_WITH_EXTENDED (c); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* \ might escape characters inside [...] and [^...]. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if ((syntax & RE_BACKSLASH_ESCAPE_IN_LISTS) && c == '\\') ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ if (p == pend) FREE_STACK_RETURN (REG_EESCAPE); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PATFETCH (c1); ~~~~~~~~~~~~~~ MAYBE_START_OVER_WITH_EXTENDED (c1); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ SET_LIST_BIT (c1); ~~~~~~~~~~~~~~~~~~ continue; ~~~~~~~~~ } ~ /* Could be the end of the bracket expression. If it's ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ not (i.e., when the bracket expression is `[]' so ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ far), the ']' character bit gets set way below. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (c == ']' && p != p1 + 1) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ /* Look ahead to see if it's a range when the last thing ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ was a character class. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~ if (had_char_class && c == '-' && *p != ']') ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ FREE_STACK_RETURN (REG_ERANGE); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Look ahead to see if it's a range when the last thing ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ was a character: if this is a hyphen not at the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ beginning or the end of a list, then it's the range ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ operator. */ ~~~~~~~~~~~~~ if (c == '-' ~~~~~~~~~~~~ && !(p - 2 >= pattern && p[-2] == '[') ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ && !(p - 3 >= pattern && p[-3] == '[' && p[-2] == '^') ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ && *p != ']') ~~~~~~~~~~~~~ { ~ reg_errcode_t ret; ~~~~~~~~~~~~~~~~~~ MAYBE_START_OVER_WITH_EXTENDED (*(unsigned char *)p); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ret = compile_range (&p, pend, translate, syntax, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ buf_end); ~~~~~~~~~ if (ret != REG_NOERROR) FREE_STACK_RETURN (ret); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ else if (p[0] == '-' && p[1] != ']') ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { /* This handles ranges made up of characters only. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ reg_errcode_t ret; ~~~~~~~~~~~~~~~~~~ /* Move past the `-'. */ ~~~~~~~~~~~~~~~~~~~~~~~~~ PATFETCH (c1); ~~~~~~~~~~~~~~ MAYBE_START_OVER_WITH_EXTENDED (*(unsigned char *)p); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ret = compile_range (&p, pend, translate, syntax, buf_end); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (ret != REG_NOERROR) FREE_STACK_RETURN (ret); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* See if we're at the beginning of a possible character ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ class. */ ~~~~~~~~~~ else if (syntax & RE_CHAR_CLASSES && c == '[' && *p == ':') ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ re_char *str = p + 1; ~~~~~~~~~~~~~~~~~~~~~ PATFETCH (c); ~~~~~~~~~~~~~ c1 = 0; ~~~~~~~ /* If pattern is `[[:'. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (p == pend) FREE_STACK_RETURN (REG_EBRACK); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (;;) ~~~~~~~~ { ~ PATFETCH (c); ~~~~~~~~~~~~~ if ((c == ':' && *p == ']') || p == pend) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ break; ~~~~~~ } ~ c1++; ~~~~~ } ~ /* If isn't a word bracketed by `[:' and `:]': ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ undo the ending character, the letters, and leave ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the leading `:' and `[' (but set bits for them). */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (c == ':' && *p == ']') ~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ re_wctype_t cc = re_wctype (str, c1); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int ch; ~~~~~~~ if (cc == RECC_ERROR) ~~~~~~~~~~~~~~~~~~~~~ FREE_STACK_RETURN (REG_ECTYPE); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Throw away the ] at the end of the character ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ class. */ ~~~~~~~~~~ PATFETCH (c); ~~~~~~~~~~~~~ if (p == pend) FREE_STACK_RETURN (REG_EBRACK); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef MULE ~~~~~~~~~~~ if (re_wctype_can_match_non_ascii (cc)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ goto start_over_with_extended; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ #endif /* MULE */ ~~~~~~~~~~~~~~~~~ for (ch = 0; ch < (1 << BYTEWIDTH); ++ch) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ if (re_iswctype (ch, cc ~~~~~~~~~~~~~~~~~~~~~~~ RE_ISWCTYPE_ARG (current_buffer))) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ SET_LIST_BIT (ch); ~~~~~~~~~~~~~~~~~~ } ~ } ~ had_char_class = true; ~~~~~~~~~~~~~~~~~~~~~~ } ~ else ~~~~ { ~ c1++; ~~~~~ while (c1--) ~~~~~~~~~~~~ PATUNFETCH; ~~~~~~~~~~~ SET_LIST_BIT ('['); ~~~~~~~~~~~~~~~~~~~ SET_LIST_BIT (':'); ~~~~~~~~~~~~~~~~~~~ had_char_class = false; ~~~~~~~~~~~~~~~~~~~~~~~ } ~ } ~ else ~~~~ { ~ had_char_class = false; ~~~~~~~~~~~~~~~~~~~~~~~ SET_LIST_BIT (c); ~~~~~~~~~~~~~~~~~ } ~ } ~ /* Discard any (non)matching list bytes that are all 0 at the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ end of the map. Decrease the map-length byte too. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ while ((int) buf_end[-1] > 0 && buf_end[buf_end[-1] - 1] == 0) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ buf_end[-1]--; ~~~~~~~~~~~~~~ buf_end += buf_end[-1]; ~~~~~~~~~~~~~~~~~~~~~~~ } ~ break; ~~~~~~ #ifdef MULE ~~~~~~~~~~~ start_over_with_extended: ~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ REGISTER Lisp_Object rtab = Qnil; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Bitbyte flags = 0; ~~~~~~~~~~~~~~~~~~ int bytes_needed = sizeof (flags); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_bool had_char_class = false; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* There are extended chars here, which means we need to use the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ unified range-table format. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (buf_end[-2] == charset) ~~~~~~~~~~~~~~~~~~~~~~~~~~~ buf_end[-2] = charset_mule; ~~~~~~~~~~~~~~~~~~~~~~~~~~~ else ~~~~ buf_end[-2] = charset_mule_not; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ buf_end--; ~~~~~~~~~~ p = p1; /* go back to the beginning of the charset, after ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ a possible ^. */ ~~~~~~~~~~~~~~~~ rtab = Vthe_lisp_rangetab; ~~~~~~~~~~~~~~~~~~~~~~~~~~ Fclear_range_table (rtab); ~~~~~~~~~~~~~~~~~~~~~~~~~~ /* charset_not matches newline according to a syntax bit. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if ((re_opcode_t) buf_end[-1] == charset_mule_not ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ && (syntax & RE_HAT_LISTS_NOT_NEWLINE)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ SET_RANGETAB_BIT ('\n'); ~~~~~~~~~~~~~~~~~~~~~~~~ /* Read in characters and ranges, setting map bits. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (;;) ~~~~~~~~ { ~ if (p == pend) FREE_STACK_RETURN (REG_EBRACK); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PATFETCH (c); ~~~~~~~~~~~~~ /* \ might escape characters inside [...] and [^...]. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if ((syntax & RE_BACKSLASH_ESCAPE_IN_LISTS) && c == '\\') ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ if (p == pend) FREE_STACK_RETURN (REG_EESCAPE); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PATFETCH (c1); ~~~~~~~~~~~~~~ SET_RANGETAB_BIT (c1); ~~~~~~~~~~~~~~~~~~~~~~ continue; ~~~~~~~~~ } ~ /* Could be the end of the bracket expression. If it's ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ not (i.e., when the bracket expression is `[]' so ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ far), the ']' character bit gets set way below. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (c == ']' && p != p1 + 1) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ /* Look ahead to see if it's a range when the last thing ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ was a character class. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~ if (had_char_class && c == '-' && *p != ']') ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ FREE_STACK_RETURN (REG_ERANGE); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Look ahead to see if it's a range when the last thing ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ was a character: if this is a hyphen not at the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ beginning or the end of a list, then it's the range ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ operator. */ ~~~~~~~~~~~~~ if (c == '-' ~~~~~~~~~~~~ && !(p - 2 >= pattern && p[-2] == '[') ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ && !(p - 3 >= pattern && p[-3] == '[' && p[-2] == '^') ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ && *p != ']') ~~~~~~~~~~~~~ { ~ reg_errcode_t ret; ~~~~~~~~~~~~~~~~~~ ret = compile_extended_range (&p, pend, translate, syntax, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ rtab); ~~~~~~ if (ret != REG_NOERROR) FREE_STACK_RETURN (ret); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ else if (p[0] == '-' && p[1] != ']') ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { /* This handles ranges made up of characters only. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ reg_errcode_t ret; ~~~~~~~~~~~~~~~~~~ /* Move past the `-'. */ ~~~~~~~~~~~~~~~~~~~~~~~~~ PATFETCH (c1); ~~~~~~~~~~~~~~ ret = compile_extended_range (&p, pend, translate, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ syntax, rtab); ~~~~~~~~~~~~~~ if (ret != REG_NOERROR) FREE_STACK_RETURN (ret); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* See if we're at the beginning of a possible character ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ class. */ ~~~~~~~~~~ else if (syntax & RE_CHAR_CLASSES && c == '[' && *p == ':') ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ re_char *str = p + 1; ~~~~~~~~~~~~~~~~~~~~~ PATFETCH (c); ~~~~~~~~~~~~~ c1 = 0; ~~~~~~~ /* If pattern is `[[:'. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (p == pend) FREE_STACK_RETURN (REG_EBRACK); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (;;) ~~~~~~~~ { ~ PATFETCH (c); ~~~~~~~~~~~~~ if ((c == ':' && *p == ']') || p == pend) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ break; ~~~~~~ } ~ c1++; ~~~~~ } ~ /* If isn't a word bracketed by `[:' and `:]': ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ undo the ending character, the letters, and leave ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the leading `:' and `[' (but set bits for them). */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (c == ':' && *p == ']') ~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ re_wctype_t cc = re_wctype (str, c1); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ reg_errcode_t ret = REG_NOERROR; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (cc == RECC_ERROR) ~~~~~~~~~~~~~~~~~~~~~ FREE_STACK_RETURN (REG_ECTYPE); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Throw away the ] at the end of the character ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ class. */ ~~~~~~~~~~ PATFETCH (c); ~~~~~~~~~~~~~ if (p == pend) FREE_STACK_RETURN (REG_EBRACK); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ret = compile_char_class (cc, rtab, &flags); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (ret != REG_NOERROR) FREE_STACK_RETURN (ret); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ had_char_class = true; ~~~~~~~~~~~~~~~~~~~~~~ } ~ else ~~~~ { ~ c1++; ~~~~~ while (c1--) ~~~~~~~~~~~~ PATUNFETCH; ~~~~~~~~~~~ SET_RANGETAB_BIT ('['); ~~~~~~~~~~~~~~~~~~~~~~~ SET_RANGETAB_BIT (':'); ~~~~~~~~~~~~~~~~~~~~~~~ had_char_class = false; ~~~~~~~~~~~~~~~~~~~~~~~ } ~ } ~ else ~~~~ { ~ had_char_class = false; ~~~~~~~~~~~~~~~~~~~~~~~ SET_RANGETAB_BIT (c); ~~~~~~~~~~~~~~~~~~~~~ } ~ } ~ bytes_needed += unified_range_table_bytes_needed (rtab); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ GET_BUFFER_SPACE (bytes_needed); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ *buf_end++ = flags; ~~~~~~~~~~~~~~~~~~~ unified_range_table_copy_data (rtab, buf_end); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ buf_end += unified_range_table_bytes_used (buf_end); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ } ~ #endif /* MULE */ ~~~~~~~~~~~~~~~~~ case '(': ~~~~~~~~~ if (syntax & RE_NO_BK_PARENS) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto handle_open; ~~~~~~~~~~~~~~~~~ else ~~~~ goto normal_char; ~~~~~~~~~~~~~~~~~ case ')': ~~~~~~~~~ if (syntax & RE_NO_BK_PARENS) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto handle_close; ~~~~~~~~~~~~~~~~~~ else ~~~~ goto normal_char; ~~~~~~~~~~~~~~~~~ case '\n': ~~~~~~~~~~ if (syntax & RE_NEWLINE_ALT) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto handle_alt; ~~~~~~~~~~~~~~~~ else ~~~~ goto normal_char; ~~~~~~~~~~~~~~~~~ case '|': ~~~~~~~~~ if (syntax & RE_NO_BK_VBAR) ~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto handle_alt; ~~~~~~~~~~~~~~~~ else ~~~~ goto normal_char; ~~~~~~~~~~~~~~~~~ case '{': ~~~~~~~~~ if (syntax & RE_INTERVALS && syntax & RE_NO_BK_BRACES) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto handle_interval; ~~~~~~~~~~~~~~~~~~~~~ else ~~~~ goto normal_char; ~~~~~~~~~~~~~~~~~ case '\\': ~~~~~~~~~~ if (p == pend) FREE_STACK_RETURN (REG_EESCAPE); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Do not translate the character after the \, so that we can ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ distinguish, e.g., \B from \b, even if we normally would ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ translate, e.g., B to b. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PATFETCH_RAW (c); ~~~~~~~~~~~~~~~~~ switch (c) ~~~~~~~~~~ { ~ case '(': ~~~~~~~~~ if (syntax & RE_NO_BK_PARENS) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto normal_backslash; ~~~~~~~~~~~~~~~~~~~~~~ handle_open: ~~~~~~~~~~~~ { ~ regnum_t r = 0; ~~~~~~~~~~~~~~~ re_bool shy = 0, named_nonshy = 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!(syntax & RE_NO_SHY_GROUPS) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ && p != pend && itext_ichar_eql (p, '?')) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ INC_IBYTEPTR (p); /* Gobble up the '?'. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PATFETCH (c); /* Fetch the next character, which may be a ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ digit. */ ~~~~~~~~~ switch (c) ~~~~~~~~~~ { ~ case ':': /* shy groups */ ~~~~~~~~~~~~~~~~~~~~~~~~~~ shy = 1; ~~~~~~~~ break; ~~~~~~ case '1': case '2': case '3': case '4': ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case '5': case '6': case '7': case '8': case '9': ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PATUNFETCH; ~~~~~~~~~~~ GET_UNSIGNED_NUMBER (r); ~~~~~~~~~~~~~~~~~~~~~~~~ if (itext_ichar_eql (p, ':')) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ named_nonshy = 1; ~~~~~~~~~~~~~~~~~ INC_IBYTEPTR (p); /* Gobble up the ':'. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ /* Otherwise, fall through and error. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* An explicitly specified regnum must start with ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ non-0. */ ~~~~~~~~~ case '0': ~~~~~~~~~ default: ~~~~~~~~ FREE_STACK_RETURN (REG_BADPAT); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ } ~ ++regnum; ~~~~~~~~~ bufp->re_ngroups++; ~~~~~~~~~~~~~~~~~~~ if (bufp->re_ngroups > MAX_REGNUM) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ FREE_STACK_RETURN (REG_ESUBREG); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ if (!shy) ~~~~~~~~~ { ~ if (named_nonshy) ~~~~~~~~~~~~~~~~~ { ~ if (r < bufp->external_to_internal_register_size) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ if (group_in_compile_stack ~~~~~~~~~~~~~~~~~~~~~~~~~~ (compile_stack, ~~~~~~~~~~~~~~~ bufp->external_to_internal_register[r])) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ /* GNU errors in this context, which is ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ inconsistent; it otherwise has no problem ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ with named non-shy groups overriding ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ previously-assigned group numbers. I choose ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ to error here for consistency with GNU for ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ those writing code that should target ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ both. */ ~~~~~~~~ FREE_STACK_RETURN (REG_ESUBREG); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ } ~ if (r > bufp->re_nsub) ~~~~~~~~~~~~~~~~~~~~~~ { ~ bufp->re_nsub = r; ~~~~~~~~~~~~~~~~~~ } ~ } ~ else ~~~~ { ~ r = ++(bufp->re_nsub); ~~~~~~~~~~~~~~~~~~~~~~ } ~ while (bufp->external_to_internal_register_size <= ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->re_nsub) ~~~~~~~~~~~~~~ { ~ int i; ~~~~~~ int old_size = ~~~~~~~~~~~~~~ bufp->external_to_internal_register_size; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->external_to_internal_register_size ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ += max (old_size + 5, bufp->re_nsub + 5); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RETALLOC (bufp->external_to_internal_register, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->external_to_internal_register_size, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int); ~~~~~ for (i = old_size; ~~~~~~~~~~~~~~~~~~ i < bufp->external_to_internal_register_size; i++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->external_to_internal_register[i] = ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (int) 0xDEADBEEF; ~~~~~~~~~~~~~~~~~ } ~ /* This is explicitly [r] rather than [bufp->re_nsub] for ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the case that the named nonshy group references an ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ unused register number less than bufp->re_nsub. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->external_to_internal_register[r] = ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->re_ngroups; ~~~~~~~~~~~~~~~~~ } ~ if (COMPILE_STACK_FULL) ~~~~~~~~~~~~~~~~~~~~~~~ { ~ RETALLOC (compile_stack.stack, compile_stack.size << 1, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ compile_stack_elt_t); ~~~~~~~~~~~~~~~~~~~~~ if (compile_stack.stack == NULL) return REG_ESPACE; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ compile_stack.size <<= 1; ~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* These are the values to restore when we hit end of this ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ group. They are all relative offsets, so that if the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ whole pattern moves because of realloc, they will still ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ be valid. */ ~~~~~~~~~~~~~ COMPILE_STACK_TOP.begalt_offset = begalt - bufp->buffer; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ COMPILE_STACK_TOP.fixup_alt_jump ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ = fixup_alt_jump ? fixup_alt_jump - bufp->buffer + 1 : 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ COMPILE_STACK_TOP.laststart_offset = buf_end - bufp->buffer; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ COMPILE_STACK_TOP.regnum = bufp->re_ngroups; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ COMPILE_STACK_TOP.inner_group_offset ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ = buf_end - bufp->buffer + 3; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* We will eventually replace the 0 with the number of ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ groups inner to this one, using inner_group_offset, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ above. */ ~~~~~~~~~ GET_BUFFER_SPACE (5); ~~~~~~~~~~~~~~~~~~~~~ store_op2 (start_memory, buf_end, bufp->re_ngroups, 0); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ buf_end += 5; ~~~~~~~~~~~~~ compile_stack.avail++; ~~~~~~~~~~~~~~~~~~~~~~ fixup_alt_jump = 0; ~~~~~~~~~~~~~~~~~~~ laststart = 0; ~~~~~~~~~~~~~~ begalt = buf_end; ~~~~~~~~~~~~~~~~~ pending_exact = 0; ~~~~~~~~~~~~~~~~~~ } ~ break; ~~~~~~ case ')': ~~~~~~~~~ if (syntax & RE_NO_BK_PARENS) goto normal_backslash; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (COMPILE_STACK_EMPTY) ~~~~~~~~~~~~~~~~~~~~~~~~ { ~ if (syntax & RE_UNMATCHED_RIGHT_PAREN_ORD) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto normal_backslash; ~~~~~~~~~~~~~~~~~~~~~~ else ~~~~ FREE_STACK_RETURN (REG_ERPAREN); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ handle_close: ~~~~~~~~~~~~~ if (fixup_alt_jump) ~~~~~~~~~~~~~~~~~~~ { /* Push a dummy failure point at the end of the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ alternative for a possible future ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `pop_failure_jump' to pop. See comments at ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `push_dummy_failure' in `re_match_2'. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ BUF_PUSH (push_dummy_failure); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* We allocated space for this jump when we assigned ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ to `fixup_alt_jump', in the `handle_alt' case below. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ STORE_JUMP (jump_past_alt, fixup_alt_jump, buf_end - 1); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* See similar code for backslashed left paren above. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (COMPILE_STACK_EMPTY) ~~~~~~~~~~~~~~~~~~~~~~~~ { ~ if (syntax & RE_UNMATCHED_RIGHT_PAREN_ORD) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto normal_char; ~~~~~~~~~~~~~~~~~ else ~~~~ FREE_STACK_RETURN (REG_ERPAREN); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* Since we just checked for an empty stack above, this ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ``can't happen''. */ ~~~~~~~~~~~~~~~~~~~~~ assert (compile_stack.avail != 0); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ /* We don't just want to restore into `regnum', because ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ later groups should continue to be numbered higher, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ as in `(ab)c(de)' -- the second group is #2. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regnum_t this_group_regnum; ~~~~~~~~~~~~~~~~~~~~~~~~~~~ unsigned char *inner_group_loc; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ compile_stack.avail--; ~~~~~~~~~~~~~~~~~~~~~~ begalt = bufp->buffer + COMPILE_STACK_TOP.begalt_offset; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fixup_alt_jump ~~~~~~~~~~~~~~ = COMPILE_STACK_TOP.fixup_alt_jump ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ? bufp->buffer + COMPILE_STACK_TOP.fixup_alt_jump - 1 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ : 0; ~~~~ laststart = bufp->buffer + COMPILE_STACK_TOP.laststart_offset; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ this_group_regnum = COMPILE_STACK_TOP.regnum; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ pending_exact = 0; ~~~~~~~~~~~~~~~~~~ /* We're at the end of the group, so now we know how many ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ groups were inside this one. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ inner_group_loc ~~~~~~~~~~~~~~~ = bufp->buffer + COMPILE_STACK_TOP.inner_group_offset; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ STORE_NUMBER (inner_group_loc, regnum - this_group_regnum); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ GET_BUFFER_SPACE (5); ~~~~~~~~~~~~~~~~~~~~~ store_op2 (stop_memory, buf_end, this_group_regnum, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regnum - this_group_regnum); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ buf_end += 5; ~~~~~~~~~~~~~ } ~ break; ~~~~~~ case '|': /* `\|'. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~ if (syntax & RE_LIMITED_OPS || syntax & RE_NO_BK_VBAR) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto normal_backslash; ~~~~~~~~~~~~~~~~~~~~~~ handle_alt: ~~~~~~~~~~~ if (syntax & RE_LIMITED_OPS) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto normal_char; ~~~~~~~~~~~~~~~~~ /* Insert before the previous alternative a jump which ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ jumps to this alternative if the former fails. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ GET_BUFFER_SPACE (3); ~~~~~~~~~~~~~~~~~~~~~ INSERT_JUMP (on_failure_jump, begalt, buf_end + 6); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ pending_exact = 0; ~~~~~~~~~~~~~~~~~~ buf_end += 3; ~~~~~~~~~~~~~ /* The alternative before this one has a jump after it ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ which gets executed if it gets matched. Adjust that ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ jump so it will jump to this alternative's analogous ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ jump (put in below, which in turn will jump to the next ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (if any) alternative's such jump, etc.). The last such ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ jump jumps to the correct final destination. A picture: ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ _____ _____ ~~~~~~~~~~~ | | | | ~~~~~~~~~~~ | v | v ~~~~~~~~~~~ a | b | c ~~~~~~~~~~~ If we are at `b', then fixup_alt_jump right now points to a ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ three-byte space after `a'. We'll put in the jump, set ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fixup_alt_jump to right after `b', and leave behind three ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bytes which we'll fill in when we get to after `c'. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (fixup_alt_jump) ~~~~~~~~~~~~~~~~~~~ STORE_JUMP (jump_past_alt, fixup_alt_jump, buf_end); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Mark and leave space for a jump after this alternative, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ to be filled in later either by next alternative or ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ when know we're at the end of a series of alternatives. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fixup_alt_jump = buf_end; ~~~~~~~~~~~~~~~~~~~~~~~~~ GET_BUFFER_SPACE (3); ~~~~~~~~~~~~~~~~~~~~~ buf_end += 3; ~~~~~~~~~~~~~ laststart = 0; ~~~~~~~~~~~~~~ begalt = buf_end; ~~~~~~~~~~~~~~~~~ break; ~~~~~~ case '{': ~~~~~~~~~ /* If \{ is a literal. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!(syntax & RE_INTERVALS) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* If we're at `\{' and it's not the open-interval ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ operator. */ ~~~~~~~~~~~~~ || ((syntax & RE_INTERVALS) && (syntax & RE_NO_BK_BRACES)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || (p - 2 == pattern && p == pend)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto normal_backslash; ~~~~~~~~~~~~~~~~~~~~~~ #define BAD_INTERVAL(errnum) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ do \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (syntax & RE_NO_BK_BRACES) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto unfetch_interval; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ else \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ FREE_STACK_RETURN (errnum); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } while (0) ~~~~~~~~~~~ handle_interval: ~~~~~~~~~~~~~~~~ { ~ /* If got here, then the syntax allows intervals. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* At least (most) this many matches must be made. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int lower_bound = 0, upper_bound = -1; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ beg_interval = p - 1; ~~~~~~~~~~~~~~~~~~~~~ if (p == pend || itext_ichar_eql (p, '+')) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ BAD_INTERVAL (REG_EBRACE); ~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ GET_UNSIGNED_NUMBER (lower_bound); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (p == pend) BAD_INTERVAL (REG_EBRACE); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PATFETCH (c); ~~~~~~~~~~~~~ if (c == ',') ~~~~~~~~~~~~~ { ~ if (p == pend || itext_ichar_eql (p, '+')) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ BAD_INTERVAL (REG_EBRACE); ~~~~~~~~~~~~~~~~~~~~~~~~~~ GET_UNSIGNED_NUMBER (upper_bound); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (upper_bound < 0) upper_bound = RE_DUP_MAX; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (p == pend) BAD_INTERVAL (REG_EBRACE); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PATFETCH (c); ~~~~~~~~~~~~~ } ~ else ~~~~ { ~ /* Interval such as `{1}' => match exactly once. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ upper_bound = lower_bound; ~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ if (lower_bound > upper_bound) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ BAD_INTERVAL (REG_EBRACE); ~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ if (upper_bound > RE_DUP_MAX) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ BAD_INTERVAL (REG_ESIZEBR); ~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ if (!(syntax & RE_NO_BK_BRACES)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ if (c != '\\') ~~~~~~~~~~~~~~ FREE_STACK_RETURN (REG_BADBR); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (p == pend) ~~~~~~~~~~~~~~ FREE_STACK_RETURN (REG_EESCAPE); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PATFETCH (c); ~~~~~~~~~~~~~ } ~ if (c != '}') ~~~~~~~~~~~~~ { ~ BAD_INTERVAL (REG_EBRACE); ~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* We just parsed a valid interval. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* It's invalid to have no preceding RE. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!laststart) ~~~~~~~~~~~~~~~ { ~ if (syntax & RE_CONTEXT_INVALID_OPS) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ FREE_STACK_RETURN (REG_BADRPT); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ else if (syntax & RE_CONTEXT_INDEP_OPS) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ laststart = buf_end; ~~~~~~~~~~~~~~~~~~~~ else ~~~~ goto unfetch_interval; ~~~~~~~~~~~~~~~~~~~~~~ } ~ /* If the upper bound is zero, don't want to succeed at ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ all; jump from `laststart' to `b + 3', which will be ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the end of the buffer after we insert the jump. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (upper_bound == 0) ~~~~~~~~~~~~~~~~~~~~~ { ~ GET_BUFFER_SPACE (3); ~~~~~~~~~~~~~~~~~~~~~ INSERT_JUMP (jump, laststart, buf_end + 3); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ buf_end += 3; ~~~~~~~~~~~~~ } ~ /* Otherwise, we have a nontrivial interval. When ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ we're all done, the pattern will look like: ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ set_number_at ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ set_number_at ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ succeed_n ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ~~~~~~~~~~~~~~ jump_n ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (The upper bound and `jump_n' are omitted if ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `upper_bound' is 1, though.) */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ else ~~~~ { /* If the upper bound is > 1, we need to insert ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ more at the end of the loop. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int nbytes = 10 + (upper_bound > 1) * 10; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ GET_BUFFER_SPACE (nbytes); ~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Initialize lower bound of the `succeed_n', even ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ though it will be set during matching by its ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ attendant `set_number_at' (inserted next), ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ because `re_compile_fastmap' needs to know. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Jump to the `jump_n' we might insert below. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ INSERT_JUMP2 (succeed_n, laststart, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ buf_end + 5 + (upper_bound > 1) * 5, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ lower_bound); ~~~~~~~~~~~~~ buf_end += 5; ~~~~~~~~~~~~~ /* Code to initialize the lower bound. Insert ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ before the `succeed_n'. The `5' is the last two ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bytes of this `set_number_at', plus 3 bytes of ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the following `succeed_n'. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ insert_op2 (set_number_at, laststart, 5, lower_bound, buf_end); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ buf_end += 5; ~~~~~~~~~~~~~ if (upper_bound > 1) ~~~~~~~~~~~~~~~~~~~~ { /* More than one repetition is allowed, so ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ append a backward jump to the `succeed_n' ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ that starts this interval. ~~~~~~~~~~~~~~~~~~~~~~~~~~ When we've reached this during matching, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ we'll have matched the interval once, so ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ jump back only `upper_bound - 1' times. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ STORE_JUMP2 (jump_n, buf_end, laststart + 5, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ upper_bound - 1); ~~~~~~~~~~~~~~~~~ buf_end += 5; ~~~~~~~~~~~~~ /* The location we want to set is the second ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ parameter of the `jump_n'; that is `b-2' as ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ an absolute address. `laststart' will be ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the `set_number_at' we're about to insert; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `laststart+3' the number to set, the source ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for the relative address. But we are ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ inserting into the middle of the pattern -- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ so everything is getting moved up by 5. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Conclusion: (b - 2) - (laststart + 3) + 5, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ i.e., b - laststart. ~~~~~~~~~~~~~~~~~~~~ We insert this at the beginning of the loop ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ so that if we fail during matching, we'll ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ reinitialize the bounds. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ insert_op2 (set_number_at, laststart, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ buf_end - laststart, ~~~~~~~~~~~~~~~~~~~~ upper_bound - 1, buf_end); ~~~~~~~~~~~~~~~~~~~~~~~~~~ buf_end += 5; ~~~~~~~~~~~~~ } ~ } ~ pending_exact = 0; ~~~~~~~~~~~~~~~~~~ beg_interval = NULL; ~~~~~~~~~~~~~~~~~~~~ } ~ break; ~~~~~~ #undef BAD_INTERVAL ~~~~~~~~~~~~~~~~~~~ unfetch_interval: ~~~~~~~~~~~~~~~~~ /* If an invalid interval, match the characters as literals. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ assert (beg_interval); ~~~~~~~~~~~~~~~~~~~~~~ p = beg_interval; ~~~~~~~~~~~~~~~~~ beg_interval = NULL; ~~~~~~~~~~~~~~~~~~~~ /* normal_char and normal_backslash need `c'. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PATFETCH (c); ~~~~~~~~~~~~~ if (!(syntax & RE_NO_BK_BRACES)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ if (p > pattern && p[-1] == '\\') ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto normal_backslash; ~~~~~~~~~~~~~~~~~~~~~~ } ~ goto normal_char; ~~~~~~~~~~~~~~~~~ #ifdef emacs ~~~~~~~~~~~~ /* There is no way to specify the before_dot and after_dot ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ operators. rms says this is ok. --karl */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case '=': ~~~~~~~~~ BUF_PUSH (at_dot); ~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case 's': ~~~~~~~~~ laststart = buf_end; ~~~~~~~~~~~~~~~~~~~~ PATFETCH (c); ~~~~~~~~~~~~~ /* XEmacs addition */ ~~~~~~~~~~~~~~~~~~~~~ if (c >= 0x80 || syntax_spec_code[c] == 0377) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ FREE_STACK_RETURN (REG_ESYNTAX); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ BUF_PUSH_2 (syntaxspec, syntax_spec_code[c]); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case 'S': ~~~~~~~~~ laststart = buf_end; ~~~~~~~~~~~~~~~~~~~~ PATFETCH (c); ~~~~~~~~~~~~~ /* XEmacs addition */ ~~~~~~~~~~~~~~~~~~~~~ if (c >= 0x80 || syntax_spec_code[c] == 0377) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ FREE_STACK_RETURN (REG_ESYNTAX); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ BUF_PUSH_2 (notsyntaxspec, syntax_spec_code[c]); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ #ifdef MULE ~~~~~~~~~~~ /* 97.2.17 jhod merged in to XEmacs from mule-2.3 */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case 'c': ~~~~~~~~~ laststart = buf_end; ~~~~~~~~~~~~~~~~~~~~ PATFETCH_RAW (c); ~~~~~~~~~~~~~~~~~ if (c < 32 || c > 127) ~~~~~~~~~~~~~~~~~~~~~~ FREE_STACK_RETURN (REG_ECATEGORY); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ BUF_PUSH_2 (categoryspec, c); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case 'C': ~~~~~~~~~ laststart = buf_end; ~~~~~~~~~~~~~~~~~~~~ PATFETCH_RAW (c); ~~~~~~~~~~~~~~~~~ if (c < 32 || c > 127) ~~~~~~~~~~~~~~~~~~~~~~ FREE_STACK_RETURN (REG_ECATEGORY); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ BUF_PUSH_2 (notcategoryspec, c); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ /* end of category patch */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif /* MULE */ ~~~~~~~~~~~~~~~~~ #endif /* emacs */ ~~~~~~~~~~~~~~~~~~ case 'w': ~~~~~~~~~ laststart = buf_end; ~~~~~~~~~~~~~~~~~~~~ BUF_PUSH (wordchar); ~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case 'W': ~~~~~~~~~ laststart = buf_end; ~~~~~~~~~~~~~~~~~~~~ BUF_PUSH (notwordchar); ~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case '<': ~~~~~~~~~ BUF_PUSH (wordbeg); ~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case '>': ~~~~~~~~~ BUF_PUSH (wordend); ~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case 'b': ~~~~~~~~~ BUF_PUSH (wordbound); ~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case 'B': ~~~~~~~~~ BUF_PUSH (notwordbound); ~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case '`': ~~~~~~~~~ BUF_PUSH (begbuf); ~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case '\'': ~~~~~~~~~~ BUF_PUSH (endbuf); ~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case '1': case '2': case '3': case '4': case '5': ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case '6': case '7': case '8': case '9': ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ regnum_t reg = -1, regint; ~~~~~~~~~~~~~~~~~~~~~~~~~~ if (syntax & RE_NO_BK_REFS) ~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto normal_char; ~~~~~~~~~~~~~~~~~ PATUNFETCH; ~~~~~~~~~~~ GET_UNSIGNED_NUMBER (reg); ~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Progressively divide down the backreference until we find ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ one that corresponds to an existing register. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ while (reg > 10 && ~~~~~~~~~~~~~~~~~~ (syntax & RE_NO_MULTI_DIGIT_BK_REFS ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || reg > bufp->re_nsub ~~~~~~~~~~~~~~~~~~~~~~ || (bufp->external_to_internal_register[reg] ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ == (int) 0xDEADBEEF))) ~~~~~~~~~~~~~~~~~~~~~~ { ~ PATUNFETCH; ~~~~~~~~~~~ reg /= 10; ~~~~~~~~~~ } ~ if (reg > bufp->re_nsub ~~~~~~~~~~~~~~~~~~~~~~~ || (bufp->external_to_internal_register[reg] ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ == (int) 0xDEADBEEF)) ~~~~~~~~~~~~~~~~~~~~~ { ~ /* \N with one digit with a non-existing group has always ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ been a syntax error. ~~~~~~~~~~~~~~~~~~~~ GNU as of Fr 27 Mär 2020 16:24:07 GMT do not accept ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ multidigit backreferences; if they did there would be ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ an argument for this not being an error for those ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ backreferences that are less than some known named ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ backreference. As it is currently we should error, this ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ will give those writing code for XEmacs better ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ feedback. */ ~~~~~~~~~~~~ FREE_STACK_RETURN (REG_ESUBREG); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ regint = bufp->external_to_internal_register[reg]; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Can't back reference to a subexpression if inside of it. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (group_in_compile_stack (compile_stack, regint)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ /* Check REG, not REGINT. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ while (reg > 10) ~~~~~~~~~~~~~~~~ { ~ PATUNFETCH; ~~~~~~~~~~~ reg = reg / 10; ~~~~~~~~~~~~~~~ } ~ goto normal_char; ~~~~~~~~~~~~~~~~~ } ~ #ifdef emacs ~~~~~~~~~~~~ if (reg > 9 && ~~~~~~~~~~~~~~ bufp->warned_about_incompatible_back_references == 0) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ bufp->warned_about_incompatible_back_references = 1; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ warn_when_safe (intern ("regex"), Qinfo, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ "Back reference \\%d now has new " ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ "semantics in %s", reg, pattern); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ #endif ~~~~~~ laststart = buf_end; ~~~~~~~~~~~~~~~~~~~~ GET_BUFFER_SPACE (3); ~~~~~~~~~~~~~~~~~~~~~ store_op1 (duplicate, buf_end, regint); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ buf_end += 3; ~~~~~~~~~~~~~ } ~ break; ~~~~~~ case '+': ~~~~~~~~~ case '?': ~~~~~~~~~ if (syntax & RE_BK_PLUS_QM) ~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto handle_plus; ~~~~~~~~~~~~~~~~~ else ~~~~ goto normal_backslash; ~~~~~~~~~~~~~~~~~~~~~~ default: ~~~~~~~~ normal_backslash: ~~~~~~~~~~~~~~~~~ /* You might think it would be useful for \ to mean ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ not to translate; but if we don't translate it, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ it will never match anything. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ c = RE_TRANSLATE (c); ~~~~~~~~~~~~~~~~~~~~~ goto normal_char; ~~~~~~~~~~~~~~~~~ } ~ break; ~~~~~~ default: ~~~~~~~~ /* Expects the character in `c'. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* `p' points to the location after where `c' came from. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ normal_char: ~~~~~~~~~~~~ { ~ /* The following conditional synced to GNU Emacs 22.1. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* If no exactn currently being built. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!pending_exact ~~~~~~~~~~~~~~~~~~ /* If last exactn not at current position. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || pending_exact + *pending_exact + 1 != buf_end ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* We have only one byte following the exactn for the count. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || *pending_exact >= (1 << BYTEWIDTH) - MAX_ICHAR_LEN ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* If followed by a repetition operator. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ If the lookahead fails because of end of pattern, any ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ trailing backslash will get caught later. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || (p != pend && (*p == '*' || *p == '^')) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || ((syntax & RE_BK_PLUS_QM) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ? p + 1 < pend && *p == '\\' && (p[1] == '+' || p[1] == '?') ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ : p != pend && (*p == '+' || *p == '?')) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || ((syntax & RE_INTERVALS) ~~~~~~~~~~~~~~~~~~~~~~~~~~~ && ((syntax & RE_NO_BK_BRACES) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ? p != pend && *p == '{' ~~~~~~~~~~~~~~~~~~~~~~~~ : p + 1 < pend && (p[0] == '\\' && p[1] == '{')))) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ /* Start building a new exactn. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ laststart = buf_end; ~~~~~~~~~~~~~~~~~~~~ BUF_PUSH_2 (exactn, 0); ~~~~~~~~~~~~~~~~~~~~~~~ pending_exact = buf_end - 1; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ #ifndef MULE ~~~~~~~~~~~~ BUF_PUSH (c); ~~~~~~~~~~~~~ (*pending_exact)++; ~~~~~~~~~~~~~~~~~~~ #else ~~~~~ { ~ Bytecount bt_count; ~~~~~~~~~~~~~~~~~~~ Ibyte tmp_buf[MAX_ICHAR_LEN]; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int i; ~~~~~~ bt_count = set_itext_ichar (tmp_buf, c); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (i = 0; i < bt_count; i++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ BUF_PUSH (tmp_buf[i]); ~~~~~~~~~~~~~~~~~~~~~~ (*pending_exact)++; ~~~~~~~~~~~~~~~~~~~ } ~ } ~ #endif ~~~~~~ break; ~~~~~~ } ~ } /* switch (c) */ ~~~~~~~~~~~~~~~~~~ } /* while p != pend */ ~~~~~~~~~~~~~~~~~~~~~~~ /* Through the pattern now. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (fixup_alt_jump) ~~~~~~~~~~~~~~~~~~~ STORE_JUMP (jump_past_alt, fixup_alt_jump, buf_end); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!COMPILE_STACK_EMPTY) ~~~~~~~~~~~~~~~~~~~~~~~~~ FREE_STACK_RETURN (REG_EPAREN); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* If we don't want backtracking, force success ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the first time we reach the end of the compiled pattern. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (syntax & RE_NO_POSIX_BACKTRACKING) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ BUF_PUSH (succeed); ~~~~~~~~~~~~~~~~~~~ xfree (compile_stack.stack); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* We have succeeded; set the length of the buffer. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->used = buf_end - bufp->buffer; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef DEBUG ~~~~~~~~~~~~ if (debug_regexps & RE_DEBUG_COMPILATION) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ DEBUG_PRINT1 ("\nCompiled pattern: \n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ print_compiled_pattern (bufp); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ #endif /* DEBUG */ ~~~~~~~~~~~~~~~~~~ #ifndef MATCH_MAY_ALLOCATE ~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Initialize the failure stack to the largest possible stack. This ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ isn't necessary unless we're trying to avoid calling alloca in ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the search and match routines. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ int num_regs = bufp->re_ngroups + 1; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Since DOUBLE_FAIL_STACK refuses to double only if the current size ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ is strictly greater than re_max_failures, the largest possible stack ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ is 2 * re_max_failures failure points. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (fail_stack.size < (2 * re_max_failures * MAX_FAILURE_ITEMS)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ fail_stack.size = (2 * re_max_failures * MAX_FAILURE_ITEMS); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (! fail_stack.stack) ~~~~~~~~~~~~~~~~~~~~~~~ fail_stack.stack ~~~~~~~~~~~~~~~~ = (fail_stack_elt_t *) xmalloc (fail_stack.size ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ * sizeof (fail_stack_elt_t)); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ else ~~~~ fail_stack.stack ~~~~~~~~~~~~~~~~ = (fail_stack_elt_t *) xrealloc (fail_stack.stack, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (fail_stack.size ~~~~~~~~~~~~~~~~ * sizeof (fail_stack_elt_t))); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ regex_grow_registers (num_regs); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ #endif /* not MATCH_MAY_ALLOCATE */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return REG_NOERROR; ~~~~~~~~~~~~~~~~~~~ } /* regex_compile */ ~~~~~~~~~~~~~~~~~~~~~ ~ /* Subroutines for `regex_compile'. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Store OP at LOC followed by two-byte integer parameter ARG. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ static void ~~~~~~~~~~~ store_op1 (re_opcode_t op, unsigned char *loc, int arg) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ *loc = (unsigned char) op; ~~~~~~~~~~~~~~~~~~~~~~~~~~ STORE_NUMBER (loc + 1, arg); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* Like `store_op1', but for two two-byte parameters ARG1 and ARG2. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ static void ~~~~~~~~~~~ store_op2 (re_opcode_t op, unsigned char *loc, int arg1, int arg2) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ *loc = (unsigned char) op; ~~~~~~~~~~~~~~~~~~~~~~~~~~ STORE_NUMBER (loc + 1, arg1); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ STORE_NUMBER (loc + 3, arg2); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* Copy the bytes from LOC to END to open up three bytes of space at LOC ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for OP followed by two-byte integer parameter ARG. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ static void ~~~~~~~~~~~ insert_op1 (re_opcode_t op, unsigned char *loc, int arg, unsigned char *end) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ REGISTER unsigned char *pfrom = end; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ REGISTER unsigned char *pto = end + 3; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ while (pfrom != loc) ~~~~~~~~~~~~~~~~~~~~ *--pto = *--pfrom; ~~~~~~~~~~~~~~~~~~ store_op1 (op, loc, arg); ~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* Like `insert_op1', but for two two-byte parameters ARG1 and ARG2. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ static void ~~~~~~~~~~~ insert_op2 (re_opcode_t op, unsigned char *loc, int arg1, int arg2, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ unsigned char *end) ~~~~~~~~~~~~~~~~~~~ { ~ REGISTER unsigned char *pfrom = end; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ REGISTER unsigned char *pto = end + 5; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ while (pfrom != loc) ~~~~~~~~~~~~~~~~~~~~ *--pto = *--pfrom; ~~~~~~~~~~~~~~~~~~ store_op2 (op, loc, arg1, arg2); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* P points to just after a ^ in PATTERN. Return true if that ^ comes ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ after an alternative or a begin-subexpression. We assume there is at ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ least one character before the ^. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ static re_bool ~~~~~~~~~~~~~~ at_begline_loc_p (re_char *pattern, re_char *p, reg_syntax_t syntax) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ re_char *prev = p - 2; ~~~~~~~~~~~~~~~~~~~~~~ re_bool prev_prev_backslash = prev > pattern && prev[-1] == '\\'; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return ~~~~~~ /* After a subexpression? */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (*prev == '(' && (syntax & RE_NO_BK_PARENS || prev_prev_backslash)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* After an alternative? */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || (*prev == '|' && (syntax & RE_NO_BK_VBAR || prev_prev_backslash)); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* The dual of at_begline_loc_p. This one is for $. We assume there is ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ at least one character after the $, i.e., `P < PEND'. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ static re_bool ~~~~~~~~~~~~~~ at_endline_loc_p (re_char *p, re_char *pend, int syntax) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ re_char *next = p; ~~~~~~~~~~~~~~~~~~ re_bool next_backslash = *next == '\\'; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_char *next_next = p + 1 < pend ? p + 1 : 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return ~~~~~~ /* Before a subexpression? */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (syntax & RE_NO_BK_PARENS ? *next == ')' ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ : next_backslash && next_next && *next_next == ')') ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Before an alternative? */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || (syntax & RE_NO_BK_VBAR ? *next == '|' ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ : next_backslash && next_next && *next_next == '|'); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* Returns true if REGNUM is in one of COMPILE_STACK's elements and ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ false if it's not. */ ~~~~~~~~~~~~~~~~~~~~~~ static re_bool ~~~~~~~~~~~~~~ group_in_compile_stack (compile_stack_type compile_stack, regnum_t regnum) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ int this_element; ~~~~~~~~~~~~~~~~~ for (this_element = compile_stack.avail - 1; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ this_element >= 0; ~~~~~~~~~~~~~~~~~~ this_element--) ~~~~~~~~~~~~~~~ if (compile_stack.stack[this_element].regnum == regnum) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return true; ~~~~~~~~~~~~ return false; ~~~~~~~~~~~~~ } ~ /* Read the ending character of a range (in a bracket expression) from the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ uncompiled pattern *P_PTR (which ends at PEND). We assume the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ starting character is in `P[-2]'. (`P[-1]' is the character `-'.) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Then we set the translation of all bits between the starting and ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ending characters (inclusive) in the compiled pattern B. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Return an error code. ~~~~~~~~~~~~~~~~~~~~~ We use these short variable names so we can use the same macros as ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `regex_compile' itself. ~~~~~~~~~~~~~~~~~~~~~~~ Under Mule, this is only called when both chars of the range are ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ASCII. */ ~~~~~~~~~ static reg_errcode_t ~~~~~~~~~~~~~~~~~~~~ compile_range (re_char **p_ptr, re_char *pend, RE_TRANSLATE_TYPE translate, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ reg_syntax_t syntax, unsigned char *buf_end) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ Ichar this_char; ~~~~~~~~~~~~~~~~ re_char *p = *p_ptr; ~~~~~~~~~~~~~~~~~~~~ int range_start, range_end; ~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (p == pend) ~~~~~~~~~~~~~~ return REG_ERANGE; ~~~~~~~~~~~~~~~~~~ /* Even though the pattern is a signed `char *', we need to fetch ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ with unsigned char *'s; if the high bit of the pattern character ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ is set, the range endpoints will be negative if we fetch using a ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ signed char *. ~~~~~~~~~~~~~~ We also want to fetch the endpoints without translating them; the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ appropriate translation is done in the bit-setting loop below. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* The SVR4 compiler on the 3B2 had trouble with unsigned const char *. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ range_start = ((const unsigned char *) p)[-2]; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ range_end = ((const unsigned char *) p)[0]; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Have to increment the pointer into the pattern string, so the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ caller isn't still at the ending character. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (*p_ptr)++; ~~~~~~~~~~~ /* If the start is after the end, the range is empty. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (range_start > range_end) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return syntax & RE_NO_EMPTY_RANGES ? REG_ERANGE : REG_NOERROR; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Here we see why `this_char' has to be larger than an `unsigned ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ char' -- the range is inclusive, so if `range_end' == 0xff ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (assuming 8-bit characters), we would otherwise go into an infinite ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ loop, since all characters <= 0xff. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (this_char = range_start; this_char <= range_end; this_char++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ SET_LIST_BIT (RE_TRANSLATE (this_char)); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ return REG_NOERROR; ~~~~~~~~~~~~~~~~~~~ } ~ #ifdef MULE ~~~~~~~~~~~ static reg_errcode_t ~~~~~~~~~~~~~~~~~~~~ compile_extended_range (re_char **p_ptr, re_char *pend, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RE_TRANSLATE_TYPE translate, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ reg_syntax_t syntax, Lisp_Object rtab) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ Ichar this_char, range_start, range_end; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ const Ibyte *p; ~~~~~~~~~~~~~~~ if (*p_ptr == pend) ~~~~~~~~~~~~~~~~~~~ return REG_ERANGE; ~~~~~~~~~~~~~~~~~~ p = (const Ibyte *) *p_ptr; ~~~~~~~~~~~~~~~~~~~~~~~~~~~ range_end = itext_ichar (p); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ p--; /* back to '-' */ ~~~~~~~~~~~~~~~~~~~~~~ DEC_IBYTEPTR (p); /* back to start of range */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* We also want to fetch the endpoints without translating them; the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ appropriate translation is done in the bit-setting loop below. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ range_start = itext_ichar (p); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ INC_IBYTEPTR (*p_ptr); ~~~~~~~~~~~~~~~~~~~~~~ /* If the start is after the end, the range is empty. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (range_start > range_end) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return syntax & RE_NO_EMPTY_RANGES ? REG_ERANGE : REG_NOERROR; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifndef UNICODE_INTERNAL ~~~~~~~~~~~~~~~~~~~~~~~~ /* Can't have ranges spanning different charsets, except maybe for ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ranges entirely within the first 256 chars. (The intent of this is that ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the effect of such a range would be unpredictable, since there is no ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ well-defined ordering over charsets and the particular assignment of ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ charset ID's is arbitrary.) This does not apply to Unicode, with ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ well-defined character values. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if ((range_start >= 0x100 || range_end >= 0x100) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ && !EQ (old_mule_ichar_charset (range_start), ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ old_mule_ichar_charset (range_end))) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return REG_ERANGESPAN; ~~~~~~~~~~~~~~~~~~~~~~ #endif /* not UNICODE_INTERNAL */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (TRANSLATE_P (translate)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ /* #### This might be way inefficient if the range encompasses 10,000 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ chars or something. To be efficient, you'd have to do something like ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ this: ~~~~~ range_table a ~~~~~~~~~~~~~ range_table b; ~~~~~~~~~~~~~~ map_char_table (translation table, [range_start, range_end]) of ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ lambda (ch, translation): ~~~~~~~~~~~~~~~~~~~~~~~~~ put (ch, Qt) in a ~~~~~~~~~~~~~~~~~ put (translation, Qt) in b ~~~~~~~~~~~~~~~~~~~~~~~~~~ invert the range in a and truncate to [range_start, range_end] ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ put the union of a, b in rtab ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ This is to say, we want to map every character that has a translation ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ to its translation, and other characters to themselves. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ This assumes, as is reasonable in practice, that a translation ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ table maps individual characters to their translation, and does ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ not generally map multiple characters to the same translation. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ ~~ for (this_char = range_start; this_char <= range_end; this_char++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ SET_RANGETAB_BIT (RE_TRANSLATE (this_char)); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ } ~ else ~~~~ put_range_table (rtab, range_start, range_end, Qt); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return REG_NOERROR; ~~~~~~~~~~~~~~~~~~~ } ~ #endif /* MULE */ ~~~~~~~~~~~~~~~~~ #ifdef emacs ~~~~~~~~~~~~ reg_errcode_t ~~~~~~~~~~~~~ compile_char_class (re_wctype_t cc, Lisp_Object rtab, Bitbyte *flags_out) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ *flags_out |= re_wctype_to_bit (cc); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ switch (cc) ~~~~~~~~~~~ { ~ case RECC_ASCII: ~~~~~~~~~~~~~~~~ put_range_table (rtab, 0, 0x7f, Qt); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case RECC_XDIGIT: ~~~~~~~~~~~~~~~~~ put_range_table (rtab, 'a', 'f', Qt); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ put_range_table (rtab, 'A', 'f', Qt); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* fallthrough */ ~~~~~~~~~~~~~~~~~ case RECC_DIGIT: ~~~~~~~~~~~~~~~~ put_range_table (rtab, '0', '9', Qt); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case RECC_BLANK: ~~~~~~~~~~~~~~~~ put_range_table (rtab, ' ', ' ', Qt); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ put_range_table (rtab, '\t', '\t', Qt); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case RECC_PRINT: ~~~~~~~~~~~~~~~~ put_range_table (rtab, ' ', 0x7e, Qt); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ put_range_table (rtab, 0x80, CHAR_CODE_LIMIT, Qt); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case RECC_GRAPH: ~~~~~~~~~~~~~~~~ put_range_table (rtab, '!', 0x7e, Qt); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ put_range_table (rtab, 0x80, CHAR_CODE_LIMIT, Qt); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case RECC_NONASCII: ~~~~~~~~~~~~~~~~~~~ case RECC_MULTIBYTE: ~~~~~~~~~~~~~~~~~~~~ put_range_table (rtab, 0x80, CHAR_CODE_LIMIT, Qt); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case RECC_CNTRL: ~~~~~~~~~~~~~~~~ put_range_table (rtab, 0x00, 0x1f, Qt); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case RECC_UNIBYTE: ~~~~~~~~~~~~~~~~~~ /* Never true in XEmacs. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ /* The following all have their own bits in the class_bits argument to ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ charset_mule and charset_mule_not, they don't use the range table ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ information. */ ~~~~~~~~~~~~~~~ case RECC_ALPHA: ~~~~~~~~~~~~~~~~ case RECC_WORD: ~~~~~~~~~~~~~~~ case RECC_ALNUM: /* Equivalent to RECC_WORD */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_LOWER: ~~~~~~~~~~~~~~~~ case RECC_PUNCT: ~~~~~~~~~~~~~~~~ case RECC_SPACE: ~~~~~~~~~~~~~~~~ case RECC_UPPER: ~~~~~~~~~~~~~~~~ break; ~~~~~~ } ~ return REG_NOERROR; ~~~~~~~~~~~~~~~~~~~ } ~ #endif /* MULE */ ~~~~~~~~~~~~~~~~~ ~ /* re_compile_fastmap computes a ``fastmap'' for the compiled pattern in ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ BUFP. A fastmap records which of the (1 << BYTEWIDTH) possible ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ characters can start a string that matches the pattern. This fastmap ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ is used by re_search to skip quickly over impossible starting points. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ The caller must supply the address of a (1 << BYTEWIDTH)-byte data ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ area as BUFP->fastmap. ~~~~~~~~~~~~~~~~~~~~~~ We set the `fastmap', `fastmap_accurate', and `can_be_null' fields in ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the pattern buffer. ~~~~~~~~~~~~~~~~~~~ Returns 0 if we succeed, -2 if an internal error. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int ~~~ re_compile_fastmap (struct re_pattern_buffer *bufp ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RE_LISP_SHORT_CONTEXT_ARGS_DECL) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ int j, k; ~~~~~~~~~ #ifdef MATCH_MAY_ALLOCATE ~~~~~~~~~~~~~~~~~~~~~~~~~ fail_stack_type fail_stack; ~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ /* We don't push any register information onto the failure stack. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* &&#### this should be changed for 8-bit-fixed, for efficiency. see ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ comment marked with &&#### in re_search_2. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ REGISTER char *fastmap = bufp->fastmap; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ unsigned char *pattern = bufp->buffer; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ long size = bufp->used; ~~~~~~~~~~~~~~~~~~~~~~~ re_char *p = pattern; ~~~~~~~~~~~~~~~~~~~~~ REGISTER re_char *pend = pattern + size; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef REGEX_REL_ALLOC ~~~~~~~~~~~~~~~~~~~~~~ /* This holds the pointer to the failure stack, when ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ it is allocated relocatably. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fail_stack_elt_t *failure_stack_ptr; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ /* Assume that each path through the pattern can be null until ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ proven otherwise. We set this false at the bottom of switch ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ statement, to which we get only if a particular path doesn't ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ match the empty string. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_bool path_can_be_null = true; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* We aren't doing a `succeed_n' to begin with. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_bool succeed_n_p = false; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef ERROR_CHECK_MALLOC ~~~~~~~~~~~~~~~~~~~~~~~~~ /* The pattern comes from string data, not buffer data. We don't access ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ any buffer data, so we don't have to worry about malloc() (but the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ disallowed flag may have been set by a caller). */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int depth = bind_regex_malloc_disallowed (0); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ assert (fastmap != NULL && p != NULL); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ INIT_FAIL_STACK (); ~~~~~~~~~~~~~~~~~~~ memset (fastmap, 0, 1 << BYTEWIDTH); /* Assume nothing's valid. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->fastmap_accurate = 1; /* It will be when we're done. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->can_be_null = 0; ~~~~~~~~~~~~~~~~~~~~~~ while (1) ~~~~~~~~~ { ~ if (p == pend || *p == succeed) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ /* We have reached the (effective) end of pattern. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!FAIL_STACK_EMPTY ()) ~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ bufp->can_be_null |= path_can_be_null; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Reset for next path. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~ path_can_be_null = true; ~~~~~~~~~~~~~~~~~~~~~~~~ p = (unsigned char *) fail_stack.stack[--fail_stack.avail].pointer; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ continue; ~~~~~~~~~ } ~ else ~~~~ break; ~~~~~~ } ~ /* We should never be about to go beyond the end of the pattern. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ assert (p < pend); ~~~~~~~~~~~~~~~~~~ switch ((re_opcode_t) *p++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ /* I guess the idea here is to simply not bother with a fastmap ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if a backreference is used, since it's too hard to figure out ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the fastmap for the corresponding group. Setting ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `can_be_null' stops `re_search_2' from using the fastmap, so ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ that is all we do. */ ~~~~~~~~~~~~~~~~~~~~~~ case duplicate: ~~~~~~~~~~~~~~~ bufp->can_be_null = 1; ~~~~~~~~~~~~~~~~~~~~~~ goto done; ~~~~~~~~~~ /* Following are the cases which match a character. These end ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ with `break'. */ ~~~~~~~~~~~~~~~~~ case exactn: ~~~~~~~~~~~~ fastmap[p[1]] = 1; ~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case charset: ~~~~~~~~~~~~~ /* XEmacs: Under Mule, these bit vectors will ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ only contain values for characters below 0x80. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH))) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[j] = 1; ~~~~~~~~~~~~~~~ break; ~~~~~~ case charset_not: ~~~~~~~~~~~~~~~~~ /* Chars beyond end of map must be allowed. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef MULE ~~~~~~~~~~~ for (j = *p * BYTEWIDTH; j < 0x80; j++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[j] = 1; ~~~~~~~~~~~~~~~ /* And all extended characters must be allowed, too. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (j = 0x80; j < 0x100; j++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (ibyte_first_byte_p (j)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[j] = 1; ~~~~~~~~~~~~~~~ #else /* not MULE */ ~~~~~~~~~~~~~~~~~~~~ for (j = *p * BYTEWIDTH; j < (1 << BYTEWIDTH); j++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[j] = 1; ~~~~~~~~~~~~~~~ #endif /* MULE */ ~~~~~~~~~~~~~~~~~ for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!(p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH)))) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[j] = 1; ~~~~~~~~~~~~~~~ break; ~~~~~~ #ifdef MULE ~~~~~~~~~~~ case charset_mule: ~~~~~~~~~~~~~~~~~~ { ~ int nentries; ~~~~~~~~~~~~~ Bitbyte flags = *p++; ~~~~~~~~~~~~~~~~~~~~~ if (flags) ~~~~~~~~~~ { ~ /* We need to consult the syntax table, fastmap won't ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ work. */ ~~~~~~~~ bufp->can_be_null = 1; ~~~~~~~~~~~~~~~~~~~~~~ goto done; ~~~~~~~~~~ } ~ nentries = unified_range_table_nentries ((void *) p); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (j = 0; j < nentries; j++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ EMACS_INT first, last; ~~~~~~~~~~~~~~~~~~~~~~ Lisp_Object dummy_val; ~~~~~~~~~~~~~~~~~~~~~~ int jj; ~~~~~~~ Ibyte strr[MAX_ICHAR_LEN]; ~~~~~~~~~~~~~~~~~~~~~~~~~~ unified_range_table_get_range ((void *) p, j, &first, &last, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ &dummy_val); ~~~~~~~~~~~~ #ifndef UNICODE_INTERNAL ~~~~~~~~~~~~~~~~~~~~~~~~ for (jj = first; jj <= last && jj < 0x80; jj++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[jj] = 1; ~~~~~~~~~~~~~~~~ /* Ranges below 0x100 can span charsets, but there ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ are only two (Control-1 and Latin-1), and ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ either first or last has to be in them. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ set_itext_ichar (strr, first); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[*strr] = 1; ~~~~~~~~~~~~~~~~~~~ if (last < 0x100) ~~~~~~~~~~~~~~~~~ { ~ set_itext_ichar (strr, last); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[*strr] = 1; ~~~~~~~~~~~~~~~~~~~ } ~ else if (CHAR_CODE_LIMIT == last) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ /* This is RECC_MULTIBYTE or RECC_NONASCII; true for all ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ non-ASCII characters. */ ~~~~~~~~~~~~~~~~~~~~~~~~ jj = 0x80; ~~~~~~~~~~ while (jj < 0xA0) ~~~~~~~~~~~~~~~~~ { ~ fastmap[jj++] = 1; ~~~~~~~~~~~~~~~~~~ } ~ } ~ #else ~~~~~ /* Ranges can span charsets. We depend on the fact that ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ lead bytes are monotonically non-decreasing as ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ character values increase. @@#### This is a fairly ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ reasonable assumption in general (but DOES NOT WORK in ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ old Mule due to the ordering of private dimension-1 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ chars before official dimension-2 chars), and introduces ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ a dependency on the particular representation. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ Ibyte strrlast[MAX_ICHAR_LEN]; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ set_itext_ichar (strr, first); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ set_itext_ichar (strrlast, min (last, CHAR_CODE_LIMIT - 1)); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (jj = *strr; jj <= *strrlast; jj++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[jj] = 1; ~~~~~~~~~~~~~~~~ } ~ #endif /* not UNICODE_INTERNAL */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* If it's not a possible first byte, it can't be in the fastmap. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ In UTF-8, lead bytes are not contiguous with ASCII, so a ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ range spanning the ASCII/non-ASCII boundary will put ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ extraneous bytes in the range [0x80 - 0xBF] in the fastmap. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (j = 0x80; j < 0x100; j++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!ibyte_first_byte_p (j)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[j] = 0; ~~~~~~~~~~~~~~~ } ~ break; ~~~~~~ case charset_mule_not: ~~~~~~~~~~~~~~~~~~~~~~ { ~ int nentries; ~~~~~~~~~~~~~ int smallest_prev = 0; ~~~~~~~~~~~~~~~~~~~~~~ Bitbyte flags = *p++; ~~~~~~~~~~~~~~~~~~~~~ if (flags) ~~~~~~~~~~ { ~ /* We need to consult the syntax table, fastmap won't ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ work. */ ~~~~~~~~ bufp->can_be_null = 1; ~~~~~~~~~~~~~~~~~~~~~~ goto done; ~~~~~~~~~~ } ~ nentries = unified_range_table_nentries ((void *) p); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifndef UNICODE_INTERNAL ~~~~~~~~~~~~~~~~~~~~~~~~ for (j = 0; j < nentries; j++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ EMACS_INT first, last; ~~~~~~~~~~~~~~~~~~~~~~ Lisp_Object dummy_val; ~~~~~~~~~~~~~~~~~~~~~~ int jj; ~~~~~~~ unified_range_table_get_range ((void *) p, j, &first, &last, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ &dummy_val); ~~~~~~~~~~~~ for (jj = smallest_prev; jj < first && jj < 0x80; jj++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[jj] = 1; ~~~~~~~~~~~~~~~~ smallest_prev = last + 1; ~~~~~~~~~~~~~~~~~~~~~~~~~ if (smallest_prev >= 0x80) ~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ } ~ /* Also set lead bytes after the end */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (j = smallest_prev; j < 0x80; j++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[j] = 1; ~~~~~~~~~~~~~~~ /* Calculating which lead bytes are actually allowed ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ here is rather difficult, so we just punt and allow ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ all of them. ~~~~~~~~~~~~ */ ~~ for (j = 0x80; j < 0x100; j++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (ibyte_first_byte_p (j)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[j] = 1; ~~~~~~~~~~~~~~~ #else ~~~~~ for (j = 0; j < nentries; j++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ EMACS_INT first, last; ~~~~~~~~~~~~~~~~~~~~~~ /* This denotes a range of lead bytes that are not ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ in the fastmap. */ ~~~~~~~~~~~~~~~~~~ int firstlead, lastlead; ~~~~~~~~~~~~~~~~~~~~~~~~ Lisp_Object dummy_val; ~~~~~~~~~~~~~~~~~~~~~~ int jj; ~~~~~~~ unified_range_table_get_range ((void *) p, j, &first, &last, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ &dummy_val); ~~~~~~~~~~~~ /* With Unicode-internal, lead bytes that are entirely ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ within the range and not including the beginning or end ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ are definitely not in the fastmap. Leading bytes that ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ include the beginning or ending characters will be in ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the fastmap unless the beginning or ending characters ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ are the first or last character, respectively, that uses ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ this lead byte. ~~~~~~~~~~~~~~~ @@#### WARNING! In order to determine whether we are the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ first or last character using a lead byte we use and ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ embed in the code some knowledge of how UTF-8 works -- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ at least, the fact that the the first character using a ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ particular lead byte has the minimum-numbered trailing ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ byte in all its trailing bytes, and the last character ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ using a particular lead byte has the maximum-numbered ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ trailing byte in all its trailing bytes. We abstract ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ away the actual minimum/maximum trailing byte numbers, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ at least. We could perhaps do this more portably by ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ just looking at the representation of the character one ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ higher or lower and seeing if the lead byte changes, but ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ you'd run into the problem of invalid characters, e.g. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if you're at the edge of the range of surrogates or are ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the top-most allowed character. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ ~~ if (first < 0x80) ~~~~~~~~~~~~~~~~~ firstlead = first; ~~~~~~~~~~~~~~~~~~ else ~~~~ { ~ Ibyte strr[MAX_ICHAR_LEN]; ~~~~~~~~~~~~~~~~~~~~~~~~~~ Bytecount slen = set_itext_ichar (strr, first); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int kk; ~~~~~~~ /* Determine if we're the first character using our ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ leading byte. */ ~~~~~~~~~~~~~~~~ for (kk = 1; kk < slen; kk++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (strr[kk] != FIRST_TRAILING_BYTE) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ /* If not, this leading byte might occur, so ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ make sure it gets added to the fastmap. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ firstlead = *strr + 1; ~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ } ~ /* Otherwise, we're the first character using our ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ leading byte, and we don't need to add the leading ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ byte to the fastmap. (If our range doesn't ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ completely cover the leading byte, it will get added ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ anyway by the code handling the other end of the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ range.) */ ~~~~~~~~~~ firstlead = *strr; ~~~~~~~~~~~~~~~~~~ } ~ if (last < 0x80) ~~~~~~~~~~~~~~~~ lastlead = last; ~~~~~~~~~~~~~~~~ else ~~~~ { ~ Ibyte strr[MAX_ICHAR_LEN]; ~~~~~~~~~~~~~~~~~~~~~~~~~~ Bytecount slen ~~~~~~~~~~~~~~ = set_itext_ichar (strr, ~~~~~~~~~~~~~~~~~~~~~~~~ min (last, ~~~~~~~~~~ CHAR_CODE_LIMIT - 1)); ~~~~~~~~~~~~~~~~~~~~~~ int kk; ~~~~~~~ /* Same as above but for the last character using ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ our leading byte. */ ~~~~~~~~~~~~~~~~~~~~ for (kk = 1; kk < slen; kk++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (strr[kk] != LAST_TRAILING_BYTE) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ lastlead = *strr - 1; ~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ } ~ lastlead = *strr; ~~~~~~~~~~~~~~~~~ } ~ /* Now, FIRSTLEAD and LASTLEAD are set to the beginning and ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ end, inclusive, of a range of lead bytes that cannot be ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ in the fastmap. Essentially, we want to set all the other ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bytes to be in the fastmap. Here we handle those after ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the previous range and before this one. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (jj = smallest_prev; jj < firstlead; jj++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[jj] = 1; ~~~~~~~~~~~~~~~~ smallest_prev = lastlead + 1; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* Also set lead bytes after the end of the final range. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (j = smallest_prev; j < 0x100; j++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[j] = 1; ~~~~~~~~~~~~~~~ /* If it's not a possible first byte, it can't be in the fastmap. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ In UTF-8, lead bytes are not contiguous with ASCII, so a ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ range spanning the ASCII/non-ASCII boundary will put ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ extraneous bytes in the range [0x80 - 0xBF] in the fastmap. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (j = 0x80; j < 0x100; j++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!ibyte_first_byte_p (j)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[j] = 0; ~~~~~~~~~~~~~~~ #endif /* UNICODE_INTERNAL */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ break; ~~~~~~ #endif /* MULE */ ~~~~~~~~~~~~~~~~~ case anychar: ~~~~~~~~~~~~~ { ~ int fastmap_newline = fastmap['\n']; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* `.' matches anything ... */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef MULE ~~~~~~~~~~~ /* "anything" only includes bytes that can be the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ first byte of a character. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (j = 0; j < 0x100; j++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (ibyte_first_byte_p (j)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[j] = 1; ~~~~~~~~~~~~~~~ #else ~~~~~ for (j = 0; j < (1 << BYTEWIDTH); j++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[j] = 1; ~~~~~~~~~~~~~~~ #endif ~~~~~~ /* ... except perhaps newline. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!(bufp->syntax & RE_DOT_NEWLINE)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap['\n'] = fastmap_newline; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Return if we have already set `can_be_null'; if we have, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ then the fastmap is irrelevant. Something's wrong here. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ else if (bufp->can_be_null) ~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto done; ~~~~~~~~~~ /* Otherwise, have to check alternative paths. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ } ~ #ifndef emacs ~~~~~~~~~~~~~ case wordchar: ~~~~~~~~~~~~~~ for (j = 0; j < (1 << BYTEWIDTH); j++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (SYNTAX (ignored, j) == Sword) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[j] = 1; ~~~~~~~~~~~~~~~ break; ~~~~~~ case notwordchar: ~~~~~~~~~~~~~~~~~ for (j = 0; j < (1 << BYTEWIDTH); j++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (SYNTAX (ignored, j) != Sword) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[j] = 1; ~~~~~~~~~~~~~~~ break; ~~~~~~ #else /* emacs */ ~~~~~~~~~~~~~~~~~ case wordchar: ~~~~~~~~~~~~~~ case notwordchar: ~~~~~~~~~~~~~~~~~ case wordbound: ~~~~~~~~~~~~~~~ case notwordbound: ~~~~~~~~~~~~~~~~~~ case wordbeg: ~~~~~~~~~~~~~ case wordend: ~~~~~~~~~~~~~ case notsyntaxspec: ~~~~~~~~~~~~~~~~~~~ case syntaxspec: ~~~~~~~~~~~~~~~~ /* This match depends on text properties. These end with ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ aborting optimizations. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->can_be_null = 1; ~~~~~~~~~~~~~~~~~~~~~~ goto done; ~~~~~~~~~~ #if 0 /* all of the following code is unused now that the `syntax-table' ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ property exists -- it's trickier to do this than just look in ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the buffer. &&#### but we could just use the syntax-cache stuff ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ instead; why don't we? --ben */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case wordchar: ~~~~~~~~~~~~~~ k = (int) Sword; ~~~~~~~~~~~~~~~~ goto matchsyntax; ~~~~~~~~~~~~~~~~~ case notwordchar: ~~~~~~~~~~~~~~~~~ k = (int) Sword; ~~~~~~~~~~~~~~~~ goto matchnotsyntax; ~~~~~~~~~~~~~~~~~~~~ case syntaxspec: ~~~~~~~~~~~~~~~~ k = *p++; ~~~~~~~~~ matchsyntax: ~~~~~~~~~~~~ #ifdef MULE ~~~~~~~~~~~ for (j = 0; j < 0x80; j++) ~~~~~~~~~~~~~~~~~~~~~~~~~~ if (SYNTAX ~~~~~~~~~~ (XCHAR_TABLE (BUFFER_MIRROR_SYNTAX_TABLE (lispbuf)), j) == ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (enum syntaxcode) k) ~~~~~~~~~~~~~~~~~~~~ fastmap[j] = 1; ~~~~~~~~~~~~~~~ /* @@#### To be correct, we need to set the fastmap for any ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ lead byte any of whose characters can have this syntax code. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ This is hard to calculate so we just punt for now. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (j = 0x80; j < 0x100; j++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (ibyte_first_byte_p (j)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[j] = 1; ~~~~~~~~~~~~~~~ #else /* not MULE */ ~~~~~~~~~~~~~~~~~~~~ for (j = 0; j < (1 << BYTEWIDTH); j++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (SYNTAX ~~~~~~~~~~ (XCHAR_TABLE (BUFFER_MIRROR_SYNTAX_TABLE (lispbuf)), j) == ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (enum syntaxcode) k) ~~~~~~~~~~~~~~~~~~~~ fastmap[j] = 1; ~~~~~~~~~~~~~~~ #endif /* MULE */ ~~~~~~~~~~~~~~~~~ break; ~~~~~~ case notsyntaxspec: ~~~~~~~~~~~~~~~~~~~ k = *p++; ~~~~~~~~~ matchnotsyntax: ~~~~~~~~~~~~~~~ #ifdef MULE ~~~~~~~~~~~ for (j = 0; j < 0x80; j++) ~~~~~~~~~~~~~~~~~~~~~~~~~~ if (SYNTAX ~~~~~~~~~~ (XCHAR_TABLE ~~~~~~~~~~~~ (BUFFER_MIRROR_SYNTAX_TABLE (lispbuf)), j) != ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (enum syntaxcode) k) ~~~~~~~~~~~~~~~~~~~~ fastmap[j] = 1; ~~~~~~~~~~~~~~~ /* @@#### To be correct, we need to set the fastmap for any ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ lead byte all of whose characters do not have this syntax code. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ This is hard to calculate so we just punt for now. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (j = 0x80; j < 0x100; j++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (ibyte_first_byte_p (j)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[j] = 1; ~~~~~~~~~~~~~~~ #else /* not MULE */ ~~~~~~~~~~~~~~~~~~~~ for (j = 0; j < (1 << BYTEWIDTH); j++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (SYNTAX ~~~~~~~~~~ (XCHAR_TABLE ~~~~~~~~~~~~ (BUFFER_MIRROR_SYNTAX_TABLE (lispbuf)), j) != ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (enum syntaxcode) k) ~~~~~~~~~~~~~~~~~~~~ fastmap[j] = 1; ~~~~~~~~~~~~~~~ #endif /* MULE */ ~~~~~~~~~~~~~~~~~ break; ~~~~~~ #endif /* 0 */ ~~~~~~~~~~~~~~ #ifdef MULE ~~~~~~~~~~~ /* 97/2/17 jhod category patch */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case categoryspec: ~~~~~~~~~~~~~~~~~~ case notcategoryspec: ~~~~~~~~~~~~~~~~~~~~~ bufp->can_be_null = 1; ~~~~~~~~~~~~~~~~~~~~~~ UNBIND_REGEX_MALLOC_CHECK (); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return 0; ~~~~~~~~~ /* end if category patch */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif /* MULE */ ~~~~~~~~~~~~~~~~~ /* All cases after this match the empty string. These end with ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `continue'. */ ~~~~~~~~~~~~~~~ case before_dot: ~~~~~~~~~~~~~~~~ case at_dot: ~~~~~~~~~~~~ case after_dot: ~~~~~~~~~~~~~~~ continue; ~~~~~~~~~ #endif /* emacs */ ~~~~~~~~~~~~~~~~~~ case no_op: ~~~~~~~~~~~ case begline: ~~~~~~~~~~~~~ case endline: ~~~~~~~~~~~~~ case begbuf: ~~~~~~~~~~~~ case endbuf: ~~~~~~~~~~~~ #ifndef emacs ~~~~~~~~~~~~~ case wordbound: ~~~~~~~~~~~~~~~ case notwordbound: ~~~~~~~~~~~~~~~~~~ case wordbeg: ~~~~~~~~~~~~~ case wordend: ~~~~~~~~~~~~~ #endif ~~~~~~ case push_dummy_failure: ~~~~~~~~~~~~~~~~~~~~~~~~ continue; ~~~~~~~~~ case jump_n: ~~~~~~~~~~~~ case pop_failure_jump: ~~~~~~~~~~~~~~~~~~~~~~ case maybe_pop_jump: ~~~~~~~~~~~~~~~~~~~~ case jump: ~~~~~~~~~~ case jump_past_alt: ~~~~~~~~~~~~~~~~~~~ case dummy_failure_jump: ~~~~~~~~~~~~~~~~~~~~~~~~ EXTRACT_NUMBER_AND_INCR (j, p); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ p += j; ~~~~~~~ if (j > 0) ~~~~~~~~~~ continue; ~~~~~~~~~ /* Jump backward implies we just went through the body of a ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ loop and matched nothing. Opcode jumped to should be ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `on_failure_jump' or `succeed_n'. Just treat it like an ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ordinary jump. For a * loop, it has pushed its failure ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ point already; if so, discard that as redundant. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if ((re_opcode_t) *p != on_failure_jump ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ && (re_opcode_t) *p != succeed_n) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ continue; ~~~~~~~~~ p++; ~~~~ EXTRACT_NUMBER_AND_INCR (j, p); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ p += j; ~~~~~~~ /* If what's on the stack is where we are now, pop it. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!FAIL_STACK_EMPTY () ~~~~~~~~~~~~~~~~~~~~~~~~ && fail_stack.stack[fail_stack.avail - 1].pointer == p) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fail_stack.avail--; ~~~~~~~~~~~~~~~~~~~ continue; ~~~~~~~~~ case on_failure_jump: ~~~~~~~~~~~~~~~~~~~~~ case on_failure_keep_string_jump: ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ handle_on_failure_jump: ~~~~~~~~~~~~~~~~~~~~~~~ EXTRACT_NUMBER_AND_INCR (j, p); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* For some patterns, e.g., `(a?)?', `p+j' here points to the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ end of the pattern. We don't want to push such a point, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ since when we restore it above, entering the switch will ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ increment `p' past the end of the pattern. We don't need ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ to push such a point since we obviously won't find any more ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap entries beyond `pend'. Such a pattern can match ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the null string, though. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (p + j < pend) ~~~~~~~~~~~~~~~~~ { ~ if (!PUSH_PATTERN_OP (p + j, fail_stack)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ RESET_FAIL_STACK (); ~~~~~~~~~~~~~~~~~~~~ UNBIND_REGEX_MALLOC_CHECK (); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return -2; ~~~~~~~~~~ } ~ } ~ else ~~~~ bufp->can_be_null = 1; ~~~~~~~~~~~~~~~~~~~~~~ if (succeed_n_p) ~~~~~~~~~~~~~~~~ { ~ EXTRACT_NUMBER_AND_INCR (k, p); /* Skip the n. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ succeed_n_p = false; ~~~~~~~~~~~~~~~~~~~~ } ~ continue; ~~~~~~~~~ case succeed_n: ~~~~~~~~~~~~~~~ /* Get to the number of times to succeed. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ p += 2; ~~~~~~~ /* Increment p past the n for when k != 0. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ EXTRACT_NONNEGATIVE_AND_INCR (k, p); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (k == 0) ~~~~~~~~~~~ { ~ p -= 4; ~~~~~~~ succeed_n_p = true; /* Spaghetti code alert. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto handle_on_failure_jump; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ continue; ~~~~~~~~~ case set_number_at: ~~~~~~~~~~~~~~~~~~~ p += 4; ~~~~~~~ continue; ~~~~~~~~~ case start_memory: ~~~~~~~~~~~~~~~~~~ case stop_memory: ~~~~~~~~~~~~~~~~~ p += 4; ~~~~~~~ continue; ~~~~~~~~~ default: ~~~~~~~~ ABORT (); /* We have listed all the cases. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } /* switch *p++ */ ~~~~~~~~~~~~~~~~~~~ /* Getting here means we have found the possible starting ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ characters for one path of the pattern -- and that the empty ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ string does not match. We need not follow this path further. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Instead, look at the next alternative (remembered on the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ stack), or quit if no more. The test at the top of the loop ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ does these things. */ ~~~~~~~~~~~~~~~~~~~~~~ path_can_be_null = false; ~~~~~~~~~~~~~~~~~~~~~~~~~ p = pend; ~~~~~~~~~ } /* while p */ ~~~~~~~~~~~~~~~ /* Set `can_be_null' for the last path (also the first path, if the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ pattern is empty). */ ~~~~~~~~~~~~~~~~~~~~~~ bufp->can_be_null |= path_can_be_null; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ done: ~~~~~ RESET_FAIL_STACK (); ~~~~~~~~~~~~~~~~~~~~ UNBIND_REGEX_MALLOC_CHECK (); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return 0; ~~~~~~~~~ } /* re_compile_fastmap */ ~~~~~~~~~~~~~~~~~~~~~~~~~~ ~ /* Set REGS to hold NUM_REGS registers, storing them in STARTS and ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ENDS. Subsequent matches using PATTERN_BUFFER and REGS will use ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ this memory for recording register information. STARTS and ENDS ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ must be allocated using the malloc library routine, and must each ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ be at least NUM_REGS * sizeof (regoff_t) bytes long. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ If NUM_REGS == 0, then subsequent matches should allocate their own ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ register data. ~~~~~~~~~~~~~~ Unless this function is called, the first search or match using ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PATTERN_BUFFER will allocate its own register data, without ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ freeing the old data. */ ~~~~~~~~~~~~~~~~~~~~~~~~~ void ~~~~ re_set_registers (struct re_pattern_buffer *bufp, struct re_registers *regs, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int num_regs, regoff_t *starts, regoff_t *ends) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ if (num_regs) ~~~~~~~~~~~~~ { ~ bufp->regs_allocated = REGS_REALLOCATE; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regs->num_regs = num_regs; ~~~~~~~~~~~~~~~~~~~~~~~~~~ regs->start = starts; ~~~~~~~~~~~~~~~~~~~~~ regs->end = ends; ~~~~~~~~~~~~~~~~~ } ~ else ~~~~ { ~ bufp->regs_allocated = REGS_UNALLOCATED; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regs->num_regs = 0; ~~~~~~~~~~~~~~~~~~~ regs->start = regs->end = (regoff_t *) 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ } ~ ~ /* Searching routines. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Like re_search_2, below, but only one string is specified, and ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ doesn't let you say where to stop matching. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int ~~~ re_search (struct re_pattern_buffer *bufp, const char *string, int size, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int startpos, int range, struct re_registers *regs ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RE_LISP_CONTEXT_ARGS_DECL) ~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ return re_search_2 (bufp, NULL, 0, string, size, startpos, range, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regs, size RE_LISP_CONTEXT_ARGS); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* Using the compiled pattern in BUFP->buffer, first tries to match the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ virtual concatenation of STRING1 and STRING2, starting first at index ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ STARTPOS, then at STARTPOS + 1, and so on. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ STRING1 and STRING2 have length SIZE1 and SIZE2, respectively. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RANGE is how far to scan while trying to match. RANGE = 0 means try ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ only at STARTPOS; in general, the last start tried is STARTPOS + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RANGE. ~~~~~~ All sizes and positions refer to bytes (not chars); under Mule, the code ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ knows about the format of the text and will only check at positions ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ where a character starts. ~~~~~~~~~~~~~~~~~~~~~~~~~ With MULE, RANGE is a byte position, not a char position. The last ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ start tried is the character starting <= STARTPOS + RANGE. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ In REGS, return the indices of the virtual concatenation of STRING1 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ and STRING2 that matched the entire BUFP->buffer and its contained ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ subexpressions. ~~~~~~~~~~~~~~~ Do not consider matching one past the index STOP in the virtual ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ concatenation of STRING1 and STRING2. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ We return either the position in the strings at which the match was ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ found, -1 if no match, or -2 if error (such as failure ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ stack overflow). */ ~~~~~~~~~~~~~~~~~~~~ int ~~~ re_search_2 (struct re_pattern_buffer *bufp, const char *str1, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int size1, const char *str2, int size2, int startpos, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int range, struct re_registers *regs, int stop ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RE_LISP_CONTEXT_ARGS_DECL) ~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ int val; ~~~~~~~~ re_char *string1 = (re_char *) str1; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_char *string2 = (re_char *) str2; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ REGISTER char *fastmap = bufp->fastmap; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ REGISTER RE_TRANSLATE_TYPE translate = bufp->translate; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int total_size = size1 + size2; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int endpos = startpos + range; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef REGEX_BEGLINE_CHECK ~~~~~~~~~~~~~~~~~~~~~~~~~~ int anchored_at_begline = 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ re_char *d; ~~~~~~~~~~~ #ifdef emacs ~~~~~~~~~~~~ Internal_Format fmt = buffer_or_other_internal_format (lispobj); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef REL_ALLOC ~~~~~~~~~~~~~~~~ const Ibyte *orig_buftext = ~~~~~~~~~~~~~~~~~~~~~~~~~~~ BUFFERP (lispobj) ? ~~~~~~~~~~~~~~~~~~~ BYTE_BUF_BYTE_ADDRESS (XBUFFER (lispobj), ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ BYTE_BUF_BEG (XBUFFER (lispobj))) : ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 0; ~~ #endif ~~~~~~ #ifdef ERROR_CHECK_MALLOC ~~~~~~~~~~~~~~~~~~~~~~~~~ int depth; ~~~~~~~~~~ #endif ~~~~~~ #endif /* emacs */ ~~~~~~~~~~~~~~~~~~ int forward_search_p; ~~~~~~~~~~~~~~~~~~~~~ /* Check for out-of-range STARTPOS. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (startpos < 0 || startpos > total_size) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return -1; ~~~~~~~~~~ /* Fix up RANGE if it might eventually take us outside ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the virtual concatenation of STRING1 and STRING2. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (endpos < 0) ~~~~~~~~~~~~~~~ range = 0 - startpos; ~~~~~~~~~~~~~~~~~~~~~ else if (endpos > total_size) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ range = total_size - startpos; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ forward_search_p = range > 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (void) (forward_search_p); /* This is only used with assertions, silence the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ compiler warning when they're turned off. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* If the search isn't to be a backwards one, don't waste time in a ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ search for a pattern that must be anchored. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (bufp->used > 0 && (re_opcode_t) bufp->buffer[0] == begbuf && range > 0) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ if (startpos > 0) ~~~~~~~~~~~~~~~~~ return -1; ~~~~~~~~~~ else ~~~~ { ~ d = ((const unsigned char *) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (startpos >= size1 ? string2 - size1 : string1) + startpos); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ range = itext_ichar_len_fmt (d, fmt); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ } ~ #ifdef emacs ~~~~~~~~~~~~ /* In a forward search for something that starts with \=. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ don't keep searching past point. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (bufp->used > 0 && (re_opcode_t) bufp->buffer[0] == at_dot && range > 0) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ if (!BUFFERP (lispobj)) ~~~~~~~~~~~~~~~~~~~~~~~ return -1; ~~~~~~~~~~ range = (BYTE_BUF_PT (XBUFFER (lispobj)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - BYTE_BUF_BEGV (XBUFFER (lispobj)) - startpos); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (range < 0) ~~~~~~~~~~~~~~ return -1; ~~~~~~~~~~ } ~ #endif /* emacs */ ~~~~~~~~~~~~~~~~~~ #ifdef ERROR_CHECK_MALLOC ~~~~~~~~~~~~~~~~~~~~~~~~~ /* Do this after the above return()s. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ depth = bind_regex_malloc_disallowed (1); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ /* Update the fastmap now if not correct already. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ BEGIN_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~~~ if (fastmap && !bufp->fastmap_accurate) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (re_compile_fastmap (bufp RE_LISP_SHORT_CONTEXT_ARGS) == -2) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ END_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~ UNBIND_REGEX_MALLOC_CHECK (); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return -2; ~~~~~~~~~~ } ~ END_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~ RE_SEARCH_RELOCATE_MOVEABLE_DATA_POINTERS (); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef REGEX_BEGLINE_CHECK ~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ long i = 0; ~~~~~~~~~~~ while (i < bufp->used) ~~~~~~~~~~~~~~~~~~~~~~ { ~ if (bufp->buffer[i] == start_memory || ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->buffer[i] == stop_memory) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ i += 4; ~~~~~~~ else ~~~~ break; ~~~~~~ } ~ anchored_at_begline = i < bufp->used && bufp->buffer[i] == begline; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ #endif ~~~~~~ #ifdef emacs ~~~~~~~~~~~~ BEGIN_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~~~ /* Update the mirror syntax table if it's used and dirty. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ SYNTAX_CODE (BUFFER_MIRROR_SYNTAX_TABLE (lispbuf), 'a'); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ scache = setup_syntax_cache (scache, lispobj, lispbuf, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ offset_to_bytexpos (lispobj, startpos), ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 1); ~~~ END_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~ RE_SEARCH_RELOCATE_MOVEABLE_DATA_POINTERS (); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ /* Loop through the string, looking for a place to start matching. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (;;) ~~~~~~~~ { ~ #ifdef REGEX_BEGLINE_CHECK ~~~~~~~~~~~~~~~~~~~~~~~~~~ /* If the regex is anchored at the beginning of a line (i.e. with a ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ^), then we can speed things up by skipping to the next ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ beginning-of-line. However, to determine "beginning of line" we ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ need to look at the previous char, so can't do this check if at ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ beginning of either string. (Well, we could if at the beginning of ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the second string, but it would require additional code, and this ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ is just an optimization.) */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (anchored_at_begline && startpos > 0 && startpos != size1) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ if (range > 0) ~~~~~~~~~~~~~~ { ~ /* whose stupid idea was it anyway to make this ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ function take two strings to match?? */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int lim = 0; ~~~~~~~~~~~~ re_char *orig_d; ~~~~~~~~~~~~~~~~ re_char *stop_d; ~~~~~~~~~~~~~~~~ /* Compute limit as below in fastmap code, so we are guaranteed ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ to remain within a single string. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (startpos < size1 && startpos + range >= size1) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ lim = range - (size1 - startpos); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ d = ((const unsigned char *) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (startpos >= size1 ? string2 - size1 : string1) + startpos); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ orig_d = d; ~~~~~~~~~~~ stop_d = d + range - lim; ~~~~~~~~~~~~~~~~~~~~~~~~~ /* We want to find the next location (including the current ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ one) where the previous char is a newline, so back up one ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ and search forward for a newline. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEC_IBYTEPTR_FMT (d, fmt); /* Ok, since startpos != size1. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Written out as an if-else to avoid testing `translate' ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ inside the loop. */ ~~~~~~~~~~~~~~~~~~~~ if (TRANSLATE_P (translate)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ while (d < stop_d && ~~~~~~~~~~~~~~~~~~~~ RE_TRANSLATE_1 (itext_ichar_fmt (d, fmt, lispobj)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ != '\n') ~~~~~~~~ INC_IBYTEPTR_FMT (d, fmt); ~~~~~~~~~~~~~~~~~~~~~~~~~~ else ~~~~ while (d < stop_d && ~~~~~~~~~~~~~~~~~~~~ itext_ichar_ascii_fmt (d, fmt, lispobj) != '\n') ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ INC_IBYTEPTR_FMT (d, fmt); ~~~~~~~~~~~~~~~~~~~~~~~~~~ /* If we were stopped by a newline, skip forward over it. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Otherwise we will get in an infloop when our start position ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ was at begline. */ ~~~~~~~~~~~~~~~~~~ if (d < stop_d) ~~~~~~~~~~~~~~~ INC_IBYTEPTR_FMT (d, fmt); ~~~~~~~~~~~~~~~~~~~~~~~~~~ range -= d - orig_d; ~~~~~~~~~~~~~~~~~~~~ startpos += d - orig_d; ~~~~~~~~~~~~~~~~~~~~~~~ assert (!forward_search_p || range >= 0); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ else if (range < 0) ~~~~~~~~~~~~~~~~~~~ { ~ /* We're lazy, like in the fastmap code below */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Ichar c; ~~~~~~~~ d = ((const unsigned char *) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (startpos >= size1 ? string2 - size1 : string1) + startpos); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEC_IBYTEPTR_FMT (d, fmt); ~~~~~~~~~~~~~~~~~~~~~~~~~~ c = itext_ichar_fmt (d, fmt, lispobj); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ c = RE_TRANSLATE (c); ~~~~~~~~~~~~~~~~~~~~~ if (c != '\n') ~~~~~~~~~~~~~~ goto advance; ~~~~~~~~~~~~~ } ~ } ~ #endif /* REGEX_BEGLINE_CHECK */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* If a fastmap is supplied, skip quickly over characters that ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ cannot be the start of a match. If the pattern can match the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ null string, however, we don't need to skip characters; we want ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the first null string. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~ if (fastmap && startpos < total_size && !bufp->can_be_null) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ /* For the moment, fastmap always works as if buffer ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ is in default format, so convert chars in the search strings ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ into default format as we go along, if necessary. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ &&#### fastmap needs rethinking for 8-bit-fixed so ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ it's faster. We need it to reflect the raw ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 8-bit-fixed values. That isn't so hard if we assume ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ that the top 96 bytes represent a single 1-byte ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ charset. For 16-bit/32-bit stuff it's probably not ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ worth it to make the fastmap represent the raw, due to ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ its nature -- we'd have to use the LSB for the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap, and that causes lots of problems with Mule ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ chars, where it essentially wipes out the usefulness ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ of the fastmap entirely. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (range > 0) /* Searching forwards. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ int lim = 0; ~~~~~~~~~~~~ int irange = range; ~~~~~~~~~~~~~~~~~~~ if (startpos < size1 && startpos + range >= size1) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ lim = range - (size1 - startpos); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ d = ((const unsigned char *) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (startpos >= size1 ? string2 - size1 : string1) + startpos); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Written out as an if-else to avoid testing `translate' ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ inside the loop. */ ~~~~~~~~~~~~~~~~~~~~ if (TRANSLATE_P (translate)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ while (range > lim) ~~~~~~~~~~~~~~~~~~~ { ~ re_char *old_d = d; ~~~~~~~~~~~~~~~~~~~ #ifdef MULE ~~~~~~~~~~~ Ibyte tempch[MAX_ICHAR_LEN]; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Ichar buf_ch = ~~~~~~~~~~~~~~ RE_TRANSLATE_1 (itext_ichar_fmt (d, fmt, lispobj)); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ set_itext_ichar (tempch, buf_ch); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (fastmap[*tempch]) ~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ #else ~~~~~ if (fastmap[(unsigned char) RE_TRANSLATE_1 (*d)]) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ #endif /* MULE */ ~~~~~~~~~~~~~~~~~ INC_IBYTEPTR_FMT (d, fmt); ~~~~~~~~~~~~~~~~~~~~~~~~~~ range -= (d - old_d); ~~~~~~~~~~~~~~~~~~~~~ assert (!forward_search_p || range >= 0); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ } ~ #ifdef MULE ~~~~~~~~~~~ else if (fmt != FORMAT_DEFAULT) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ while (range > lim) ~~~~~~~~~~~~~~~~~~~ { ~ re_char *old_d = d; ~~~~~~~~~~~~~~~~~~~ Ibyte tempch[MAX_ICHAR_LEN]; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Ichar buf_ch = itext_ichar_fmt (d, fmt, lispobj); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ set_itext_ichar (tempch, buf_ch); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (fastmap[*tempch]) ~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ INC_IBYTEPTR_FMT (d, fmt); ~~~~~~~~~~~~~~~~~~~~~~~~~~ range -= (d - old_d); ~~~~~~~~~~~~~~~~~~~~~ assert (!forward_search_p || range >= 0); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ } ~ #endif /* MULE */ ~~~~~~~~~~~~~~~~~ else ~~~~ { ~ while (range > lim && !fastmap[*d]) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ re_char *old_d = d; ~~~~~~~~~~~~~~~~~~~ INC_IBYTEPTR (d); ~~~~~~~~~~~~~~~~~ range -= (d - old_d); ~~~~~~~~~~~~~~~~~~~~~ assert (!forward_search_p || range >= 0); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ } ~ startpos += irange - range; ~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ else /* Searching backwards. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ /* #### It's not clear why we don't just write a loop, like ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for the moving-forward case. Perhaps the writer got lazy, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ since backward searches aren't so common. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ d = ((const unsigned char *) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (startpos >= size1 ? string2 - size1 : string1) + startpos); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef MULE ~~~~~~~~~~~ { ~ Ibyte tempch[MAX_ICHAR_LEN]; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Ichar buf_ch = ~~~~~~~~~~~~~~ RE_TRANSLATE (itext_ichar_fmt (d, fmt, lispobj)); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ set_itext_ichar (tempch, buf_ch); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!fastmap[*tempch]) ~~~~~~~~~~~~~~~~~~~~~~ goto advance; ~~~~~~~~~~~~~ } ~ #else ~~~~~ if (!fastmap[(unsigned char) RE_TRANSLATE (*d)]) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto advance; ~~~~~~~~~~~~~ #endif /* MULE */ ~~~~~~~~~~~~~~~~~ } ~ } ~ /* If can't match the null string, and that's all we have left, fail. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (range >= 0 && startpos == total_size && fastmap ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ && !bufp->can_be_null) ~~~~~~~~~~~~~~~~~~~~~~ { ~ UNBIND_REGEX_MALLOC_CHECK (); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return -1; ~~~~~~~~~~ } ~ #ifdef emacs /* XEmacs added, w/removal of immediate_quit */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!no_quit_in_re_search) ~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ BEGIN_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~~~ QUIT; ~~~~~ END_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~ RE_SEARCH_RELOCATE_MOVEABLE_DATA_POINTERS (); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ #endif ~~~~~~ BEGIN_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~~~ val = re_match_2_internal (bufp, string1, size1, string2, size2, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ startpos, regs, stop ~~~~~~~~~~~~~~~~~~~~ RE_LISP_CONTEXT_ARGS); ~~~~~~~~~~~~~~~~~~~~~~ #ifndef REGEX_MALLOC ~~~~~~~~~~~~~~~~~~~~ ALLOCA_GARBAGE_COLLECT (); ~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ END_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~ RE_SEARCH_RELOCATE_MOVEABLE_DATA_POINTERS (); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (val >= 0) ~~~~~~~~~~~~~ { ~ UNBIND_REGEX_MALLOC_CHECK (); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return startpos; ~~~~~~~~~~~~~~~~ } ~ if (val == -2) ~~~~~~~~~~~~~~ { ~ UNBIND_REGEX_MALLOC_CHECK (); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return -2; ~~~~~~~~~~ } ~ RE_SEARCH_RELOCATE_MOVEABLE_DATA_POINTERS (); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ advance: ~~~~~~~~ if (!range) ~~~~~~~~~~~ break; ~~~~~~ else if (range > 0) ~~~~~~~~~~~~~~~~~~~ { ~ Bytecount d_size; ~~~~~~~~~~~~~~~~~ d = ((const unsigned char *) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (startpos >= size1 ? string2 - size1 : string1) + startpos); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ d_size = itext_ichar_len_fmt (d, fmt); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ range -= d_size; ~~~~~~~~~~~~~~~~ assert (!forward_search_p || range >= 0); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ startpos += d_size; ~~~~~~~~~~~~~~~~~~~ } ~ else ~~~~ { ~ Bytecount d_size; ~~~~~~~~~~~~~~~~~ /* Note startpos > size1 not >=. If we are on the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ string1/string2 boundary, we want to backup into string1. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ d = ((const unsigned char *) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (startpos > size1 ? string2 - size1 : string1) + startpos); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEC_IBYTEPTR_FMT (d, fmt); ~~~~~~~~~~~~~~~~~~~~~~~~~~ d_size = itext_ichar_len_fmt (d, fmt); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ range += d_size; ~~~~~~~~~~~~~~~~ assert (!forward_search_p || range >= 0); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ startpos -= d_size; ~~~~~~~~~~~~~~~~~~~ } ~ } ~ UNBIND_REGEX_MALLOC_CHECK (); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return -1; ~~~~~~~~~~ } /* re_search_2 */ ~~~~~~~~~~~~~~~~~~~ ~ /* Declarations and macros for re_match_2. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* This converts PTR, a pointer into one of the search strings `string1' ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ and `string2' into an offset from the beginning of that string. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define POINTER_TO_OFFSET(ptr) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (FIRST_STRING_P (ptr) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~ ? ((regoff_t) ((ptr) - string1)) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ : ((regoff_t) ((ptr) - string2 + size1))) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Macros for dealing with the split strings in re_match_2. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define MATCHING_IN_FIRST_STRING (dend == end_match_1) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Call before fetching a character with *d. This switches over to ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ string2 if necessary. */ ~~~~~~~~~~~~~~~~~~~~~~~~~ #define REGEX_PREFETCH() \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ while (d == dend) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { \ ~~~~~~~~~~~ /* End of string2 => fail. */ \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (dend == end_match_2) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto fail; \ ~~~~~~~~~~~~~~~~~~ /* End of string1 => advance to string2. */ \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ d = string2; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~ dend = end_match_2; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* Test if at very beginning or at very end of the virtual concatenation ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ of `string1' and `string2'. If only one string, it's `string2'. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define AT_STRINGS_BEG(d) ((d) == (size1 ? string1 : string2) || !size2) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define AT_STRINGS_END(d) ((d) == end2) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* XEmacs change: ~~~~~~~~~~~~~~~~~ If the given position straddles the string gap, return the equivalent ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ position that is before or after the gap, respectively; otherwise, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return the same position. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define POS_BEFORE_GAP_UNSAFE(d) ((d) == string2 ? end1 : (d)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define POS_AFTER_GAP_UNSAFE(d) ((d) == end1 ? string2 : (d)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Test if CH is a word-constituent character. (XEmacs change) */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define WORDCHAR_P(ch) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (SYNTAX (BUFFER_MIRROR_SYNTAX_TABLE (lispbuf), ch) == Sword) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Free everything we malloc. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef MATCH_MAY_ALLOCATE ~~~~~~~~~~~~~~~~~~~~~~~~~ #define FREE_VAR(var,type) if (var) REGEX_FREE (var, type); var = NULL ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define FREE_VARIABLES() \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ do { \ ~~~~~~~~~~~~~~ UNBIND_REGEX_MALLOC_CHECK (); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ REGEX_FREE_STACK (fail_stack.stack); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ FREE_VAR (regstart, re_char **); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ FREE_VAR (regend, re_char **); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ FREE_VAR (old_regstart, re_char **); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ FREE_VAR (old_regend, re_char **); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ FREE_VAR (best_regstart, re_char **); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ FREE_VAR (best_regend, re_char **); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ FREE_VAR (reg_info, register_info_type *); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ FREE_VAR (reg_dummy, re_char **); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ FREE_VAR (reg_info_dummy, register_info_type *); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } while (0) ~~~~~~~~~~~ #else /* not MATCH_MAY_ALLOCATE */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define FREE_VARIABLES() \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ do { \ ~~~~~~~~~~~ UNBIND_REGEX_MALLOC_CHECK (); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } while (0) ~~~~~~~~~~~ #endif /* MATCH_MAY_ALLOCATE */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* These values must meet several constraints. They must not be valid ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ register values, which means we can use numbers larger than MAX_REGNUM. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ They must differ by 1, because of NUM_FAILURE_ITEMS above. And the value ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for the lowest register must be larger than the value for the highest ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ register, so we do not try to actually save any registers when none are ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ active. */ ~~~~~~~~~~~ #define NO_HIGHEST_ACTIVE_REG (MAX_REGNUM + 1) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define NO_LOWEST_ACTIVE_REG (NO_HIGHEST_ACTIVE_REG + 1) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ~ /* Matching routines. */ ~~~~~~~~~~~~~~~~~~~~~~~~~ #ifndef emacs /* XEmacs never uses this. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* re_match is like re_match_2 except it takes only a single string. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int ~~~ re_match (struct re_pattern_buffer *bufp, const char *string, int size, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int pos, struct re_registers *regs ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RE_LISP_CONTEXT_ARGS_DECL) ~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ int result = re_match_2_internal (bufp, NULL, 0, (re_char *) string, size, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ pos, regs, size ~~~~~~~~~~~~~~~ RE_LISP_CONTEXT_ARGS); ~~~~~~~~~~~~~~~~~~~~~~ ALLOCA_GARBAGE_COLLECT (); ~~~~~~~~~~~~~~~~~~~~~~~~~~ return result; ~~~~~~~~~~~~~~ } ~ #endif /* not emacs */ ~~~~~~~~~~~~~~~~~~~~~~ /* re_match_2 matches the compiled pattern in BUFP against the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (virtual) concatenation of STRING1 and STRING2 (of length SIZE1 and ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ SIZE2, respectively). We start matching at POS, and stop matching ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ at STOP. ~~~~~~~~ If REGS is non-null and the `no_sub' field of BUFP is nonzero, we ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ store offsets for the substring each group matched in REGS. See the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ documentation for exactly how many groups we fill. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ We return -1 if no match, -2 if an internal error (such as the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ failure stack overflowing). Otherwise, we return the length of the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ matched substring. */ ~~~~~~~~~~~~~~~~~~~~~~ int ~~~ re_match_2 (struct re_pattern_buffer *bufp, const char *string1, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int size1, const char *string2, int size2, int pos, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ struct re_registers *regs, int stop ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RE_LISP_CONTEXT_ARGS_DECL) ~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ int result; ~~~~~~~~~~~ #ifdef emacs ~~~~~~~~~~~~ /* Update the mirror syntax table if it's dirty now, this would otherwise ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ cause a malloc() in charset_mule in re_match_2_internal() when checking ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ characters' syntax. */ ~~~~~~~~~~~~~~~~~~~~~~ SYNTAX_CODE (BUFFER_MIRROR_SYNTAX_TABLE (lispbuf), 'a'); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ scache = setup_syntax_cache (scache, lispobj, lispbuf, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ offset_to_bytexpos (lispobj, pos), ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 1); ~~~ #endif ~~~~~~ result = re_match_2_internal (bufp, (re_char *) string1, size1, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (re_char *) string2, size2, ~~~~~~~~~~~~~~~~~~~~~~~~~~~ pos, regs, stop ~~~~~~~~~~~~~~~ RE_LISP_CONTEXT_ARGS); ~~~~~~~~~~~~~~~~~~~~~~ ALLOCA_GARBAGE_COLLECT (); ~~~~~~~~~~~~~~~~~~~~~~~~~~ return result; ~~~~~~~~~~~~~~ } ~ /* This is a separate function so that we can force an alloca cleanup ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ afterwards. */ ~~~~~~~~~~~~~~~ static int ~~~~~~~~~~ re_match_2_internal (struct re_pattern_buffer *bufp, re_char *string1, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int size1, re_char *string2, int size2, int pos, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ struct re_registers *regs, int stop ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RE_LISP_CONTEXT_ARGS_MULE_DECL) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ /* General temporaries. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~ int mcnt; ~~~~~~~~~ re_char *p1; ~~~~~~~~~~~~ int should_succeed; /* XEmacs change */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Just past the end of the corresponding string. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_char *end1, *end2; ~~~~~~~~~~~~~~~~~~~~~ /* Pointers into string1 and string2, just past the last characters in ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ each to consider matching. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_char *end_match_1, *end_match_2; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Where we are in the data, and the end of the current string. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_char *d, *dend; ~~~~~~~~~~~~~~~~~~ /* Where we are in the pattern, and the end of the pattern. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ unsigned char *p; ~~~~~~~~~~~~~~~~~ re_char *pstart; ~~~~~~~~~~~~~~~~ REGISTER re_char *pend; ~~~~~~~~~~~~~~~~~~~~~~~ /* Mark the opcode just after a start_memory, so we can test for an ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ empty subpattern when we get to the stop_memory. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_char *just_past_start_mem = 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* We use this to map every character in the string. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RE_TRANSLATE_TYPE translate = bufp->translate; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Failure point stack. Each place that can handle a failure further ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ down the line pushes a failure point on this stack. It consists of ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ restart, regend, and reg_info for all registers corresponding to ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the subexpressions we're currently inside, plus the number of such ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ registers, and, finally, two char *'s. The first char * is where ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ to resume scanning the pattern; the second one is where to resume ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ scanning the strings. If the latter is zero, the failure point is ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ a ``dummy''; if a failure happens and the failure point is a dummy, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ it gets discarded and the next one is tried. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef MATCH_MAY_ALLOCATE /* otherwise, this is global. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fail_stack_type fail_stack; ~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ #ifdef DEBUG ~~~~~~~~~~~~ static int failure_id; ~~~~~~~~~~~~~~~~~~~~~~ int nfailure_points_pushed = 0, nfailure_points_popped = 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ #ifdef REGEX_REL_ALLOC ~~~~~~~~~~~~~~~~~~~~~~ /* This holds the pointer to the failure stack, when ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ it is allocated relocatably. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fail_stack_elt_t *failure_stack_ptr; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ /* We fill all the registers internally, independent of what we ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return, for use in backreferences. The number here includes ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ an element for register zero. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regnum_t num_regs = bufp->re_ngroups + 1; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* The currently active registers. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regnum_t lowest_active_reg = NO_LOWEST_ACTIVE_REG; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regnum_t highest_active_reg = NO_HIGHEST_ACTIVE_REG; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Information on the contents of registers. These are pointers into ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the input strings; they record just what was matched (on this ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ attempt) by a subexpression part of the pattern, that is, the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regnum-th regstart pointer points to where in the pattern we began ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ matching and the regnum-th regend points to right after where we ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ stopped matching the regnum-th subexpression. (The zeroth register ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ keeps track of what the whole pattern matches.) */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_char **regstart, **regend; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ /* If a group that's operated upon by a repetition operator fails to ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ match anything, then the register for its start will need to be ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ restored because it will have been set to wherever in the string we ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ are when we last see its open-group operator. Similarly for a ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ register's end. */ ~~~~~~~~~~~~~~~~~~~ #ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_char **old_regstart, **old_regend; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ /* The is_active field of reg_info helps us keep track of which (possibly ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ nested) subexpressions we are currently in. The matched_something ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ field of reg_info[reg_num] helps us tell whether or not we have ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ matched any of the pattern so far this time through the reg_num-th ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ subexpression. These two fields get reset each time through any ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ loop their register is in. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef MATCH_MAY_ALLOCATE /* otherwise, this is global. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ register_info_type *reg_info; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ /* The following record the register info as found in the above ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ variables when we find a match better than any we've seen before. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ This happens as we backtrack through the failure points, which in ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ turn happens only if we have not yet matched the entire string. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int best_regs_set = false; ~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_char **best_regstart, **best_regend; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ /* Logically, this is `best_regend[0]'. But we don't want to have to ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ allocate space for that if we're not allocating space for anything ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ else (see below). Also, we never need info about register 0 for ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ any of the other register vectors, and it seems rather a kludge to ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ treat `best_regend' differently than the rest. So we keep track of ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the end of the best match so far in a separate variable. We ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ initialize this to NULL so that when we backtrack the first time ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ and need to test it, it's not garbage. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_char *match_end = NULL; ~~~~~~~~~~~~~~~~~~~~~~~~~~ /* This helps SET_REGS_MATCHED avoid doing redundant work. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int set_regs_matched_done = 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Used when we pop values we don't care about. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_char **reg_dummy; ~~~~~~~~~~~~~~~~~~~~ register_info_type *reg_info_dummy; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ #ifdef DEBUG ~~~~~~~~~~~~ /* Counts the total number of registers pushed. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int num_regs_pushed = 0; ~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ /* 1 if this match ends in the same string (string1 or string2) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ as the best previous match. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_bool same_str_p; ~~~~~~~~~~~~~~~~~~~ /* 1 if this match is the best seen so far. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_bool best_match_p; ~~~~~~~~~~~~~~~~~~~~~ #ifdef emacs ~~~~~~~~~~~~ Internal_Format fmt = buffer_or_other_internal_format (lispobj); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef REL_ALLOC ~~~~~~~~~~~~~~~~ const Ibyte *orig_buftext = ~~~~~~~~~~~~~~~~~~~~~~~~~~~ BUFFERP (lispobj) ? ~~~~~~~~~~~~~~~~~~~ BYTE_BUF_BYTE_ADDRESS (XBUFFER (lispobj), ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ BYTE_BUF_BEG (XBUFFER (lispobj))) : ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 0; ~~ #endif ~~~~~~ #ifdef ERROR_CHECK_MALLOC ~~~~~~~~~~~~~~~~~~~~~~~~~ int depth = bind_regex_malloc_disallowed (1); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ #endif /* emacs */ ~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT1 ("\n\nEntering re_match_2.\n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ BEGIN_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~~~ INIT_FAIL_STACK (); ~~~~~~~~~~~~~~~~~~~ p = (unsigned char *) ALLOCA (bufp->used); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ END_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~ /* re_match_2_internal() modifies the compiled pattern (see the succeed_n, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ jump_n, set_number_at opcodes), make it re-entrant by working on a ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ copy. This should also give better locality of reference. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ memcpy (p, bufp->buffer, bufp->used); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ pstart = (re_char *) p; ~~~~~~~~~~~~~~~~~~~~~~~ pend = pstart + bufp->used; ~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef MATCH_MAY_ALLOCATE ~~~~~~~~~~~~~~~~~~~~~~~~~ /* Do not bother to initialize all the register variables if there are ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ no groups in the pattern, as it takes a fair amount of time. If ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ there are groups, we include space for register 0 (the whole ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ pattern), even though we never use it, since it simplifies the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ array indexing. We should fix this. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (bufp->re_ngroups) ~~~~~~~~~~~~~~~~~~~~~ { ~ BEGIN_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~~~ regstart = REGEX_TALLOC (num_regs, re_char *); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regend = REGEX_TALLOC (num_regs, re_char *); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ old_regstart = REGEX_TALLOC (num_regs, re_char *); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ old_regend = REGEX_TALLOC (num_regs, re_char *); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ best_regstart = REGEX_TALLOC (num_regs, re_char *); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ best_regend = REGEX_TALLOC (num_regs, re_char *); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ reg_info = REGEX_TALLOC (num_regs, register_info_type); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ reg_dummy = REGEX_TALLOC (num_regs, re_char *); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ reg_info_dummy = REGEX_TALLOC (num_regs, register_info_type); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ END_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~ if (!(regstart && regend && old_regstart && old_regend && reg_info ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ && best_regstart && best_regend && reg_dummy && reg_info_dummy)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ FREE_VARIABLES (); ~~~~~~~~~~~~~~~~~~ return -2; ~~~~~~~~~~ } ~ } ~ else ~~~~ { ~ /* We must initialize all our variables to NULL, so that ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `FREE_VARIABLES' doesn't try to free them. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regstart = regend = old_regstart = old_regend = best_regstart ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ = best_regend = reg_dummy = NULL; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ reg_info = reg_info_dummy = (register_info_type *) NULL; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ #endif /* MATCH_MAY_ALLOCATE */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #if defined (emacs) && defined (REL_ALLOC) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ /* If the allocations above (or the call to setup_syntax_cache() in ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_match_2) caused a rel-alloc relocation, then fix up the data ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ pointers */ ~~~~~~~~~~~ Bytecount offset = offset_post_relocation (lispobj, orig_buftext); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (offset) ~~~~~~~~~~~ { ~ string1 += offset; ~~~~~~~~~~~~~~~~~~ string2 += offset; ~~~~~~~~~~~~~~~~~~ } ~ } ~ #endif /* defined (emacs) && defined (REL_ALLOC) */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* The starting position is bogus. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (pos < 0 || pos > size1 + size2) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ FREE_VARIABLES (); ~~~~~~~~~~~~~~~~~~ return -1; ~~~~~~~~~~ } ~ /* Initialize subexpression text positions to our sentinel to mark ones that ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ no start_memory/stop_memory has been seen for. Also initialize the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ register information struct. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (mcnt = 1; mcnt < num_regs; mcnt++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ regstart[mcnt] = regend[mcnt] = old_regstart[mcnt] = old_regend[mcnt] ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ = best_regstart[mcnt] = best_regend[mcnt] = REG_UNSET_VALUE; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ REG_MATCH_NULL_STRING_P (reg_info[mcnt]) = MATCH_NULL_UNSET_VALUE; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ IS_ACTIVE (reg_info[mcnt]) = 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ MATCHED_SOMETHING (reg_info[mcnt]) = 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ EVER_MATCHED_SOMETHING (reg_info[mcnt]) = 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* We move `string1' into `string2' if the latter's empty -- but not if ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `string1' is null. */ ~~~~~~~~~~~~~~~~~~~~~~ if (size2 == 0 && string1 != NULL) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ string2 = string1; ~~~~~~~~~~~~~~~~~~ size2 = size1; ~~~~~~~~~~~~~~ string1 = 0; ~~~~~~~~~~~~ size1 = 0; ~~~~~~~~~~ } ~ end1 = string1 + size1; ~~~~~~~~~~~~~~~~~~~~~~~ end2 = string2 + size2; ~~~~~~~~~~~~~~~~~~~~~~~ /* Compute where to stop matching, within the two strings. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (stop <= size1) ~~~~~~~~~~~~~~~~~~ { ~ end_match_1 = string1 + stop; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ end_match_2 = string2; ~~~~~~~~~~~~~~~~~~~~~~ } ~ else ~~~~ { ~ end_match_1 = end1; ~~~~~~~~~~~~~~~~~~~ end_match_2 = string2 + stop - size1; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* `p' scans through the pattern as `d' scans through the data. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `dend' is the end of the input string that `d' points within. `d' ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ is advanced into the following input string whenever necessary, but ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ this happens before fetching; therefore, at the beginning of the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ loop, `d' can be pointing at the end of a string, but it cannot ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ equal `string2'. */ ~~~~~~~~~~~~~~~~~~~~ if (size1 > 0 && pos <= size1) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ d = string1 + pos; ~~~~~~~~~~~~~~~~~~ dend = end_match_1; ~~~~~~~~~~~~~~~~~~~ } ~ else ~~~~ { ~ d = string2 + pos - size1; ~~~~~~~~~~~~~~~~~~~~~~~~~~ dend = end_match_2; ~~~~~~~~~~~~~~~~~~~ } ~ DEBUG_MATCH_PRINT1 ("The compiled pattern is: \n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT_COMPILED_PATTERN (bufp, p, pend); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT1 ("The string to match is: `"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT_DOUBLE_STRING (d, string1, size1, string2, size2); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT1 ("'\n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* This loops over pattern commands. It exits by returning from the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ function if the match is complete, or it drops through if the match ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fails at this starting point in the input data. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (;;) ~~~~~~~~ { ~ DEBUG_MATCH_PRINT2 ("\n0x%zx: ", (Bytecount) p); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef emacs /* XEmacs added, w/removal of immediate_quit */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!no_quit_in_re_search) ~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ BEGIN_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~~~ QUIT; ~~~~~ END_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~ RE_MATCH_RELOCATE_MOVEABLE_DATA_POINTERS (); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ #endif ~~~~~~ if (p == pend) ~~~~~~~~~~~~~~ { /* End of pattern means we might have succeeded. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT1 ("end of pattern ... "); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* If we haven't matched the entire string, and we want the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ longest match, try backtracking. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (d != end_match_2) ~~~~~~~~~~~~~~~~~~~~~ { ~ same_str_p = (FIRST_STRING_P (match_end) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ == MATCHING_IN_FIRST_STRING); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* AIX compiler got confused when this was combined ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ with the previous declaration. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (same_str_p) ~~~~~~~~~~~~~~~ best_match_p = d > match_end; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ else ~~~~ best_match_p = !MATCHING_IN_FIRST_STRING; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT1 ("backtracking.\n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!FAIL_STACK_EMPTY ()) ~~~~~~~~~~~~~~~~~~~~~~~~~ { /* More failure points to try. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* If exceeds best match so far, save it. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!best_regs_set || best_match_p) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ best_regs_set = true; ~~~~~~~~~~~~~~~~~~~~~ match_end = d; ~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT1 ("\nSAVING match as best so far.\n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (mcnt = 1; mcnt < num_regs; mcnt++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ best_regstart[mcnt] = regstart[mcnt]; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ best_regend[mcnt] = regend[mcnt]; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ } ~ goto fail; ~~~~~~~~~~ } ~ /* If no failure points, don't restore garbage. And if ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ last match is real best match, don't restore second ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ best one. */ ~~~~~~~~~~~~ else if (best_regs_set && !best_match_p) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ restore_best_regs: ~~~~~~~~~~~~~~~~~~ /* Restore best match. It may happen that `dend == ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ end_match_1' while the restored d is in string2. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ For example, the pattern `x.*y.*z' against the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ strings `x-' and `y-z-', if the two strings are ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ not consecutive in memory. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT1 ("Restoring best registers.\n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ d = match_end; ~~~~~~~~~~~~~~ dend = ((d >= string1 && d <= end1) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ? end_match_1 : end_match_2); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (mcnt = 1; mcnt < num_regs; mcnt++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ regstart[mcnt] = best_regstart[mcnt]; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regend[mcnt] = best_regend[mcnt]; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ } ~ } /* d != end_match_2 */ ~~~~~~~~~~~~~~~~~~~~~~~~ succeed_label: ~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT1 ("Accepting match.\n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* If caller wants register contents data back, do it. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ int num_nonshy_regs = bufp->re_nsub + 1; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (regs && !bufp->no_sub) ~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ /* Have the register data arrays been allocated? */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (bufp->regs_allocated == REGS_UNALLOCATED) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { /* No. So allocate them with malloc. We need one ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ extra element beyond `num_regs' for the `-1' marker ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ GNU code uses. */ ~~~~~~~~~~~~~~~~~~ regs->num_regs = MAX (RE_NREGS, num_nonshy_regs + 1); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ BEGIN_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~~~ regs->start = TALLOC (regs->num_regs, regoff_t); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regs->end = TALLOC (regs->num_regs, regoff_t); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ END_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~ RE_MATCH_RELOCATE_MOVEABLE_DATA_POINTERS (); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (regs->start == NULL || regs->end == NULL) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ FREE_VARIABLES (); ~~~~~~~~~~~~~~~~~~ return -2; ~~~~~~~~~~ } ~ bufp->regs_allocated = REGS_REALLOCATE; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ else if (bufp->regs_allocated == REGS_REALLOCATE) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { /* Yes. If we need more elements than were already ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ allocated, reallocate them. If we need fewer, just ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ leave it alone. */ ~~~~~~~~~~~~~~~~~~~ if (regs->num_regs < num_nonshy_regs + 1) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ regs->num_regs = num_nonshy_regs + 1; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ BEGIN_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~~~ RETALLOC (regs->start, regs->num_regs, regoff_t); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RETALLOC (regs->end, regs->num_regs, regoff_t); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ END_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~ RE_MATCH_RELOCATE_MOVEABLE_DATA_POINTERS (); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (regs->start == NULL || regs->end == NULL) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ FREE_VARIABLES (); ~~~~~~~~~~~~~~~~~~ return -2; ~~~~~~~~~~ } ~ } ~ } ~ else ~~~~ { ~ /* The braces fend off a "empty body in an else-statement" ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ warning under GCC when assert expands to nothing. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ assert (bufp->regs_allocated == REGS_FIXED); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* Convert the pointer data in `regstart' and `regend' to ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ indices. Register zero has to be set differently, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ since we haven't kept track of any info for it. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (regs->num_regs > 0) ~~~~~~~~~~~~~~~~~~~~~~~ { ~ regs->start[0] = pos; ~~~~~~~~~~~~~~~~~~~~~ regs->end[0] = (MATCHING_IN_FIRST_STRING ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ? ((regoff_t) (d - string1)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ : ((regoff_t) (d - string2 + size1))); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* Map over the NUM_NONSHY_REGS non-shy internal registers. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Copy each into the corresponding external register. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ MCNT indexes external registers. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (mcnt = 1; mcnt < MIN (num_nonshy_regs, regs->num_regs); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ mcnt++) ~~~~~~~ { ~ int internal_reg = bufp->external_to_internal_register[mcnt]; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if ((int)0xDEADBEEF == internal_reg ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || REG_UNSET (regstart[internal_reg]) || ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ REG_UNSET (regend[internal_reg])) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regs->start[mcnt] = regs->end[mcnt] = -1; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ else ~~~~ { ~ regs->start[mcnt] = ~~~~~~~~~~~~~~~~~~~ (regoff_t) POINTER_TO_OFFSET (regstart[internal_reg]); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regs->end[mcnt] = ~~~~~~~~~~~~~~~~~ (regoff_t) POINTER_TO_OFFSET (regend[internal_reg]); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ } ~ } /* regs && !bufp->no_sub */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* If we have regs and the regs structure has more elements than ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ were in the pattern, set the extra elements starting with ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ NUM_NONSHY_REGS to -1. If we (re)allocated the registers, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ this is the case, because we always allocate enough to have ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ at least one -1 at the end. ~~~~~~~~~~~~~~~~~~~~~~~~~~~ We do this even when no_sub is set because some applications ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (XEmacs) reuse register structures which may contain stale ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ information, and permit attempts to access those registers. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ It would be possible to require the caller to do this, but we'd ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ have to change the API for this function to reflect that, and ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ audit all callers. Note: as of 2003-04-17 callers in XEmacs ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ do clear the registers, but it's safer to leave this code in ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ because of reallocation. ~~~~~~~~~~~~~~~~~~~~~~~~ */ ~~ if (regs && regs->num_regs > 0) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (mcnt = num_nonshy_regs; mcnt < regs->num_regs; mcnt++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regs->start[mcnt] = regs->end[mcnt] = -1; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ DEBUG_MATCH_PRINT4 ("%u failure points pushed, %u popped (%u remain).\n", ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ nfailure_points_pushed, nfailure_points_popped, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ nfailure_points_pushed - nfailure_points_popped); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT2 ("%u registers pushed.\n", num_regs_pushed); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ mcnt = d - pos - (MATCHING_IN_FIRST_STRING ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ? string1 ~~~~~~~~~ : string2 - size1); ~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT2 ("Returning %d from re_match_2.\n", mcnt); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ FREE_VARIABLES (); ~~~~~~~~~~~~~~~~~~ return mcnt; ~~~~~~~~~~~~ } ~ /* Otherwise match next pattern command. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ switch ((re_opcode_t) *p++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ /* Ignore these. Used to ignore the n of succeed_n's which ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ currently have n == 0. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~ case no_op: ~~~~~~~~~~~ DEBUG_MATCH_PRINT1 ("EXECUTING no_op.\n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case succeed: ~~~~~~~~~~~~~ DEBUG_MATCH_PRINT1 ("EXECUTING succeed.\n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto succeed_label; ~~~~~~~~~~~~~~~~~~~ /* Match exactly a string of length n in the pattern. The ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ following byte in the pattern defines n, and the n bytes after ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ that make up the string to match. (Under Mule, this will be in ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the default internal format.) */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case exactn: ~~~~~~~~~~~~ mcnt = *p++; ~~~~~~~~~~~~ DEBUG_MATCH_PRINT2 ("EXECUTING exactn %d.\n", mcnt); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* This is written out as an if-else so we don't waste time ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ testing `translate' inside the loop. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (TRANSLATE_P (translate)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ do ~~ { ~ #ifdef MULE ~~~~~~~~~~~ Bytecount pat_len; ~~~~~~~~~~~~~~~~~~ REGEX_PREFETCH (); ~~~~~~~~~~~~~~~~~~ if (RE_TRANSLATE_1 (itext_ichar_fmt (d, fmt, lispobj)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ != itext_ichar (p)) ~~~~~~~~~~~~~~~~~~~ goto fail; ~~~~~~~~~~ pat_len = itext_ichar_len (p); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ p += pat_len; ~~~~~~~~~~~~~ INC_IBYTEPTR_FMT (d, fmt); ~~~~~~~~~~~~~~~~~~~~~~~~~~ mcnt -= pat_len; ~~~~~~~~~~~~~~~~ #else /* not MULE */ ~~~~~~~~~~~~~~~~~~~~ REGEX_PREFETCH (); ~~~~~~~~~~~~~~~~~~ if ((unsigned char) RE_TRANSLATE_1 (*d++) != *p++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto fail; ~~~~~~~~~~ mcnt--; ~~~~~~~ #endif ~~~~~~ } ~ while (mcnt > 0); ~~~~~~~~~~~~~~~~~ } ~ else ~~~~ { ~ #ifdef MULE ~~~~~~~~~~~ /* If buffer format is default, then we can shortcut and just ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ compare the text directly, byte by byte. Otherwise, we ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ need to go character by character. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (fmt != FORMAT_DEFAULT) ~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ do ~~ { ~ Bytecount pat_len; ~~~~~~~~~~~~~~~~~~ REGEX_PREFETCH (); ~~~~~~~~~~~~~~~~~~ if (itext_ichar_fmt (d, fmt, lispobj) != ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ itext_ichar (p)) ~~~~~~~~~~~~~~~~ goto fail; ~~~~~~~~~~ pat_len = itext_ichar_len (p); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ p += pat_len; ~~~~~~~~~~~~~ INC_IBYTEPTR_FMT (d, fmt); ~~~~~~~~~~~~~~~~~~~~~~~~~~ mcnt -= pat_len; ~~~~~~~~~~~~~~~~ } ~ while (mcnt > 0); ~~~~~~~~~~~~~~~~~ } ~ else ~~~~ #endif ~~~~~~ { ~ do ~~ { ~ REGEX_PREFETCH (); ~~~~~~~~~~~~~~~~~~ if (*d++ != *p++) goto fail; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ mcnt--; ~~~~~~~ } ~ while (mcnt > 0); ~~~~~~~~~~~~~~~~~ } ~ } ~ SET_REGS_MATCHED (); ~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ /* Match any character except possibly a newline or a null. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case anychar: ~~~~~~~~~~~~~ DEBUG_MATCH_PRINT1 ("EXECUTING anychar.\n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ REGEX_PREFETCH (); ~~~~~~~~~~~~~~~~~~ if ((!(bufp->syntax & RE_DOT_NEWLINE) && ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RE_TRANSLATE (itext_ichar_fmt (d, fmt, lispobj)) == '\n') ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || (bufp->syntax & RE_DOT_NOT_NULL && ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RE_TRANSLATE (itext_ichar_fmt (d, fmt, lispobj)) == ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ '\000')) ~~~~~~~~ goto fail; ~~~~~~~~~~ SET_REGS_MATCHED (); ~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT2 (" Matched `%c'.\n", *d); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ INC_IBYTEPTR_FMT (d, fmt); /* XEmacs change */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case charset: ~~~~~~~~~~~~~ case charset_not: ~~~~~~~~~~~~~~~~~ { ~ REGISTER Ichar c; ~~~~~~~~~~~~~~~~~ re_bool not_p = (re_opcode_t) *(p - 1) == charset_not; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT2 ("EXECUTING charset%s.\n", not_p ? "_not" : ""); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ REGEX_PREFETCH (); ~~~~~~~~~~~~~~~~~~ c = itext_ichar_fmt (d, fmt, lispobj); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ c = RE_TRANSLATE (c); /* The character to match. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Cast to `unsigned int' instead of `unsigned char' in case the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bit list is a full 32 bytes long. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if ((unsigned int)c < (unsigned int) (*p * BYTEWIDTH) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ && p[1 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH))) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ not_p = !not_p; ~~~~~~~~~~~~~~~ p += 1 + *p; ~~~~~~~~~~~~ if (!not_p) goto fail; ~~~~~~~~~~~~~~~~~~~~~~ SET_REGS_MATCHED (); ~~~~~~~~~~~~~~~~~~~~ INC_IBYTEPTR_FMT (d, fmt); /* XEmacs change */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ } ~ #ifdef MULE ~~~~~~~~~~~ case charset_mule: ~~~~~~~~~~~~~~~~~~ case charset_mule_not: ~~~~~~~~~~~~~~~~~~~~~~ { ~ REGISTER Ichar c; ~~~~~~~~~~~~~~~~~ re_bool not_p = (re_opcode_t) *(p - 1) == charset_mule_not; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Bitbyte class_bits = *p++; ~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT2 ("EXECUTING charset_mule%s.\n", ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ not_p ? "_not" : ""); ~~~~~~~~~~~~~~~~~~~~~ REGEX_PREFETCH (); ~~~~~~~~~~~~~~~~~~ c = itext_ichar_fmt (d, fmt, lispobj); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ c = RE_TRANSLATE (c); /* The character to match. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if ((class_bits && ~~~~~~~~~~~~~~~~~~ ((class_bits & BIT_WORD && ISWORD (c)) /* = ALNUM */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || (class_bits & BIT_ALPHA && ISALPHA (c)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || (class_bits & BIT_SPACE && ISSPACE (c)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || (class_bits & BIT_PUNCT && ISPUNCT (c)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || (TRANSLATE_P (translate) ? ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (class_bits & (BIT_UPPER | BIT_LOWER) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ && !NOCASEP (lispbuf, c)) ~~~~~~~~~~~~~~~~~~~~~~~~~ : ((class_bits & BIT_UPPER && ISUPPER (c)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || (class_bits & BIT_LOWER && ISLOWER (c)))))) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || EQ (Qt, unified_range_table_lookup ((void *) p, c, Qnil))) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ not_p = !not_p; ~~~~~~~~~~~~~~~ } ~ p += unified_range_table_bytes_used ((void *) p); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!not_p) goto fail; ~~~~~~~~~~~~~~~~~~~~~~ SET_REGS_MATCHED (); ~~~~~~~~~~~~~~~~~~~~ INC_IBYTEPTR_FMT (d, fmt); ~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ } ~ #endif /* MULE */ ~~~~~~~~~~~~~~~~~ /* The beginning of a group is represented by start_memory. The ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ arguments are the register number in the next two bytes, and the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ number of groups inner to this one in the two bytes thereafter. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ The text matched within the group is recorded (in the internal ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ registers data structure) under the register number. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case start_memory: ~~~~~~~~~~~~~~~~~~ { ~ regnum_t regno; ~~~~~~~~~~~~~~~ /* Find out if this group can match the empty string. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ p1 = p; /* To send to group_match_null_string_p. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ EXTRACT_NONNEGATIVE_AND_INCR (regno, p); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT3 ("EXECUTING start_memory %d (%d):\n", ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regno, extract_number (p)); ~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (REG_MATCH_NULL_STRING_P (reg_info[regno]) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ == MATCH_NULL_UNSET_VALUE) ~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ REG_MATCH_NULL_STRING_P (reg_info[regno]) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ = group_match_null_string_p (&p1, pend, reg_info); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ DEBUG_MATCH_PRINT2 (" group CAN%s match null string\n", ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ REG_MATCH_NULL_STRING_P (reg_info[regno]) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ? "NOT" : ""); ~~~~~~~~~~~~~~ /* Save the position in the string where we were the last time ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ we were at this open-group operator in case the group is ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ operated upon by a repetition operator, e.g., with `(a*)*b' ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ against `ab'; then we want to ignore where we are now in the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ string in case this attempt to match fails. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ old_regstart[regno] = REG_MATCH_NULL_STRING_P (reg_info[regno]) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ? REG_UNSET (regstart[regno]) ? d : regstart[regno] ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ : regstart[regno]; ~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT2 (" old_regstart: %zd\n", ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ POINTER_TO_OFFSET (old_regstart[regno])); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regstart[regno] = d; ~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT2 (" regstart: %zd\n", ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ POINTER_TO_OFFSET (regstart[regno])); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ IS_ACTIVE (reg_info[regno]) = 1; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ MATCHED_SOMETHING (reg_info[regno]) = 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Clear this whenever we change the register activity status. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ set_regs_matched_done = 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~ /* This is the new highest active register. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ highest_active_reg = regno; ~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* If nothing was active before, this is the new lowest active ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ register. */ ~~~~~~~~~~~~~ if (lowest_active_reg == NO_LOWEST_ACTIVE_REG) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ lowest_active_reg = regno; ~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Move past the inner group count. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ p += 2; ~~~~~~~ just_past_start_mem = p; ~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ } ~ /* The stop_memory opcode represents the end of a group. Its ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ arguments are the same as start_memory's: the register ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ number, and the number of inner groups. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case stop_memory: ~~~~~~~~~~~~~~~~~ { ~ regnum_t regno, inner_groups; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ EXTRACT_NONNEGATIVE_AND_INCR (regno, p); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ EXTRACT_NONNEGATIVE_AND_INCR (inner_groups, p); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT3 ("EXECUTING stop_memory %d (%d):\n", ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regno, inner_groups); ~~~~~~~~~~~~~~~~~~~~~ /* We need to save the string position the last time we were at ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ this close-group operator in case the group is operated ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ upon by a repetition operator, e.g., with `((a*)*(b*)*)*' ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ against `aba'; then we want to ignore where we are now in ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the string in case this attempt to match fails. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ old_regend[regno] = REG_MATCH_NULL_STRING_P (reg_info[regno]) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ? REG_UNSET (regend[regno]) ? d : regend[regno] ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ : regend[regno]; ~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT2 (" old_regend: %zd\n", ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ POINTER_TO_OFFSET (old_regend[regno])); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regend[regno] = d; ~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT2 (" regend: %zd\n", ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ POINTER_TO_OFFSET (regend[regno])); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* This register isn't active anymore. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ IS_ACTIVE (reg_info[regno]) = 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Clear this whenever we change the register activity status. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ set_regs_matched_done = 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~ /* If this was the only register active, nothing is active ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ anymore. */ ~~~~~~~~~~~~ if (lowest_active_reg == highest_active_reg) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ lowest_active_reg = NO_LOWEST_ACTIVE_REG; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ highest_active_reg = NO_HIGHEST_ACTIVE_REG; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ else ~~~~ { ~ /* We must scan for the new highest active register, since it ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ isn't necessarily one less than now: consider ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (a(b)c(d(e)f)g). When group 3 ends, after the f), the new ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ highest active register is 1. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regnum_t r = regno - 1; ~~~~~~~~~~~~~~~~~~~~~~~ while (r > 0 && !IS_ACTIVE (reg_info[r])) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ r--; ~~~~ /* If we end up at register zero, that means that we saved ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the registers as the result of an `on_failure_jump', not ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ a `start_memory', and we jumped to past the innermost ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `stop_memory'. For example, in ((.)*) we save registers ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 1 and 2 as a result of the *, but when we pop back to the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ second ), we are at the stop_memory 1. Thus, nothing is ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ active. */ ~~~~~~~~~~~ if (r == 0) ~~~~~~~~~~~ { ~ lowest_active_reg = NO_LOWEST_ACTIVE_REG; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ highest_active_reg = NO_HIGHEST_ACTIVE_REG; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ else ~~~~ { ~ highest_active_reg = r; ~~~~~~~~~~~~~~~~~~~~~~~ /* 98/9/21 jhod: We've also gotta set lowest_active_reg, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ don't we? */ ~~~~~~~~~~~~ r = 1; ~~~~~~ while (r < highest_active_reg && !IS_ACTIVE(reg_info[r])) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ r++; ~~~~ lowest_active_reg = r; ~~~~~~~~~~~~~~~~~~~~~~ } ~ } ~ /* If just failed to match something this time around with a ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ group that's operated on by a repetition operator, try to ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ force exit from the ``loop'', and restore the register ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ information for this group that we had before trying this ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ last match. */ ~~~~~~~~~~~~~~~ if ((!MATCHED_SOMETHING (reg_info[regno]) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || just_past_start_mem == p - 4) && p < pend) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ re_bool is_a_jump_n = false; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ p1 = p; ~~~~~~~ mcnt = 0; ~~~~~~~~~ switch ((re_opcode_t) *p1++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ case jump_n: ~~~~~~~~~~~~ is_a_jump_n = true; ~~~~~~~~~~~~~~~~~~~ case pop_failure_jump: ~~~~~~~~~~~~~~~~~~~~~~ case maybe_pop_jump: ~~~~~~~~~~~~~~~~~~~~ case jump: ~~~~~~~~~~ case dummy_failure_jump: ~~~~~~~~~~~~~~~~~~~~~~~~ EXTRACT_NUMBER_AND_INCR (mcnt, p1); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (is_a_jump_n) ~~~~~~~~~~~~~~~~ p1 += 2; ~~~~~~~~ break; ~~~~~~ default: ~~~~~~~~ /* do nothing */ ; ~~~~~~~~~~~~~~~~~~ } ~ p1 += mcnt; ~~~~~~~~~~~ /* If the next operation is a jump backwards in the pattern ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ to an on_failure_jump right before the start_memory ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ corresponding to this stop_memory, exit from the loop ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ by forcing a failure after pushing on the stack the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ on_failure_jump's jump in the pattern, and d. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (mcnt < 0 && (re_opcode_t) *p1 == on_failure_jump ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ && (re_opcode_t) p1[3] == start_memory && ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regno == extract_nonnegative (p1 + 4)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ /* If this group ever matched anything, then restore ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ what its registers were before trying this last ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ failed match, e.g., with `(a*)*b' against `ab' for ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regstart[1], and, e.g., with `((a*)*(b*)*)*' against ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `aba' for regend[3]. ~~~~~~~~~~~~~~~~~~~~ Also restore the registers for inner groups for, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ e.g., `((a*)(b*))*' against `aba' (register 3 would ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ otherwise get trashed). */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (EVER_MATCHED_SOMETHING (reg_info[regno])) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ int r; ~~~~~~ EVER_MATCHED_SOMETHING (reg_info[regno]) = 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Restore this and inner groups' (if any) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ registers. */ ~~~~~~~~~~~~~~ for (r = regno; r < regno + inner_groups; r++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ regstart[r] = old_regstart[r]; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* xx why this test? */ ~~~~~~~~~~~~~~~~~~~~~~~~ if (old_regend[r] >= regstart[r]) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regend[r] = old_regend[r]; ~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ } ~ p1++; ~~~~~ EXTRACT_NUMBER_AND_INCR (mcnt, p1); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PUSH_FAILURE_POINT (p1 + mcnt, d, -2); ~~~~~~~~~~~~~~~~~~~~~~~~~~ regex.c:774:55: note: in definition of macro 'DEBUG_FAIL_PRINT2' if (debug_regexps & RE_DEBUG_FAILURE_POINT) printf (x1, x2) ^~ regex.c:6370:7: note: in expansion of macro 'PUSH_FAILURE_POINT' PUSH_FAILURE_POINT (p1 + mcnt, d, -2); ^~~~~~~~~~~~~~~~~~ regex.c:1817:26: warning: format '%zx' expects argument of type 'size_t', but argument 2 has type 'long int' [-Wformat=] DEBUG_FAIL_PRINT2 (" Pushing string 0x%zx: `", \ ^ (Bytecount) string_place); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_FAIL_PRINT_DOUBLE_STRING (string_place, string1, size1, \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ string2, size2); \ ~~~~~~~~~~~~~~~~~~~~ DEBUG_FAIL_PRINT1 ("'\n"); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_FAIL_PRINT2 (" Pushing failure id: %u\n", failure_id); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } \ ~~~~~~~~~~~ } while (0) ~~~~~~~~~~~ /* This is the number of items that are pushed and popped on the stack ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for each register. */ ~~~~~~~~~~~~~~~~~~~~~~ #define NUM_REG_ITEMS 3 ~~~~~~~~~~~~~~~~~~~~~~~~ /* Individual items aside from the registers. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef DEBUG ~~~~~~~~~~~~ #define NUM_NONREG_ITEMS 5 /* Includes failure point id. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #else ~~~~~ #define NUM_NONREG_ITEMS 4 ~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ /* We push at most this many items on the stack. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* We used to use (num_regs - 1), which is the number of registers ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ this regexp will save; but that was changed to 5 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ to avoid stack overflow for a regexp with lots of parens. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define MAX_FAILURE_ITEMS (5 * NUM_REG_ITEMS + NUM_NONREG_ITEMS) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* We actually push this many items. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define NUM_FAILURE_ITEMS \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ((highest_active_reg - lowest_active_reg + 1) * NUM_REG_ITEMS \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + NUM_NONREG_ITEMS) ~~~~~~~~~~~~~~~~~~~ /* How many items can still be added to the stack without overflowing it. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define REMAINING_AVAIL_SLOTS ((fail_stack).size - (fail_stack).avail) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Pops what PUSH_FAIL_STACK pushes. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ We restore into the following parameters, all of which should be lvalues: ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ STR -- the saved data position. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PAT -- the saved pattern position. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ LOW_REG, HIGH_REG -- the highest and lowest active registers. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ REGSTART, REGEND -- arrays of string positions. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ REG_INFO -- array of information about each subexpression. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Also assumes the variables `fail_stack' and (if debugging), `bufp', ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `pend', `string1', `size1', `string2', and `size2'. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define POP_FAILURE_POINT(str, pat, low_reg, high_reg, \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regstart, regend, reg_info) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ do { \ ~~~~~~~~~~~~~~ DEBUG_STATEMENT (int ffailure_id;) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int this_reg; \ ~~~~~~~~~~~~~~~~~~~~~~ const unsigned char *string_temp; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ \ ~ /* Remove failure points and point to how many regs pushed. */ \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ assert (fail_stack.avail >= NUM_NONREG_ITEMS); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ \ ~ if (DEBUG_RUNTIME_FLAGS & RE_DEBUG_FAILURE_POINT) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { \ ~~~~~~~~~~~ DEBUG_FAIL_PRINT1 ("POP_FAILURE_POINT:\n"); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_FAIL_PRINT2 (" Before pop, next avail: %zd\n", \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (Bytecount) fail_stack.avail); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_FAIL_PRINT2 (" size: %zd\n", \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (Bytecount) fail_stack.size); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } \ ~~~~~~~~~~~ \ ~ DEBUG_STATEMENT (ffailure_id = POP_FAILURE_INT()); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ \ ~ /* If the saved string location is NULL, it came from an \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ on_failure_keep_string_jump opcode, and we want to throw away the \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ saved NULL, thus retaining our current position in the string. */ \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ string_temp = POP_FAILURE_RELOCATABLE (); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (string_temp != NULL) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ str = string_temp; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~ \ ~ pat = (unsigned char *) POP_FAILURE_POINTER (); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ \ ~ /* Restore register info. */ \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ high_reg = POP_FAILURE_INT (); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ low_reg = POP_FAILURE_INT (); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ \ ~ if (DEBUG_RUNTIME_FLAGS & RE_DEBUG_FAILURE_POINT) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { \ ~~~~~~~~~~~ DEBUG_FAIL_PRINT2 (" Popping failure id: %d\n", ffailure_id); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_FAIL_PRINT2 (" Popping string 0x%zx: `", (Bytecount) str); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_FAIL_PRINT_DOUBLE_STRING (str, string1, size1, \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ string2, size2); \ ~~~~~~~~~~~~~~~~~~~~ DEBUG_FAIL_PRINT1 ("'\n"); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_FAIL_PRINT2 (" Popping pattern 0x%zx: ", (Bytecount) pat); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_FAIL_PRINT_COMPILED_PATTERN (bufp, pat, pend); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_FAIL_PRINT2 (" Popping high active reg: %d\n", high_reg); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_FAIL_PRINT2 (" Popping low active reg: %d\n", low_reg); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } \ ~~~~~~~~~~~ \ ~ for (this_reg = high_reg; this_reg >= low_reg; this_reg--) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { \ ~~~~~~~~~~~ reg_info[this_reg].word = POP_FAILURE_ELT (); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regend[this_reg] = POP_FAILURE_RELOCATABLE (); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regstart[this_reg] = POP_FAILURE_RELOCATABLE (); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ \ ~ if (DEBUG_RUNTIME_FLAGS & RE_DEBUG_FAILURE_POINT) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { \ ~~~~~~~~~~ DEBUG_FAIL_PRINT2 (" Popping reg: %d\n", this_reg); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_FAIL_PRINT2 (" info: 0x%zx\n", \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ * (Bytecount *) ®_info[this_reg]); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_FAIL_PRINT2 (" end: 0x%zx\n", \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (Bytecount) regend[this_reg]); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_FAIL_PRINT2 (" start: 0x%zx\n", \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (Bytecount) regstart[this_reg]); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } \ ~~~~~~~~~~ } \ ~~~~~~~~~~~ \ ~ set_regs_matched_done = 0; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_STATEMENT (nfailure_points_popped++); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } while (0) /* POP_FAILURE_POINT */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ~ /* Structure for per-register (a.k.a. per-group) information. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Other register information, such as the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ starting and ending positions (which are addresses), and the list of ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ inner groups (which is a bits list) are maintained in separate ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ variables. ~~~~~~~~~~ We are making a (strictly speaking) nonportable assumption here: that ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the compiler will pack our bit fields into something that fits into ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the type of `word', i.e., is something that fits into one item on the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ failure stack. */ ~~~~~~~~~~~~~~~~~~ typedef union ~~~~~~~~~~~~~ { ~ fail_stack_elt_t word; ~~~~~~~~~~~~~~~~~~~~~~ struct ~~~~~~ { ~ /* This field is one if this group can match the empty string, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ zero if not. If not yet determined, `MATCH_NULL_UNSET_VALUE'. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define MATCH_NULL_UNSET_VALUE 3 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ unsigned int match_null_string_p : 2; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ unsigned int is_active : 1; ~~~~~~~~~~~~~~~~~~~~~~~~~~~ unsigned int matched_something : 1; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ unsigned int ever_matched_something : 1; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } bits; ~~~~~~~ } register_info_type; ~~~~~~~~~~~~~~~~~~~~~ #define REG_MATCH_NULL_STRING_P(R) ((R).bits.match_null_string_p) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define IS_ACTIVE(R) ((R).bits.is_active) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define MATCHED_SOMETHING(R) ((R).bits.matched_something) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define EVER_MATCHED_SOMETHING(R) ((R).bits.ever_matched_something) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Call this when have matched a real character; it sets `matched' flags ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for the subexpressions which we are currently inside. Also records ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ that those subexprs have matched. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define SET_REGS_MATCHED() \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ do \ ~~~~~~~~~~~~ { \ ~~~~~~~~~~~ if (!set_regs_matched_done) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { \ ~~~~~~~~~~ int r; \ ~~~~~~~~~~~~~~ set_regs_matched_done = 1; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (r = lowest_active_reg; r <= highest_active_reg; r++) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { \ ~~~~~~~~~~ MATCHED_SOMETHING (reg_info[r]) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ = EVER_MATCHED_SOMETHING (reg_info[r]) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ = 1; \ ~~~~~~~~~~~~ } \ ~~~~~~~~~~ } \ ~~~~~~~~~~ } \ ~~~~~~~~~~~ while (0) ~~~~~~~~~ ~ /* Subroutine declarations and macros for regex_compile. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Fetch the next character in the uncompiled pattern---translating it ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if necessary. */ ~~~~~~~~~~~~~~~~~ #define PATFETCH(c) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~ do { \ ~~~~~~~~~~~~~~ PATFETCH_RAW (c); \ ~~~~~~~~~~~~~~~~~~~~~~~~~ c = RE_TRANSLATE (c); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } while (0) ~~~~~~~~~~~ /* Fetch the next character in the uncompiled pattern, with no ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ translation. */ ~~~~~~~~~~~~~~~~ #define PATFETCH_RAW(c) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ do {if (p == pend) return REG_EEND; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ assert (p < pend); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~ c = itext_ichar (p); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ INC_IBYTEPTR (p); \ ~~~~~~~~~~~~~~~~~~~~~~~~~ } while (0) ~~~~~~~~~~~ /* Go backwards one character in the pattern. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define PATUNFETCH DEC_IBYTEPTR (p) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* If `translate' is non-null, return translate[D], else just D. We ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ cast the subscript to translate because some data is declared as ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `char *', to avoid warnings when a string constant is passed. But ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ when we use a character as a subscript we must make it unsigned. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define RE_TRANSLATE(d) \ ~~~~~~~~~~~~~~~~~~~~~~~~~ (TRANSLATE_P (translate) ? RE_TRANSLATE_1 (d) : (d)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Macros for outputting the compiled pattern into `buffer'. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* If the buffer isn't allocated when it comes in, use this. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define INIT_BUF_SIZE 32 ~~~~~~~~~~~~~~~~~~~~~~~~~ /* Make sure we have at least N more bytes of space in buffer. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define GET_BUFFER_SPACE(n) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ while (buf_end - bufp->buffer + (n) > (ptrdiff_t) bufp->allocated) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ EXTEND_BUFFER () ~~~~~~~~~~~~~~~~ /* Make sure we have one more byte of buffer space and then add C to it. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define BUF_PUSH(c) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~ do { \ ~~~~~~~~~~~~~~ GET_BUFFER_SPACE (1); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ *buf_end++ = (unsigned char) (c); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } while (0) ~~~~~~~~~~~ /* Ensure we have two more bytes of buffer space and then append C1 and C2. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define BUF_PUSH_2(c1, c2) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ do { \ ~~~~~~~~~~~~~~ GET_BUFFER_SPACE (2); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ *buf_end++ = (unsigned char) (c1); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ *buf_end++ = (unsigned char) (c2); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } while (0) ~~~~~~~~~~~ /* As with BUF_PUSH_2, except for three bytes. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define BUF_PUSH_3(c1, c2, c3) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ do { \ ~~~~~~~~~~~~~~ GET_BUFFER_SPACE (3); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ *buf_end++ = (unsigned char) (c1); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ *buf_end++ = (unsigned char) (c2); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ *buf_end++ = (unsigned char) (c3); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } while (0) ~~~~~~~~~~~ /* Store a jump with opcode OP at LOC to location TO. We store a ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ relative address offset by the three bytes the jump itself occupies. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define STORE_JUMP(op, loc, to) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ store_op1 (op, loc, (to) - (loc) - 3) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Likewise, for a two-argument jump. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define STORE_JUMP2(op, loc, to, arg) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ store_op2 (op, loc, (to) - (loc) - 3, arg) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Like `STORE_JUMP', but for inserting. Assume `buf_end' is the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ buffer end. */ ~~~~~~~~~~~~~~~ #define INSERT_JUMP(op, loc, to) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ insert_op1 (op, loc, (to) - (loc) - 3, buf_end) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Like `STORE_JUMP2', but for inserting. Assume `buf_end' is the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ buffer end. */ ~~~~~~~~~~~~~~~ #define INSERT_JUMP2(op, loc, to, arg) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ insert_op2 (op, loc, (to) - (loc) - 3, arg, buf_end) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Extend the buffer by twice its current size via realloc and ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ reset the pointers that pointed into the old block to point to the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ correct places in the new one. If extending the buffer results in it ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ being larger than RE_MAX_BUF_SIZE, then flag memory exhausted. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define EXTEND_BUFFER() \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ do { \ ~~~~~~~~~~~~~~~ re_char *old_buffer = bufp->buffer; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (bufp->allocated == RE_MAX_BUF_SIZE) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return REG_ESIZE; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->allocated <<= 1; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (bufp->allocated > RE_MAX_BUF_SIZE) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->allocated = RE_MAX_BUF_SIZE; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->buffer = \ ~~~~~~~~~~~~~~~~~~~~~~~ (unsigned char *) xrealloc (bufp->buffer, bufp->allocated); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (bufp->buffer == NULL) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return REG_ESPACE; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~ /* If the buffer moved, move all the pointers into it. */ \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (old_buffer != bufp->buffer) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { \ ~~~~~~~~~~~~ buf_end = (buf_end - old_buffer) + bufp->buffer; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ begalt = (begalt - old_buffer) + bufp->buffer; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (fixup_alt_jump) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~ fixup_alt_jump = (fixup_alt_jump - old_buffer) + bufp->buffer; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (laststart) \ ~~~~~~~~~~~~~~~~~~~~~~~ laststart = (laststart - old_buffer) + bufp->buffer; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (pending_exact) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~ pending_exact = (pending_exact - old_buffer) + bufp->buffer; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } \ ~~~~~~~~~~~~ } while (0) ~~~~~~~~~~~ #define INIT_REG_TRANSLATE_SIZE 5 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Macros for the compile stack. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Since offsets can go either forwards or backwards, this type needs to be ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ able to hold values from -(RE_MAX_BUF_SIZE - 1) to RE_MAX_BUF_SIZE - 1. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ typedef int pattern_offset_t; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ typedef struct ~~~~~~~~~~~~~~ { ~ pattern_offset_t begalt_offset; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ pattern_offset_t fixup_alt_jump; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ pattern_offset_t inner_group_offset; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ pattern_offset_t laststart_offset; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regnum_t regnum; ~~~~~~~~~~~~~~~~ } compile_stack_elt_t; ~~~~~~~~~~~~~~~~~~~~~~ typedef struct ~~~~~~~~~~~~~~ { ~ compile_stack_elt_t *stack; ~~~~~~~~~~~~~~~~~~~~~~~~~~~ int size; ~~~~~~~~~ int avail; /* Offset of next open position. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } compile_stack_type; ~~~~~~~~~~~~~~~~~~~~~ #define INIT_COMPILE_STACK_SIZE 32 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define COMPILE_STACK_EMPTY (compile_stack.avail == 0) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define COMPILE_STACK_FULL (compile_stack.avail == compile_stack.size) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* The next available element. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define COMPILE_STACK_TOP (compile_stack.stack[compile_stack.avail]) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Set the bit for character C in a bit vector. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define SET_LIST_BIT(c) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (buf_end[((unsigned char) (c)) / BYTEWIDTH] \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ |= 1 << (((unsigned char) c) % BYTEWIDTH)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef MULE ~~~~~~~~~~~ /* Set the "bit" for character C in a range table. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define SET_RANGETAB_BIT(c) put_range_table (rtab, c, c, Qt) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ #ifdef emacs ~~~~~~~~~~~~ /* Parse the longest number we can, but don't produce a bignum, that can't ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ correspond to anything we're interested in and would needlessly complicate ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ code. Also avoid the silent overflow issues of the non-emacs code below. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ If the string at P is not exhausted, leave P pointing at the next ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (probable-)non-digit byte encountered. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define GET_UNSIGNED_NUMBER(num) do \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Ibyte *_gus_numend = NULL; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Lisp_Object _gus_numno; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* most-positive-fixnum on 32 bit XEmacs is 10 decimal digits, \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ nine will keep us in fixnum territory no matter our \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ architecture */ \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Bytecount limit = min (pend - p, 9); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ \ ~ /* Require that any digits are ASCII. We already require that \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the user type ASCII in order to type {,(,|, etc, and there is \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the potential for security holes in the future if we allow \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ non-ASCII digits to specify groups in regexps and other \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ code that parses regexps is not aware of this. */ \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ _gus_numno = parse_integer (p, &_gus_numend, limit, 10, 1, \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Vdigit_fixnum_ascii); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (FIXNUMP (_gus_numno) && XREALFIXNUM (_gus_numno) >= 0) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ num = XREALFIXNUM (_gus_numno); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ p = _gus_numend; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } while (0) ~~~~~~~~~~~ #else ~~~~~ /* Get the next unsigned number in the uncompiled pattern. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define GET_UNSIGNED_NUMBER(num) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { if (p != pend) \ ~~~~~~~~~~~~~~~~~~~~~~~~ { \ ~~~~~~~~~~~ int _gun_do_unfetch = 1; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PATFETCH (c); \ ~~~~~~~~~~~~~~~~~~~~~~ while (ISDIGIT (c)) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~ { \ ~~~~~~~~~~~ if (num < 0) \ ~~~~~~~~~~~~~~~~~~~~ num = 0; \ ~~~~~~~~~~~~~~~~ num = num * 10 + c - '0'; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (p == pend) \ ~~~~~~~~~~~~~~~~~~~~~~ { \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ _gun_do_unfetch = 0; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; \ ~~~~~~~~~~~~~~~ } \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PATFETCH (c); \ ~~~~~~~~~~~~~~~~~~~~ } \ ~~~~~~~~~~~ if (_gun_do_unfetch) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Make sure P points to the next non-digit character. */ \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PATUNFETCH; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ #endif ~~~~~~ /* Map a string to the char class it names (if any). BEG points to the string ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ to be parsed and LIMIT is the length, in bytes, of that string. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ XEmacs; this only handles the NAME part of the [:NAME:] specification of a ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ character class name. The GNU emacs version of this function attempts to ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ handle the string from [: onwards, and is called re_wctype_parse. Our ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ approach means the function doesn't need to be called with every character ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ class encountered. ~~~~~~~~~~~~~~~~~~ LENGTH would be a Bytecount if this function didn't need to be compiled ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ also for executables that don't include lisp.h ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Return RECC_ERROR if STRP doesn't match a known character class. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_wctype_t ~~~~~~~~~~~ re_wctype (const unsigned char *beg, int limit) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ /* Sort tests in the length=five case by frequency the classes to minimize ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ number of times we fail the comparison. The frequencies of character class ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ names used in Emacs sources as of 2016-07-27: ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ $ find \( -name \*.c -o -name \*.el \) -exec grep -h '\[:[a-z]*:]' {} + | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ sed 's/]/]\n/g' |grep -o '\[:[a-z]*:]' |sort |uniq -c |sort -nr ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 213 [:alnum:] ~~~~~~~~~~~~~ 104 [:alpha:] ~~~~~~~~~~~~~ 62 [:space:] ~~~~~~~~~~~~ 39 [:digit:] ~~~~~~~~~~~~ 36 [:blank:] ~~~~~~~~~~~~ 26 [:word:] ~~~~~~~~~~~ 26 [:upper:] ~~~~~~~~~~~~ 21 [:lower:] ~~~~~~~~~~~~ 10 [:xdigit:] ~~~~~~~~~~~~~ 10 [:punct:] ~~~~~~~~~~~~ 10 [:ascii:] ~~~~~~~~~~~~ 4 [:nonascii:] ~~~~~~~~~~~~~~ 4 [:graph:] ~~~~~~~~~~~ 2 [:print:] ~~~~~~~~~~~ 2 [:cntrl:] ~~~~~~~~~~~ 1 [:ff:] ~~~~~~~~ If you update this list, consider also updating chain of or'ed conditions ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ in execute_charset function. XEmacs; our equivalent is the condition ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ checking class_bits in the charset_mule and charset_mule_not opcodes. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ ~~ switch (limit) { ~~~~~~~~~~~~~~~~ case 4: ~~~~~~~ if (!memcmp (beg, "word", 4)) return RECC_WORD; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case 5: ~~~~~~~ if (!memcmp (beg, "alnum", 5)) return RECC_ALNUM; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!memcmp (beg, "alpha", 5)) return RECC_ALPHA; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!memcmp (beg, "space", 5)) return RECC_SPACE; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!memcmp (beg, "digit", 5)) return RECC_DIGIT; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!memcmp (beg, "blank", 5)) return RECC_BLANK; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!memcmp (beg, "upper", 5)) return RECC_UPPER; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!memcmp (beg, "lower", 5)) return RECC_LOWER; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!memcmp (beg, "punct", 5)) return RECC_PUNCT; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!memcmp (beg, "ascii", 5)) return RECC_ASCII; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!memcmp (beg, "graph", 5)) return RECC_GRAPH; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!memcmp (beg, "print", 5)) return RECC_PRINT; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!memcmp (beg, "cntrl", 5)) return RECC_CNTRL; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case 6: ~~~~~~~ if (!memcmp (beg, "xdigit", 6)) return RECC_XDIGIT; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case 7: ~~~~~~~ if (!memcmp (beg, "unibyte", 7)) return RECC_UNIBYTE; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case 8: ~~~~~~~ if (!memcmp (beg, "nonascii", 8)) return RECC_NONASCII; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case 9: ~~~~~~~ if (!memcmp (beg, "multibyte", 9)) return RECC_MULTIBYTE; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ } ~ return RECC_ERROR; ~~~~~~~~~~~~~~~~~~ } ~ /* True if CH is in the char class CC. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int ~~~ re_iswctype (int ch, re_wctype_t cc ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RE_ISWCTYPE_ARG_DECL) ~~~~~~~~~~~~~~~~~~~~~ { ~ switch (cc) ~~~~~~~~~~~ { ~ case RECC_ALNUM: return ISALNUM (ch) != 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_ALPHA: return ISALPHA (ch) != 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_BLANK: return ISBLANK (ch) != 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_CNTRL: return ISCNTRL (ch) != 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_DIGIT: return ISDIGIT (ch) != 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_GRAPH: return ISGRAPH (ch) != 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_PRINT: return ISPRINT (ch) != 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_PUNCT: return ISPUNCT (ch) != 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_SPACE: return ISSPACE (ch) != 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef emacs ~~~~~~~~~~~~ case RECC_UPPER: ~~~~~~~~~~~~~~~~ return NILP (lispbuf->case_fold_search) ? ISUPPER (ch) != 0 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ : !NOCASEP (lispbuf, ch); ~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_LOWER: ~~~~~~~~~~~~~~~~ return NILP (lispbuf->case_fold_search) ? ISLOWER (ch) != 0 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ : !NOCASEP (lispbuf, ch); ~~~~~~~~~~~~~~~~~~~~~~~~~ #else ~~~~~ case RECC_UPPER: return ISUPPER (ch) != 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_LOWER: return ISLOWER (ch) != 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ case RECC_XDIGIT: return ISXDIGIT (ch) != 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_ASCII: return ISASCII (ch) != 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_NONASCII: case RECC_MULTIBYTE: return !ISASCII (ch); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_UNIBYTE: return ISUNIBYTE (ch) != 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_WORD: return ISWORD (ch) != 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_ERROR: return false; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ assert (0); ~~~~~~~~~~~ return -1; ~~~~~~~~~~ } ~ #ifdef MULE ~~~~~~~~~~~ static re_bool ~~~~~~~~~~~~~~ re_wctype_can_match_non_ascii (re_wctype_t cc) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ switch (cc) ~~~~~~~~~~~ { ~ case RECC_ASCII: ~~~~~~~~~~~~~~~~ case RECC_UNIBYTE: ~~~~~~~~~~~~~~~~~~ case RECC_CNTRL: ~~~~~~~~~~~~~~~~ case RECC_DIGIT: ~~~~~~~~~~~~~~~~ case RECC_XDIGIT: ~~~~~~~~~~~~~~~~~ case RECC_BLANK: ~~~~~~~~~~~~~~~~ return false; ~~~~~~~~~~~~~ default: ~~~~~~~~ return true; ~~~~~~~~~~~~ } ~ } ~ #endif /* MULE */ ~~~~~~~~~~~~~~~~~ #ifdef emacs ~~~~~~~~~~~~ /* Return a bit-pattern to use in the range-table bits to match multibyte ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ chars of class CC. */ ~~~~~~~~~~~~~~~~~~~~~~ static unsigned char ~~~~~~~~~~~~~~~~~~~~ re_wctype_to_bit (re_wctype_t cc) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ switch (cc) ~~~~~~~~~~~ { ~ case RECC_PRINT: case RECC_GRAPH: ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_ALPHA: return BIT_ALPHA; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_ALNUM: case RECC_WORD: return BIT_WORD; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_LOWER: return BIT_LOWER; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_UPPER: return BIT_UPPER; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_PUNCT: return BIT_PUNCT; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_SPACE: return BIT_SPACE; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_MULTIBYTE: case RECC_NONASCII: ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_ASCII: case RECC_DIGIT: case RECC_XDIGIT: case RECC_CNTRL: ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_BLANK: case RECC_UNIBYTE: case RECC_ERROR: return 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ default: ~~~~~~~~ ABORT (); ~~~~~~~~~ return 0; ~~~~~~~~~ } ~ } ~ #endif /* emacs */ ~~~~~~~~~~~~~~~~~~ ~ static void store_op1 (re_opcode_t op, unsigned char *loc, int arg); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ static void store_op2 (re_opcode_t op, unsigned char *loc, int arg1, int arg2); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ static void insert_op1 (re_opcode_t op, unsigned char *loc, int arg, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ unsigned char *end); ~~~~~~~~~~~~~~~~~~~~ static void insert_op2 (re_opcode_t op, unsigned char *loc, int arg1, int arg2, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ unsigned char *end); ~~~~~~~~~~~~~~~~~~~~ static re_bool at_begline_loc_p (re_char *pattern, re_char *p, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ reg_syntax_t syntax); ~~~~~~~~~~~~~~~~~~~~~ static re_bool at_endline_loc_p (re_char *p, re_char *pend, int syntax); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ static re_bool group_in_compile_stack (compile_stack_type compile_stack, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regnum_t regnum); ~~~~~~~~~~~~~~~~~ static reg_errcode_t compile_range (re_char **p_ptr, re_char *pend, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RE_TRANSLATE_TYPE translate, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ reg_syntax_t syntax, ~~~~~~~~~~~~~~~~~~~~ unsigned char *b); ~~~~~~~~~~~~~~~~~~ #ifdef MULE ~~~~~~~~~~~ static reg_errcode_t compile_extended_range (re_char **p_ptr, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_char *pend, ~~~~~~~~~~~~~~ RE_TRANSLATE_TYPE translate, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ reg_syntax_t syntax, ~~~~~~~~~~~~~~~~~~~~ Lisp_Object rtab); ~~~~~~~~~~~~~~~~~~ #endif /* MULE */ ~~~~~~~~~~~~~~~~~ #ifdef emacs ~~~~~~~~~~~~ reg_errcode_t compile_char_class (re_wctype_t cc, Lisp_Object rtab, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Bitbyte *flags_out); ~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ static re_bool group_match_null_string_p (re_char **p, re_char *end, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ register_info_type *reg_info); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ static re_bool alt_match_null_string_p (re_char *p, re_char *end, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ register_info_type *reg_info); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ static re_bool common_op_match_null_string_p (re_char **p, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_char *end, ~~~~~~~~~~~~~ register_info_type *reg_info); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ static int bcmp_translate (re_char *s1, re_char *s2, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ REGISTER int len, RE_TRANSLATE_TYPE translate ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef emacs ~~~~~~~~~~~~ , Internal_Format fmt, Lisp_Object lispobj ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ ); ~~ static int re_match_2_internal (struct re_pattern_buffer *bufp, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_char *string1, int size1, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_char *string2, int size2, int pos, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ struct re_registers *regs, int stop ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RE_LISP_CONTEXT_ARGS_DECL); ~~~~~~~~~~~~~~~~~~~~~~~~~~~ ~ #ifndef MATCH_MAY_ALLOCATE ~~~~~~~~~~~~~~~~~~~~~~~~~~ /* If we cannot allocate large objects within re_match_2_internal, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ we make the fail stack and register vectors global. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ The fail stack, we grow to the maximum size when a regexp ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ is compiled. ~~~~~~~~~~~~ The register vectors, we adjust in size each time we ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ compile a regexp, according to the number of registers it needs. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ static fail_stack_type fail_stack; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Size with which the following vectors are currently allocated. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ That is so we can make them bigger as needed, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ but never make them smaller. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ static int regs_allocated_size; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ static re_char ** regstart, ** regend; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ static re_char ** old_regstart, ** old_regend; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ static re_char **best_regstart, **best_regend; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ static register_info_type *reg_info; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ static re_char **reg_dummy; ~~~~~~~~~~~~~~~~~~~~~~~~~~~ static register_info_type *reg_info_dummy; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Make the register vectors big enough for NUM_REGS registers, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ but don't make them smaller. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ static ~~~~~~ regex_grow_registers (int num_regs) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ if (num_regs > regs_allocated_size) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ RETALLOC (regstart, num_regs, re_char *); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RETALLOC (regend, num_regs, re_char *); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RETALLOC (old_regstart, num_regs, re_char *); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RETALLOC (old_regend, num_regs, re_char *); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RETALLOC (best_regstart, num_regs, re_char *); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RETALLOC (best_regend, num_regs, re_char *); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RETALLOC (reg_info, num_regs, register_info_type); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RETALLOC (reg_dummy, num_regs, re_char *); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RETALLOC (reg_info_dummy, num_regs, register_info_type); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regs_allocated_size = num_regs; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ } ~ #endif /* not MATCH_MAY_ALLOCATE */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ~ /* `regex_compile' compiles PATTERN (of length SIZE) according to SYNTAX. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Returns one of error codes defined in `regex.h', or zero for success. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Assumes the `allocated' (and perhaps `buffer') and `translate' ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fields are set in BUFP on entry. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ If it succeeds, results are put in BUFP (if it returns an error, the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ contents of BUFP are undefined): ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `buffer' is the compiled pattern; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `syntax' is set to SYNTAX; ~~~~~~~~~~~~~~~~~~~~~~~~~~ `used' is set to the length of the compiled pattern; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `fastmap_accurate' is zero; ~~~~~~~~~~~~~~~~~~~~~~~~~~~ `re_ngroups' is the number of groups/subexpressions (including shy ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ groups) in PATTERN; ~~~~~~~~~~~~~~~~~~~ `re_nsub' is the number of non-shy groups in PATTERN; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `not_bol' and `not_eol' are zero; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ The `fastmap' and `newline_anchor' fields are neither ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ examined nor set. */ ~~~~~~~~~~~~~~~~~~~~~ /* Return, freeing storage we allocated. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define FREE_STACK_RETURN(value) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ do \ ~~~~~~~~~~ { \ ~~~~~~~~~ xfree (compile_stack.stack); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return value; \ ~~~~~~~~~~~~~~~~~~~~ } while (0) ~~~~~~~~~~~ static reg_errcode_t ~~~~~~~~~~~~~~~~~~~~ regex_compile (re_char *pattern, int size, reg_syntax_t syntax, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ struct re_pattern_buffer *bufp) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ /* We fetch characters from PATTERN here. We declare these as int ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (or possibly long) so that chars above 127 can be used as ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ array indices. The macros that fetch a character from the pattern ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ make sure to coerce to unsigned char before assigning, so we won't ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ get bitten by negative numbers here. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* XEmacs change: used to be unsigned char. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ REGISTER EMACS_INT c, c1; ~~~~~~~~~~~~~~~~~~~~~~~~~ /* A random temporary spot in PATTERN. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_char *p1; ~~~~~~~~~~~~ /* Points to the end of the buffer, where we should append. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ REGISTER unsigned char *buf_end; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Keeps track of unclosed groups. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ compile_stack_type compile_stack; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Points to the current (ending) position in the pattern. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_char *p = pattern; ~~~~~~~~~~~~~~~~~~~~~ re_char *pend = pattern + size; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* How to translate the characters in the pattern. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RE_TRANSLATE_TYPE translate = bufp->translate; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Address of the count-byte of the most recently inserted `exactn' ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ command. This makes it possible to tell if a new exact-match ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ character can be added to that command or if the character requires ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ a new `exactn' command. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~ unsigned char *pending_exact = 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Address of start of the most recently finished expression. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ This tells, e.g., postfix * where to find the start of its ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ operand. Reset at the beginning of groups and alternatives. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ unsigned char *laststart = 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Address of beginning of regexp, or inside of last group. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ unsigned char *begalt; ~~~~~~~~~~~~~~~~~~~~~~ /* Place in the uncompiled pattern (i.e., the {) to ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ which to go back if the interval is invalid. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_char *beg_interval; ~~~~~~~~~~~~~~~~~~~~~~ /* Address of the place where a forward jump should go to the end of ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the containing expression. Each alternative of an `or' -- except the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ last -- ends with a forward jump of this sort. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ unsigned char *fixup_alt_jump = 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Counts open-groups as they are encountered. Remembered for the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ matching close-group on the compile stack, so the same register ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ number is put in the stop_memory as the start_memory. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regnum_t regnum = 0; ~~~~~~~~~~~~~~~~~~~~ #ifdef DEBUG ~~~~~~~~~~~~ if (debug_regexps & RE_DEBUG_COMPILATION) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ int debug_count; ~~~~~~~~~~~~~~~~ DEBUG_PRINT1 ("\nCompiling pattern: "); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (debug_count = 0; debug_count < size; debug_count++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ putchar (pattern[debug_count]); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ putchar ('\n'); ~~~~~~~~~~~~~~~ } ~ #endif /* DEBUG */ ~~~~~~~~~~~~~~~~~~ /* Initialize the compile stack. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ compile_stack.stack = TALLOC (INIT_COMPILE_STACK_SIZE, compile_stack_elt_t); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (compile_stack.stack == NULL) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return REG_ESPACE; ~~~~~~~~~~~~~~~~~~ compile_stack.size = INIT_COMPILE_STACK_SIZE; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ compile_stack.avail = 0; ~~~~~~~~~~~~~~~~~~~~~~~~ /* Initialize the pattern buffer. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->syntax = syntax; ~~~~~~~~~~~~~~~~~~~~~~ bufp->fastmap_accurate = 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->not_bol = bufp->not_eol = 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Set `used' to zero, so that if we return an error, the pattern ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ printer (for debugging) will think there's no pattern. We reset it ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ at the end. */ ~~~~~~~~~~~~~~~ bufp->used = 0; ~~~~~~~~~~~~~~~ /* Always count groups, whether or not bufp->no_sub is set. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->re_nsub = 0; ~~~~~~~~~~~~~~~~~~ bufp->re_ngroups = 0; ~~~~~~~~~~~~~~~~~~~~~ bufp->warned_about_incompatible_back_references = 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (bufp->external_to_internal_register == 0) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ bufp->external_to_internal_register_size = INIT_REG_TRANSLATE_SIZE; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RETALLOC (bufp->external_to_internal_register, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->external_to_internal_register_size, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int); ~~~~~ } ~ { ~ int i; ~~~~~~ bufp->external_to_internal_register[0] = 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (i = 1; i < bufp->external_to_internal_register_size; i++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->external_to_internal_register[i] = (int) 0xDEADBEEF; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ #if !defined (emacs) && !defined (SYNTAX_TABLE) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Initialize the syntax table. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ init_syntax_once (); ~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ if (bufp->allocated == 0) ~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ if (bufp->buffer) ~~~~~~~~~~~~~~~~~ { /* If zero allocated, but buffer is non-null, try to realloc ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ enough space. This loses if buffer's address is bogus, but ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ that is the user's responsibility. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RETALLOC (bufp->buffer, INIT_BUF_SIZE, unsigned char); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ else ~~~~ { /* Caller did not allocate a buffer. Do it for them. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->buffer = TALLOC (INIT_BUF_SIZE, unsigned char); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ if (!bufp->buffer) FREE_STACK_RETURN (REG_ESPACE); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->allocated = INIT_BUF_SIZE; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ begalt = buf_end = bufp->buffer; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Loop through the uncompiled pattern until we're at the end. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ while (p != pend) ~~~~~~~~~~~~~~~~~ { ~ PATFETCH (c); ~~~~~~~~~~~~~ switch (c) ~~~~~~~~~~ { ~ case '^': ~~~~~~~~~ { ~ if ( /* If at start of pattern, it's an operator. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ p == pattern + 1 ~~~~~~~~~~~~~~~~ /* If context independent, it's an operator. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || syntax & RE_CONTEXT_INDEP_ANCHORS ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Otherwise, depends on what's come before. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || at_begline_loc_p (pattern, p, syntax)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ BUF_PUSH (begline); ~~~~~~~~~~~~~~~~~~~ else ~~~~ goto normal_char; ~~~~~~~~~~~~~~~~~ } ~ break; ~~~~~~ case '$': ~~~~~~~~~ { ~ if ( /* If at end of pattern, it's an operator. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ p == pend ~~~~~~~~~ /* If context independent, it's an operator. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || syntax & RE_CONTEXT_INDEP_ANCHORS ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Otherwise, depends on what's next. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || at_endline_loc_p (p, pend, syntax)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ BUF_PUSH (endline); ~~~~~~~~~~~~~~~~~~~ else ~~~~ goto normal_char; ~~~~~~~~~~~~~~~~~ } ~ break; ~~~~~~ case '+': ~~~~~~~~~ case '?': ~~~~~~~~~ if ((syntax & RE_BK_PLUS_QM) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || (syntax & RE_LIMITED_OPS)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto normal_char; ~~~~~~~~~~~~~~~~~ handle_plus: ~~~~~~~~~~~~ case '*': ~~~~~~~~~ /* If there is no previous pattern... */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!laststart) ~~~~~~~~~~~~~~~ { ~ if (syntax & RE_CONTEXT_INVALID_OPS) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ FREE_STACK_RETURN (REG_BADRPT); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ else if (!(syntax & RE_CONTEXT_INDEP_OPS)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto normal_char; ~~~~~~~~~~~~~~~~~ } ~ { ~ /* true means zero/many matches are allowed. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_bool zero_times_ok = c != '+'; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_bool many_times_ok = c != '?'; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* true means match shortest string possible. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_bool minimal = false; ~~~~~~~~~~~~~~~~~~~~~~~~ /* If there is a sequence of repetition chars, collapse it ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ down to just one (the right one). We can't combine ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ interval operators with these because of, e.g., `a{2}*', ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ which should only match an even number of `a's. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ while (p != pend) ~~~~~~~~~~~~~~~~~ { ~ PATFETCH (c); ~~~~~~~~~~~~~ if (c == '*' || (!(syntax & RE_BK_PLUS_QM) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ && (c == '+' || c == '?'))) ~~~~~~~~~~~~~~~~~~~~~~~~~~~ ; ~ else if (syntax & RE_BK_PLUS_QM && c == '\\') ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ if (p == pend) FREE_STACK_RETURN (REG_EESCAPE); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PATFETCH (c1); ~~~~~~~~~~~~~~ if (!(c1 == '+' || c1 == '?')) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ PATUNFETCH; ~~~~~~~~~~~ PATUNFETCH; ~~~~~~~~~~~ break; ~~~~~~ } ~ c = c1; ~~~~~~~ } ~ else ~~~~ { ~ PATUNFETCH; ~~~~~~~~~~~ break; ~~~~~~ } ~ /* If we get here, we found another repeat character. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!(syntax & RE_NO_MINIMAL_MATCHING)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ /* "*?" and "+?" and "??" are okay (and mean match ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ minimally), but other sequences (such as "*??" and ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ "+++") are rejected (reserved for future use). */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (minimal || c != '?') ~~~~~~~~~~~~~~~~~~~~~~~~ FREE_STACK_RETURN (REG_BADRPT); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ minimal = true; ~~~~~~~~~~~~~~~ } ~ else ~~~~ { ~ zero_times_ok |= c != '+'; ~~~~~~~~~~~~~~~~~~~~~~~~~~ many_times_ok |= c != '?'; ~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ } ~ /* Star, etc. applied to an empty pattern is equivalent ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ to an empty pattern. */ ~~~~~~~~~~~~~~~~~~~~~~~~ if (!laststart) ~~~~~~~~~~~~~~~ break; ~~~~~~ /* Now we know whether zero matches is allowed ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ and whether two or more matches is allowed ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ and whether we want minimal or maximal matching. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (minimal) ~~~~~~~~~~~~ { ~ if (!many_times_ok) ~~~~~~~~~~~~~~~~~~~ { ~ /* "a??" becomes: ~~~~~~~~~~~~~~~~~ 0: /on_failure_jump to 6 ~~~~~~~~~~~~~~~~~~~~~~~~ 3: /jump to 9 ~~~~~~~~~~~~~ 6: /exactn/1/A ~~~~~~~~~~~~~~ 9: end of pattern. ~~~~~~~~~~~~~~~~~~ */ ~~ GET_BUFFER_SPACE (6); ~~~~~~~~~~~~~~~~~~~~~ INSERT_JUMP (jump, laststart, buf_end + 3); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ buf_end += 3; ~~~~~~~~~~~~~ INSERT_JUMP (on_failure_jump, laststart, laststart + 6); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ buf_end += 3; ~~~~~~~~~~~~~ } ~ else if (zero_times_ok) ~~~~~~~~~~~~~~~~~~~~~~~ { ~ /* "a*?" becomes: ~~~~~~~~~~~~~~~~~ 0: /jump to 6 ~~~~~~~~~~~~~ 3: /exactn/1/A ~~~~~~~~~~~~~~ 6: /on_failure_jump to 3 ~~~~~~~~~~~~~~~~~~~~~~~~ 9: end of pattern. ~~~~~~~~~~~~~~~~~~ */ ~~ GET_BUFFER_SPACE (6); ~~~~~~~~~~~~~~~~~~~~~ INSERT_JUMP (jump, laststart, buf_end + 3); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ buf_end += 3; ~~~~~~~~~~~~~ STORE_JUMP (on_failure_jump, buf_end, laststart + 3); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ buf_end += 3; ~~~~~~~~~~~~~ } ~ else ~~~~ { ~ /* "a+?" becomes: ~~~~~~~~~~~~~~~~~ 0: /exactn/1/A ~~~~~~~~~~~~~~ 3: /on_failure_jump to 0 ~~~~~~~~~~~~~~~~~~~~~~~~ 6: end of pattern. ~~~~~~~~~~~~~~~~~~ */ ~~ GET_BUFFER_SPACE (3); ~~~~~~~~~~~~~~~~~~~~~ STORE_JUMP (on_failure_jump, buf_end, laststart); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ buf_end += 3; ~~~~~~~~~~~~~ } ~ } ~ else ~~~~ { ~ /* Are we optimizing this jump? */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_bool keep_string_p = false; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (many_times_ok) ~~~~~~~~~~~~~~~~~~ { /* More than one repetition is allowed, so put in ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ at the end a backward relative jump from ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `buf_end' to before the next jump we're going ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ to put in below (which jumps from laststart to ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ after this jump). ~~~~~~~~~~~~~~~~~ But if we are at the `*' in the exact sequence `.*\n', ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ insert an unconditional jump backwards to the ., ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ instead of the beginning of the loop. This way we only ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ push a failure point once, instead of every time ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ through the loop. */ ~~~~~~~~~~~~~~~~~~~~~ assert (p - 1 > pattern); ~~~~~~~~~~~~~~~~~~~~~~~~~ /* Allocate the space for the jump. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ GET_BUFFER_SPACE (3); ~~~~~~~~~~~~~~~~~~~~~ /* We know we are not at the first character of the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ pattern, because laststart was nonzero. And we've ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ already incremented `p', by the way, to be the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ character after the `*'. Do we have to do something ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ analogous here for null bytes, because of ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RE_DOT_NOT_NULL? */ ~~~~~~~~~~~~~~~~~~~ if (*(p - 2) == '.' ~~~~~~~~~~~~~~~~~~~ && zero_times_ok ~~~~~~~~~~~~~~~~ && p < pend && *p == '\n' ~~~~~~~~~~~~~~~~~~~~~~~~~ && !(syntax & RE_DOT_NEWLINE)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { /* We have .*\n. */ ~~~~~~~~~~~~~~~~~~~~~~ STORE_JUMP (jump, buf_end, laststart); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ keep_string_p = true; ~~~~~~~~~~~~~~~~~~~~~ } ~ else ~~~~ /* Anything else. */ ~~~~~~~~~~~~~~~~~~~~~ STORE_JUMP (maybe_pop_jump, buf_end, laststart - 3); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* We've added more stuff to the buffer. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ buf_end += 3; ~~~~~~~~~~~~~ } ~ /* On failure, jump from laststart to buf_end + 3, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ which will be the end of the buffer after this jump ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ is inserted. */ ~~~~~~~~~~~~~~~~ GET_BUFFER_SPACE (3); ~~~~~~~~~~~~~~~~~~~~~ INSERT_JUMP (keep_string_p ? on_failure_keep_string_jump ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ : on_failure_jump, ~~~~~~~~~~~~~~~~~~ laststart, buf_end + 3); ~~~~~~~~~~~~~~~~~~~~~~~~ buf_end += 3; ~~~~~~~~~~~~~ if (!zero_times_ok) ~~~~~~~~~~~~~~~~~~~ { ~ /* At least one repetition is required, so insert a ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `dummy_failure_jump' before the initial ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `on_failure_jump' instruction of the loop. This ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ effects a skip over that instruction the first time ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ we hit that loop. */ ~~~~~~~~~~~~~~~~~~~~~ GET_BUFFER_SPACE (3); ~~~~~~~~~~~~~~~~~~~~~ INSERT_JUMP (dummy_failure_jump, laststart, laststart + 6); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ buf_end += 3; ~~~~~~~~~~~~~ } ~ } ~ pending_exact = 0; ~~~~~~~~~~~~~~~~~~ } ~ break; ~~~~~~ case '.': ~~~~~~~~~ laststart = buf_end; ~~~~~~~~~~~~~~~~~~~~ BUF_PUSH (anychar); ~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ #ifdef MULE ~~~~~~~~~~~ #define MAYBE_START_OVER_WITH_EXTENDED(ch) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (ch >= 0x80) do \ ~~~~~~~~~~~~~~~~~~~~~~ { \ ~~~~~~~ goto start_over_with_extended; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } while (0) ~~~~~~~~~~~ #else ~~~~~ #define MAYBE_START_OVER_WITH_EXTENDED(ch) (void)(ch) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ case '[': ~~~~~~~~~ { ~ /* XEmacs change: this whole section */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_bool had_char_class = false; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (p == pend) FREE_STACK_RETURN (REG_EBRACK); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Ensure that we have enough space to push a charset: the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ opcode, the length count, and the bitset; 34 bytes in all. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ GET_BUFFER_SPACE (34); ~~~~~~~~~~~~~~~~~~~~~~ laststart = buf_end; ~~~~~~~~~~~~~~~~~~~~ /* We test `*p == '^' twice, instead of using an if ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ statement, so we only need one BUF_PUSH. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ BUF_PUSH (*p == '^' ? charset_not : charset); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (*p == '^') ~~~~~~~~~~~~~~ p++; ~~~~ /* Remember the first position in the bracket expression. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ p1 = p; ~~~~~~~ /* Push the number of bytes in the bitmap. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ BUF_PUSH ((1 << BYTEWIDTH) / BYTEWIDTH); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Clear the whole map. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~ memset (buf_end, 0, (1 << BYTEWIDTH) / BYTEWIDTH); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* charset_not matches newline according to a syntax bit. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if ((re_opcode_t) buf_end[-2] == charset_not ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ && (syntax & RE_HAT_LISTS_NOT_NEWLINE)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ SET_LIST_BIT ('\n'); ~~~~~~~~~~~~~~~~~~~~ /* Read in characters and ranges, setting map bits. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (;;) ~~~~~~~~ { ~ if (p == pend) FREE_STACK_RETURN (REG_EBRACK); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PATFETCH (c); ~~~~~~~~~~~~~ /* Frumble-bumble, we may have found some extended chars. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Need to start over, process everything using the general ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ extended-char mechanism, and need to use charset_mule and ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ charset_mule_not instead of charset and charset_not. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ MAYBE_START_OVER_WITH_EXTENDED (c); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* \ might escape characters inside [...] and [^...]. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if ((syntax & RE_BACKSLASH_ESCAPE_IN_LISTS) && c == '\\') ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ if (p == pend) FREE_STACK_RETURN (REG_EESCAPE); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PATFETCH (c1); ~~~~~~~~~~~~~~ MAYBE_START_OVER_WITH_EXTENDED (c1); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ SET_LIST_BIT (c1); ~~~~~~~~~~~~~~~~~~ continue; ~~~~~~~~~ } ~ /* Could be the end of the bracket expression. If it's ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ not (i.e., when the bracket expression is `[]' so ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ far), the ']' character bit gets set way below. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (c == ']' && p != p1 + 1) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ /* Look ahead to see if it's a range when the last thing ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ was a character class. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~ if (had_char_class && c == '-' && *p != ']') ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ FREE_STACK_RETURN (REG_ERANGE); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Look ahead to see if it's a range when the last thing ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ was a character: if this is a hyphen not at the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ beginning or the end of a list, then it's the range ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ operator. */ ~~~~~~~~~~~~~ if (c == '-' ~~~~~~~~~~~~ && !(p - 2 >= pattern && p[-2] == '[') ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ && !(p - 3 >= pattern && p[-3] == '[' && p[-2] == '^') ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ && *p != ']') ~~~~~~~~~~~~~ { ~ reg_errcode_t ret; ~~~~~~~~~~~~~~~~~~ MAYBE_START_OVER_WITH_EXTENDED (*(unsigned char *)p); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ret = compile_range (&p, pend, translate, syntax, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ buf_end); ~~~~~~~~~ if (ret != REG_NOERROR) FREE_STACK_RETURN (ret); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ else if (p[0] == '-' && p[1] != ']') ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { /* This handles ranges made up of characters only. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ reg_errcode_t ret; ~~~~~~~~~~~~~~~~~~ /* Move past the `-'. */ ~~~~~~~~~~~~~~~~~~~~~~~~~ PATFETCH (c1); ~~~~~~~~~~~~~~ MAYBE_START_OVER_WITH_EXTENDED (*(unsigned char *)p); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ret = compile_range (&p, pend, translate, syntax, buf_end); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (ret != REG_NOERROR) FREE_STACK_RETURN (ret); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* See if we're at the beginning of a possible character ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ class. */ ~~~~~~~~~~ else if (syntax & RE_CHAR_CLASSES && c == '[' && *p == ':') ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ re_char *str = p + 1; ~~~~~~~~~~~~~~~~~~~~~ PATFETCH (c); ~~~~~~~~~~~~~ c1 = 0; ~~~~~~~ /* If pattern is `[[:'. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (p == pend) FREE_STACK_RETURN (REG_EBRACK); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (;;) ~~~~~~~~ { ~ PATFETCH (c); ~~~~~~~~~~~~~ if ((c == ':' && *p == ']') || p == pend) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ break; ~~~~~~ } ~ c1++; ~~~~~ } ~ /* If isn't a word bracketed by `[:' and `:]': ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ undo the ending character, the letters, and leave ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the leading `:' and `[' (but set bits for them). */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (c == ':' && *p == ']') ~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ re_wctype_t cc = re_wctype (str, c1); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int ch; ~~~~~~~ if (cc == RECC_ERROR) ~~~~~~~~~~~~~~~~~~~~~ FREE_STACK_RETURN (REG_ECTYPE); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Throw away the ] at the end of the character ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ class. */ ~~~~~~~~~~ PATFETCH (c); ~~~~~~~~~~~~~ if (p == pend) FREE_STACK_RETURN (REG_EBRACK); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef MULE ~~~~~~~~~~~ if (re_wctype_can_match_non_ascii (cc)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ goto start_over_with_extended; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ #endif /* MULE */ ~~~~~~~~~~~~~~~~~ for (ch = 0; ch < (1 << BYTEWIDTH); ++ch) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ if (re_iswctype (ch, cc ~~~~~~~~~~~~~~~~~~~~~~~ RE_ISWCTYPE_ARG (current_buffer))) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ SET_LIST_BIT (ch); ~~~~~~~~~~~~~~~~~~ } ~ } ~ had_char_class = true; ~~~~~~~~~~~~~~~~~~~~~~ } ~ else ~~~~ { ~ c1++; ~~~~~ while (c1--) ~~~~~~~~~~~~ PATUNFETCH; ~~~~~~~~~~~ SET_LIST_BIT ('['); ~~~~~~~~~~~~~~~~~~~ SET_LIST_BIT (':'); ~~~~~~~~~~~~~~~~~~~ had_char_class = false; ~~~~~~~~~~~~~~~~~~~~~~~ } ~ } ~ else ~~~~ { ~ had_char_class = false; ~~~~~~~~~~~~~~~~~~~~~~~ SET_LIST_BIT (c); ~~~~~~~~~~~~~~~~~ } ~ } ~ /* Discard any (non)matching list bytes that are all 0 at the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ end of the map. Decrease the map-length byte too. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ while ((int) buf_end[-1] > 0 && buf_end[buf_end[-1] - 1] == 0) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ buf_end[-1]--; ~~~~~~~~~~~~~~ buf_end += buf_end[-1]; ~~~~~~~~~~~~~~~~~~~~~~~ } ~ break; ~~~~~~ #ifdef MULE ~~~~~~~~~~~ start_over_with_extended: ~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ REGISTER Lisp_Object rtab = Qnil; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Bitbyte flags = 0; ~~~~~~~~~~~~~~~~~~ int bytes_needed = sizeof (flags); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_bool had_char_class = false; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* There are extended chars here, which means we need to use the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ unified range-table format. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (buf_end[-2] == charset) ~~~~~~~~~~~~~~~~~~~~~~~~~~~ buf_end[-2] = charset_mule; ~~~~~~~~~~~~~~~~~~~~~~~~~~~ else ~~~~ buf_end[-2] = charset_mule_not; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ buf_end--; ~~~~~~~~~~ p = p1; /* go back to the beginning of the charset, after ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ a possible ^. */ ~~~~~~~~~~~~~~~~ rtab = Vthe_lisp_rangetab; ~~~~~~~~~~~~~~~~~~~~~~~~~~ Fclear_range_table (rtab); ~~~~~~~~~~~~~~~~~~~~~~~~~~ /* charset_not matches newline according to a syntax bit. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if ((re_opcode_t) buf_end[-1] == charset_mule_not ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ && (syntax & RE_HAT_LISTS_NOT_NEWLINE)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ SET_RANGETAB_BIT ('\n'); ~~~~~~~~~~~~~~~~~~~~~~~~ /* Read in characters and ranges, setting map bits. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (;;) ~~~~~~~~ { ~ if (p == pend) FREE_STACK_RETURN (REG_EBRACK); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PATFETCH (c); ~~~~~~~~~~~~~ /* \ might escape characters inside [...] and [^...]. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if ((syntax & RE_BACKSLASH_ESCAPE_IN_LISTS) && c == '\\') ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ if (p == pend) FREE_STACK_RETURN (REG_EESCAPE); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PATFETCH (c1); ~~~~~~~~~~~~~~ SET_RANGETAB_BIT (c1); ~~~~~~~~~~~~~~~~~~~~~~ continue; ~~~~~~~~~ } ~ /* Could be the end of the bracket expression. If it's ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ not (i.e., when the bracket expression is `[]' so ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ far), the ']' character bit gets set way below. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (c == ']' && p != p1 + 1) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ /* Look ahead to see if it's a range when the last thing ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ was a character class. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~ if (had_char_class && c == '-' && *p != ']') ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ FREE_STACK_RETURN (REG_ERANGE); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Look ahead to see if it's a range when the last thing ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ was a character: if this is a hyphen not at the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ beginning or the end of a list, then it's the range ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ operator. */ ~~~~~~~~~~~~~ if (c == '-' ~~~~~~~~~~~~ && !(p - 2 >= pattern && p[-2] == '[') ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ && !(p - 3 >= pattern && p[-3] == '[' && p[-2] == '^') ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ && *p != ']') ~~~~~~~~~~~~~ { ~ reg_errcode_t ret; ~~~~~~~~~~~~~~~~~~ ret = compile_extended_range (&p, pend, translate, syntax, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ rtab); ~~~~~~ if (ret != REG_NOERROR) FREE_STACK_RETURN (ret); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ else if (p[0] == '-' && p[1] != ']') ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { /* This handles ranges made up of characters only. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ reg_errcode_t ret; ~~~~~~~~~~~~~~~~~~ /* Move past the `-'. */ ~~~~~~~~~~~~~~~~~~~~~~~~~ PATFETCH (c1); ~~~~~~~~~~~~~~ ret = compile_extended_range (&p, pend, translate, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ syntax, rtab); ~~~~~~~~~~~~~~ if (ret != REG_NOERROR) FREE_STACK_RETURN (ret); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* See if we're at the beginning of a possible character ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ class. */ ~~~~~~~~~~ else if (syntax & RE_CHAR_CLASSES && c == '[' && *p == ':') ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ re_char *str = p + 1; ~~~~~~~~~~~~~~~~~~~~~ PATFETCH (c); ~~~~~~~~~~~~~ c1 = 0; ~~~~~~~ /* If pattern is `[[:'. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (p == pend) FREE_STACK_RETURN (REG_EBRACK); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (;;) ~~~~~~~~ { ~ PATFETCH (c); ~~~~~~~~~~~~~ if ((c == ':' && *p == ']') || p == pend) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ break; ~~~~~~ } ~ c1++; ~~~~~ } ~ /* If isn't a word bracketed by `[:' and `:]': ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ undo the ending character, the letters, and leave ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the leading `:' and `[' (but set bits for them). */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (c == ':' && *p == ']') ~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ re_wctype_t cc = re_wctype (str, c1); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ reg_errcode_t ret = REG_NOERROR; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (cc == RECC_ERROR) ~~~~~~~~~~~~~~~~~~~~~ FREE_STACK_RETURN (REG_ECTYPE); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Throw away the ] at the end of the character ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ class. */ ~~~~~~~~~~ PATFETCH (c); ~~~~~~~~~~~~~ if (p == pend) FREE_STACK_RETURN (REG_EBRACK); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ret = compile_char_class (cc, rtab, &flags); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (ret != REG_NOERROR) FREE_STACK_RETURN (ret); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ had_char_class = true; ~~~~~~~~~~~~~~~~~~~~~~ } ~ else ~~~~ { ~ c1++; ~~~~~ while (c1--) ~~~~~~~~~~~~ PATUNFETCH; ~~~~~~~~~~~ SET_RANGETAB_BIT ('['); ~~~~~~~~~~~~~~~~~~~~~~~ SET_RANGETAB_BIT (':'); ~~~~~~~~~~~~~~~~~~~~~~~ had_char_class = false; ~~~~~~~~~~~~~~~~~~~~~~~ } ~ } ~ else ~~~~ { ~ had_char_class = false; ~~~~~~~~~~~~~~~~~~~~~~~ SET_RANGETAB_BIT (c); ~~~~~~~~~~~~~~~~~~~~~ } ~ } ~ bytes_needed += unified_range_table_bytes_needed (rtab); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ GET_BUFFER_SPACE (bytes_needed); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ *buf_end++ = flags; ~~~~~~~~~~~~~~~~~~~ unified_range_table_copy_data (rtab, buf_end); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ buf_end += unified_range_table_bytes_used (buf_end); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ } ~ #endif /* MULE */ ~~~~~~~~~~~~~~~~~ case '(': ~~~~~~~~~ if (syntax & RE_NO_BK_PARENS) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto handle_open; ~~~~~~~~~~~~~~~~~ else ~~~~ goto normal_char; ~~~~~~~~~~~~~~~~~ case ')': ~~~~~~~~~ if (syntax & RE_NO_BK_PARENS) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto handle_close; ~~~~~~~~~~~~~~~~~~ else ~~~~ goto normal_char; ~~~~~~~~~~~~~~~~~ case '\n': ~~~~~~~~~~ if (syntax & RE_NEWLINE_ALT) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto handle_alt; ~~~~~~~~~~~~~~~~ else ~~~~ goto normal_char; ~~~~~~~~~~~~~~~~~ case '|': ~~~~~~~~~ if (syntax & RE_NO_BK_VBAR) ~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto handle_alt; ~~~~~~~~~~~~~~~~ else ~~~~ goto normal_char; ~~~~~~~~~~~~~~~~~ case '{': ~~~~~~~~~ if (syntax & RE_INTERVALS && syntax & RE_NO_BK_BRACES) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto handle_interval; ~~~~~~~~~~~~~~~~~~~~~ else ~~~~ goto normal_char; ~~~~~~~~~~~~~~~~~ case '\\': ~~~~~~~~~~ if (p == pend) FREE_STACK_RETURN (REG_EESCAPE); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Do not translate the character after the \, so that we can ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ distinguish, e.g., \B from \b, even if we normally would ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ translate, e.g., B to b. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PATFETCH_RAW (c); ~~~~~~~~~~~~~~~~~ switch (c) ~~~~~~~~~~ { ~ case '(': ~~~~~~~~~ if (syntax & RE_NO_BK_PARENS) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto normal_backslash; ~~~~~~~~~~~~~~~~~~~~~~ handle_open: ~~~~~~~~~~~~ { ~ regnum_t r = 0; ~~~~~~~~~~~~~~~ re_bool shy = 0, named_nonshy = 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!(syntax & RE_NO_SHY_GROUPS) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ && p != pend && itext_ichar_eql (p, '?')) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ INC_IBYTEPTR (p); /* Gobble up the '?'. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PATFETCH (c); /* Fetch the next character, which may be a ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ digit. */ ~~~~~~~~~ switch (c) ~~~~~~~~~~ { ~ case ':': /* shy groups */ ~~~~~~~~~~~~~~~~~~~~~~~~~~ shy = 1; ~~~~~~~~ break; ~~~~~~ case '1': case '2': case '3': case '4': ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case '5': case '6': case '7': case '8': case '9': ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PATUNFETCH; ~~~~~~~~~~~ GET_UNSIGNED_NUMBER (r); ~~~~~~~~~~~~~~~~~~~~~~~~ if (itext_ichar_eql (p, ':')) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ named_nonshy = 1; ~~~~~~~~~~~~~~~~~ INC_IBYTEPTR (p); /* Gobble up the ':'. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ /* Otherwise, fall through and error. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* An explicitly specified regnum must start with ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ non-0. */ ~~~~~~~~~ case '0': ~~~~~~~~~ default: ~~~~~~~~ FREE_STACK_RETURN (REG_BADPAT); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ } ~ ++regnum; ~~~~~~~~~ bufp->re_ngroups++; ~~~~~~~~~~~~~~~~~~~ if (bufp->re_ngroups > MAX_REGNUM) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ FREE_STACK_RETURN (REG_ESUBREG); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ if (!shy) ~~~~~~~~~ { ~ if (named_nonshy) ~~~~~~~~~~~~~~~~~ { ~ if (r < bufp->external_to_internal_register_size) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ if (group_in_compile_stack ~~~~~~~~~~~~~~~~~~~~~~~~~~ (compile_stack, ~~~~~~~~~~~~~~~ bufp->external_to_internal_register[r])) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ /* GNU errors in this context, which is ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ inconsistent; it otherwise has no problem ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ with named non-shy groups overriding ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ previously-assigned group numbers. I choose ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ to error here for consistency with GNU for ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ those writing code that should target ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ both. */ ~~~~~~~~ FREE_STACK_RETURN (REG_ESUBREG); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ } ~ if (r > bufp->re_nsub) ~~~~~~~~~~~~~~~~~~~~~~ { ~ bufp->re_nsub = r; ~~~~~~~~~~~~~~~~~~ } ~ } ~ else ~~~~ { ~ r = ++(bufp->re_nsub); ~~~~~~~~~~~~~~~~~~~~~~ } ~ while (bufp->external_to_internal_register_size <= ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->re_nsub) ~~~~~~~~~~~~~~ { ~ int i; ~~~~~~ int old_size = ~~~~~~~~~~~~~~ bufp->external_to_internal_register_size; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->external_to_internal_register_size ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ += max (old_size + 5, bufp->re_nsub + 5); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RETALLOC (bufp->external_to_internal_register, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->external_to_internal_register_size, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int); ~~~~~ for (i = old_size; ~~~~~~~~~~~~~~~~~~ i < bufp->external_to_internal_register_size; i++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->external_to_internal_register[i] = ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (int) 0xDEADBEEF; ~~~~~~~~~~~~~~~~~ } ~ /* This is explicitly [r] rather than [bufp->re_nsub] for ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the case that the named nonshy group references an ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ unused register number less than bufp->re_nsub. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->external_to_internal_register[r] = ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->re_ngroups; ~~~~~~~~~~~~~~~~~ } ~ if (COMPILE_STACK_FULL) ~~~~~~~~~~~~~~~~~~~~~~~ { ~ RETALLOC (compile_stack.stack, compile_stack.size << 1, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ compile_stack_elt_t); ~~~~~~~~~~~~~~~~~~~~~ if (compile_stack.stack == NULL) return REG_ESPACE; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ compile_stack.size <<= 1; ~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* These are the values to restore when we hit end of this ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ group. They are all relative offsets, so that if the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ whole pattern moves because of realloc, they will still ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ be valid. */ ~~~~~~~~~~~~~ COMPILE_STACK_TOP.begalt_offset = begalt - bufp->buffer; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ COMPILE_STACK_TOP.fixup_alt_jump ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ = fixup_alt_jump ? fixup_alt_jump - bufp->buffer + 1 : 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ COMPILE_STACK_TOP.laststart_offset = buf_end - bufp->buffer; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ COMPILE_STACK_TOP.regnum = bufp->re_ngroups; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ COMPILE_STACK_TOP.inner_group_offset ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ = buf_end - bufp->buffer + 3; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* We will eventually replace the 0 with the number of ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ groups inner to this one, using inner_group_offset, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ above. */ ~~~~~~~~~ GET_BUFFER_SPACE (5); ~~~~~~~~~~~~~~~~~~~~~ store_op2 (start_memory, buf_end, bufp->re_ngroups, 0); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ buf_end += 5; ~~~~~~~~~~~~~ compile_stack.avail++; ~~~~~~~~~~~~~~~~~~~~~~ fixup_alt_jump = 0; ~~~~~~~~~~~~~~~~~~~ laststart = 0; ~~~~~~~~~~~~~~ begalt = buf_end; ~~~~~~~~~~~~~~~~~ pending_exact = 0; ~~~~~~~~~~~~~~~~~~ } ~ break; ~~~~~~ case ')': ~~~~~~~~~ if (syntax & RE_NO_BK_PARENS) goto normal_backslash; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (COMPILE_STACK_EMPTY) ~~~~~~~~~~~~~~~~~~~~~~~~ { ~ if (syntax & RE_UNMATCHED_RIGHT_PAREN_ORD) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto normal_backslash; ~~~~~~~~~~~~~~~~~~~~~~ else ~~~~ FREE_STACK_RETURN (REG_ERPAREN); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ handle_close: ~~~~~~~~~~~~~ if (fixup_alt_jump) ~~~~~~~~~~~~~~~~~~~ { /* Push a dummy failure point at the end of the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ alternative for a possible future ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `pop_failure_jump' to pop. See comments at ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `push_dummy_failure' in `re_match_2'. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ BUF_PUSH (push_dummy_failure); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* We allocated space for this jump when we assigned ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ to `fixup_alt_jump', in the `handle_alt' case below. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ STORE_JUMP (jump_past_alt, fixup_alt_jump, buf_end - 1); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* See similar code for backslashed left paren above. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (COMPILE_STACK_EMPTY) ~~~~~~~~~~~~~~~~~~~~~~~~ { ~ if (syntax & RE_UNMATCHED_RIGHT_PAREN_ORD) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto normal_char; ~~~~~~~~~~~~~~~~~ else ~~~~ FREE_STACK_RETURN (REG_ERPAREN); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* Since we just checked for an empty stack above, this ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ``can't happen''. */ ~~~~~~~~~~~~~~~~~~~~~ assert (compile_stack.avail != 0); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ /* We don't just want to restore into `regnum', because ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ later groups should continue to be numbered higher, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ as in `(ab)c(de)' -- the second group is #2. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regnum_t this_group_regnum; ~~~~~~~~~~~~~~~~~~~~~~~~~~~ unsigned char *inner_group_loc; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ compile_stack.avail--; ~~~~~~~~~~~~~~~~~~~~~~ begalt = bufp->buffer + COMPILE_STACK_TOP.begalt_offset; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fixup_alt_jump ~~~~~~~~~~~~~~ = COMPILE_STACK_TOP.fixup_alt_jump ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ? bufp->buffer + COMPILE_STACK_TOP.fixup_alt_jump - 1 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ : 0; ~~~~ laststart = bufp->buffer + COMPILE_STACK_TOP.laststart_offset; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ this_group_regnum = COMPILE_STACK_TOP.regnum; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ pending_exact = 0; ~~~~~~~~~~~~~~~~~~ /* We're at the end of the group, so now we know how many ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ groups were inside this one. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ inner_group_loc ~~~~~~~~~~~~~~~ = bufp->buffer + COMPILE_STACK_TOP.inner_group_offset; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ STORE_NUMBER (inner_group_loc, regnum - this_group_regnum); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ GET_BUFFER_SPACE (5); ~~~~~~~~~~~~~~~~~~~~~ store_op2 (stop_memory, buf_end, this_group_regnum, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regnum - this_group_regnum); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ buf_end += 5; ~~~~~~~~~~~~~ } ~ break; ~~~~~~ case '|': /* `\|'. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~ if (syntax & RE_LIMITED_OPS || syntax & RE_NO_BK_VBAR) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto normal_backslash; ~~~~~~~~~~~~~~~~~~~~~~ handle_alt: ~~~~~~~~~~~ if (syntax & RE_LIMITED_OPS) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto normal_char; ~~~~~~~~~~~~~~~~~ /* Insert before the previous alternative a jump which ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ jumps to this alternative if the former fails. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ GET_BUFFER_SPACE (3); ~~~~~~~~~~~~~~~~~~~~~ INSERT_JUMP (on_failure_jump, begalt, buf_end + 6); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ pending_exact = 0; ~~~~~~~~~~~~~~~~~~ buf_end += 3; ~~~~~~~~~~~~~ /* The alternative before this one has a jump after it ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ which gets executed if it gets matched. Adjust that ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ jump so it will jump to this alternative's analogous ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ jump (put in below, which in turn will jump to the next ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (if any) alternative's such jump, etc.). The last such ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ jump jumps to the correct final destination. A picture: ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ _____ _____ ~~~~~~~~~~~ | | | | ~~~~~~~~~~~ | v | v ~~~~~~~~~~~ a | b | c ~~~~~~~~~~~ If we are at `b', then fixup_alt_jump right now points to a ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ three-byte space after `a'. We'll put in the jump, set ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fixup_alt_jump to right after `b', and leave behind three ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bytes which we'll fill in when we get to after `c'. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (fixup_alt_jump) ~~~~~~~~~~~~~~~~~~~ STORE_JUMP (jump_past_alt, fixup_alt_jump, buf_end); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Mark and leave space for a jump after this alternative, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ to be filled in later either by next alternative or ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ when know we're at the end of a series of alternatives. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fixup_alt_jump = buf_end; ~~~~~~~~~~~~~~~~~~~~~~~~~ GET_BUFFER_SPACE (3); ~~~~~~~~~~~~~~~~~~~~~ buf_end += 3; ~~~~~~~~~~~~~ laststart = 0; ~~~~~~~~~~~~~~ begalt = buf_end; ~~~~~~~~~~~~~~~~~ break; ~~~~~~ case '{': ~~~~~~~~~ /* If \{ is a literal. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!(syntax & RE_INTERVALS) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* If we're at `\{' and it's not the open-interval ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ operator. */ ~~~~~~~~~~~~~ || ((syntax & RE_INTERVALS) && (syntax & RE_NO_BK_BRACES)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || (p - 2 == pattern && p == pend)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto normal_backslash; ~~~~~~~~~~~~~~~~~~~~~~ #define BAD_INTERVAL(errnum) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ do \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (syntax & RE_NO_BK_BRACES) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto unfetch_interval; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ else \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ FREE_STACK_RETURN (errnum); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } while (0) ~~~~~~~~~~~ handle_interval: ~~~~~~~~~~~~~~~~ { ~ /* If got here, then the syntax allows intervals. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* At least (most) this many matches must be made. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int lower_bound = 0, upper_bound = -1; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ beg_interval = p - 1; ~~~~~~~~~~~~~~~~~~~~~ if (p == pend || itext_ichar_eql (p, '+')) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ BAD_INTERVAL (REG_EBRACE); ~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ GET_UNSIGNED_NUMBER (lower_bound); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (p == pend) BAD_INTERVAL (REG_EBRACE); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PATFETCH (c); ~~~~~~~~~~~~~ if (c == ',') ~~~~~~~~~~~~~ { ~ if (p == pend || itext_ichar_eql (p, '+')) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ BAD_INTERVAL (REG_EBRACE); ~~~~~~~~~~~~~~~~~~~~~~~~~~ GET_UNSIGNED_NUMBER (upper_bound); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (upper_bound < 0) upper_bound = RE_DUP_MAX; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (p == pend) BAD_INTERVAL (REG_EBRACE); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PATFETCH (c); ~~~~~~~~~~~~~ } ~ else ~~~~ { ~ /* Interval such as `{1}' => match exactly once. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ upper_bound = lower_bound; ~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ if (lower_bound > upper_bound) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ BAD_INTERVAL (REG_EBRACE); ~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ if (upper_bound > RE_DUP_MAX) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ BAD_INTERVAL (REG_ESIZEBR); ~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ if (!(syntax & RE_NO_BK_BRACES)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ if (c != '\\') ~~~~~~~~~~~~~~ FREE_STACK_RETURN (REG_BADBR); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (p == pend) ~~~~~~~~~~~~~~ FREE_STACK_RETURN (REG_EESCAPE); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PATFETCH (c); ~~~~~~~~~~~~~ } ~ if (c != '}') ~~~~~~~~~~~~~ { ~ BAD_INTERVAL (REG_EBRACE); ~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* We just parsed a valid interval. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* It's invalid to have no preceding RE. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!laststart) ~~~~~~~~~~~~~~~ { ~ if (syntax & RE_CONTEXT_INVALID_OPS) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ FREE_STACK_RETURN (REG_BADRPT); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ else if (syntax & RE_CONTEXT_INDEP_OPS) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ laststart = buf_end; ~~~~~~~~~~~~~~~~~~~~ else ~~~~ goto unfetch_interval; ~~~~~~~~~~~~~~~~~~~~~~ } ~ /* If the upper bound is zero, don't want to succeed at ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ all; jump from `laststart' to `b + 3', which will be ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the end of the buffer after we insert the jump. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (upper_bound == 0) ~~~~~~~~~~~~~~~~~~~~~ { ~ GET_BUFFER_SPACE (3); ~~~~~~~~~~~~~~~~~~~~~ INSERT_JUMP (jump, laststart, buf_end + 3); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ buf_end += 3; ~~~~~~~~~~~~~ } ~ /* Otherwise, we have a nontrivial interval. When ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ we're all done, the pattern will look like: ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ set_number_at ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ set_number_at ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ succeed_n ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ~~~~~~~~~~~~~~ jump_n ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (The upper bound and `jump_n' are omitted if ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `upper_bound' is 1, though.) */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ else ~~~~ { /* If the upper bound is > 1, we need to insert ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ more at the end of the loop. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int nbytes = 10 + (upper_bound > 1) * 10; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ GET_BUFFER_SPACE (nbytes); ~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Initialize lower bound of the `succeed_n', even ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ though it will be set during matching by its ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ attendant `set_number_at' (inserted next), ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ because `re_compile_fastmap' needs to know. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Jump to the `jump_n' we might insert below. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ INSERT_JUMP2 (succeed_n, laststart, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ buf_end + 5 + (upper_bound > 1) * 5, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ lower_bound); ~~~~~~~~~~~~~ buf_end += 5; ~~~~~~~~~~~~~ /* Code to initialize the lower bound. Insert ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ before the `succeed_n'. The `5' is the last two ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bytes of this `set_number_at', plus 3 bytes of ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the following `succeed_n'. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ insert_op2 (set_number_at, laststart, 5, lower_bound, buf_end); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ buf_end += 5; ~~~~~~~~~~~~~ if (upper_bound > 1) ~~~~~~~~~~~~~~~~~~~~ { /* More than one repetition is allowed, so ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ append a backward jump to the `succeed_n' ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ that starts this interval. ~~~~~~~~~~~~~~~~~~~~~~~~~~ When we've reached this during matching, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ we'll have matched the interval once, so ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ jump back only `upper_bound - 1' times. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ STORE_JUMP2 (jump_n, buf_end, laststart + 5, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ upper_bound - 1); ~~~~~~~~~~~~~~~~~ buf_end += 5; ~~~~~~~~~~~~~ /* The location we want to set is the second ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ parameter of the `jump_n'; that is `b-2' as ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ an absolute address. `laststart' will be ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the `set_number_at' we're about to insert; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `laststart+3' the number to set, the source ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for the relative address. But we are ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ inserting into the middle of the pattern -- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ so everything is getting moved up by 5. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Conclusion: (b - 2) - (laststart + 3) + 5, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ i.e., b - laststart. ~~~~~~~~~~~~~~~~~~~~ We insert this at the beginning of the loop ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ so that if we fail during matching, we'll ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ reinitialize the bounds. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ insert_op2 (set_number_at, laststart, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ buf_end - laststart, ~~~~~~~~~~~~~~~~~~~~ upper_bound - 1, buf_end); ~~~~~~~~~~~~~~~~~~~~~~~~~~ buf_end += 5; ~~~~~~~~~~~~~ } ~ } ~ pending_exact = 0; ~~~~~~~~~~~~~~~~~~ beg_interval = NULL; ~~~~~~~~~~~~~~~~~~~~ } ~ break; ~~~~~~ #undef BAD_INTERVAL ~~~~~~~~~~~~~~~~~~~ unfetch_interval: ~~~~~~~~~~~~~~~~~ /* If an invalid interval, match the characters as literals. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ assert (beg_interval); ~~~~~~~~~~~~~~~~~~~~~~ p = beg_interval; ~~~~~~~~~~~~~~~~~ beg_interval = NULL; ~~~~~~~~~~~~~~~~~~~~ /* normal_char and normal_backslash need `c'. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PATFETCH (c); ~~~~~~~~~~~~~ if (!(syntax & RE_NO_BK_BRACES)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ if (p > pattern && p[-1] == '\\') ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto normal_backslash; ~~~~~~~~~~~~~~~~~~~~~~ } ~ goto normal_char; ~~~~~~~~~~~~~~~~~ #ifdef emacs ~~~~~~~~~~~~ /* There is no way to specify the before_dot and after_dot ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ operators. rms says this is ok. --karl */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case '=': ~~~~~~~~~ BUF_PUSH (at_dot); ~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case 's': ~~~~~~~~~ laststart = buf_end; ~~~~~~~~~~~~~~~~~~~~ PATFETCH (c); ~~~~~~~~~~~~~ /* XEmacs addition */ ~~~~~~~~~~~~~~~~~~~~~ if (c >= 0x80 || syntax_spec_code[c] == 0377) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ FREE_STACK_RETURN (REG_ESYNTAX); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ BUF_PUSH_2 (syntaxspec, syntax_spec_code[c]); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case 'S': ~~~~~~~~~ laststart = buf_end; ~~~~~~~~~~~~~~~~~~~~ PATFETCH (c); ~~~~~~~~~~~~~ /* XEmacs addition */ ~~~~~~~~~~~~~~~~~~~~~ if (c >= 0x80 || syntax_spec_code[c] == 0377) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ FREE_STACK_RETURN (REG_ESYNTAX); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ BUF_PUSH_2 (notsyntaxspec, syntax_spec_code[c]); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ #ifdef MULE ~~~~~~~~~~~ /* 97.2.17 jhod merged in to XEmacs from mule-2.3 */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case 'c': ~~~~~~~~~ laststart = buf_end; ~~~~~~~~~~~~~~~~~~~~ PATFETCH_RAW (c); ~~~~~~~~~~~~~~~~~ if (c < 32 || c > 127) ~~~~~~~~~~~~~~~~~~~~~~ FREE_STACK_RETURN (REG_ECATEGORY); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ BUF_PUSH_2 (categoryspec, c); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case 'C': ~~~~~~~~~ laststart = buf_end; ~~~~~~~~~~~~~~~~~~~~ PATFETCH_RAW (c); ~~~~~~~~~~~~~~~~~ if (c < 32 || c > 127) ~~~~~~~~~~~~~~~~~~~~~~ FREE_STACK_RETURN (REG_ECATEGORY); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ BUF_PUSH_2 (notcategoryspec, c); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ /* end of category patch */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif /* MULE */ ~~~~~~~~~~~~~~~~~ #endif /* emacs */ ~~~~~~~~~~~~~~~~~~ case 'w': ~~~~~~~~~ laststart = buf_end; ~~~~~~~~~~~~~~~~~~~~ BUF_PUSH (wordchar); ~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case 'W': ~~~~~~~~~ laststart = buf_end; ~~~~~~~~~~~~~~~~~~~~ BUF_PUSH (notwordchar); ~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case '<': ~~~~~~~~~ BUF_PUSH (wordbeg); ~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case '>': ~~~~~~~~~ BUF_PUSH (wordend); ~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case 'b': ~~~~~~~~~ BUF_PUSH (wordbound); ~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case 'B': ~~~~~~~~~ BUF_PUSH (notwordbound); ~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case '`': ~~~~~~~~~ BUF_PUSH (begbuf); ~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case '\'': ~~~~~~~~~~ BUF_PUSH (endbuf); ~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case '1': case '2': case '3': case '4': case '5': ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case '6': case '7': case '8': case '9': ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ regnum_t reg = -1, regint; ~~~~~~~~~~~~~~~~~~~~~~~~~~ if (syntax & RE_NO_BK_REFS) ~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto normal_char; ~~~~~~~~~~~~~~~~~ PATUNFETCH; ~~~~~~~~~~~ GET_UNSIGNED_NUMBER (reg); ~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Progressively divide down the backreference until we find ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ one that corresponds to an existing register. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ while (reg > 10 && ~~~~~~~~~~~~~~~~~~ (syntax & RE_NO_MULTI_DIGIT_BK_REFS ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || reg > bufp->re_nsub ~~~~~~~~~~~~~~~~~~~~~~ || (bufp->external_to_internal_register[reg] ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ == (int) 0xDEADBEEF))) ~~~~~~~~~~~~~~~~~~~~~~ { ~ PATUNFETCH; ~~~~~~~~~~~ reg /= 10; ~~~~~~~~~~ } ~ if (reg > bufp->re_nsub ~~~~~~~~~~~~~~~~~~~~~~~ || (bufp->external_to_internal_register[reg] ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ == (int) 0xDEADBEEF)) ~~~~~~~~~~~~~~~~~~~~~ { ~ /* \N with one digit with a non-existing group has always ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ been a syntax error. ~~~~~~~~~~~~~~~~~~~~ GNU as of Fr 27 Mär 2020 16:24:07 GMT do not accept ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ multidigit backreferences; if they did there would be ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ an argument for this not being an error for those ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ backreferences that are less than some known named ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ backreference. As it is currently we should error, this ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ will give those writing code for XEmacs better ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ feedback. */ ~~~~~~~~~~~~ FREE_STACK_RETURN (REG_ESUBREG); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ regint = bufp->external_to_internal_register[reg]; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Can't back reference to a subexpression if inside of it. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (group_in_compile_stack (compile_stack, regint)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ /* Check REG, not REGINT. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ while (reg > 10) ~~~~~~~~~~~~~~~~ { ~ PATUNFETCH; ~~~~~~~~~~~ reg = reg / 10; ~~~~~~~~~~~~~~~ } ~ goto normal_char; ~~~~~~~~~~~~~~~~~ } ~ #ifdef emacs ~~~~~~~~~~~~ if (reg > 9 && ~~~~~~~~~~~~~~ bufp->warned_about_incompatible_back_references == 0) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ bufp->warned_about_incompatible_back_references = 1; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ warn_when_safe (intern ("regex"), Qinfo, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ "Back reference \\%d now has new " ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ "semantics in %s", reg, pattern); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ #endif ~~~~~~ laststart = buf_end; ~~~~~~~~~~~~~~~~~~~~ GET_BUFFER_SPACE (3); ~~~~~~~~~~~~~~~~~~~~~ store_op1 (duplicate, buf_end, regint); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ buf_end += 3; ~~~~~~~~~~~~~ } ~ break; ~~~~~~ case '+': ~~~~~~~~~ case '?': ~~~~~~~~~ if (syntax & RE_BK_PLUS_QM) ~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto handle_plus; ~~~~~~~~~~~~~~~~~ else ~~~~ goto normal_backslash; ~~~~~~~~~~~~~~~~~~~~~~ default: ~~~~~~~~ normal_backslash: ~~~~~~~~~~~~~~~~~ /* You might think it would be useful for \ to mean ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ not to translate; but if we don't translate it, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ it will never match anything. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ c = RE_TRANSLATE (c); ~~~~~~~~~~~~~~~~~~~~~ goto normal_char; ~~~~~~~~~~~~~~~~~ } ~ break; ~~~~~~ default: ~~~~~~~~ /* Expects the character in `c'. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* `p' points to the location after where `c' came from. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ normal_char: ~~~~~~~~~~~~ { ~ /* The following conditional synced to GNU Emacs 22.1. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* If no exactn currently being built. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!pending_exact ~~~~~~~~~~~~~~~~~~ /* If last exactn not at current position. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || pending_exact + *pending_exact + 1 != buf_end ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* We have only one byte following the exactn for the count. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || *pending_exact >= (1 << BYTEWIDTH) - MAX_ICHAR_LEN ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* If followed by a repetition operator. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ If the lookahead fails because of end of pattern, any ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ trailing backslash will get caught later. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || (p != pend && (*p == '*' || *p == '^')) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || ((syntax & RE_BK_PLUS_QM) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ? p + 1 < pend && *p == '\\' && (p[1] == '+' || p[1] == '?') ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ : p != pend && (*p == '+' || *p == '?')) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || ((syntax & RE_INTERVALS) ~~~~~~~~~~~~~~~~~~~~~~~~~~~ && ((syntax & RE_NO_BK_BRACES) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ? p != pend && *p == '{' ~~~~~~~~~~~~~~~~~~~~~~~~ : p + 1 < pend && (p[0] == '\\' && p[1] == '{')))) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ /* Start building a new exactn. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ laststart = buf_end; ~~~~~~~~~~~~~~~~~~~~ BUF_PUSH_2 (exactn, 0); ~~~~~~~~~~~~~~~~~~~~~~~ pending_exact = buf_end - 1; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ #ifndef MULE ~~~~~~~~~~~~ BUF_PUSH (c); ~~~~~~~~~~~~~ (*pending_exact)++; ~~~~~~~~~~~~~~~~~~~ #else ~~~~~ { ~ Bytecount bt_count; ~~~~~~~~~~~~~~~~~~~ Ibyte tmp_buf[MAX_ICHAR_LEN]; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int i; ~~~~~~ bt_count = set_itext_ichar (tmp_buf, c); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (i = 0; i < bt_count; i++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ BUF_PUSH (tmp_buf[i]); ~~~~~~~~~~~~~~~~~~~~~~ (*pending_exact)++; ~~~~~~~~~~~~~~~~~~~ } ~ } ~ #endif ~~~~~~ break; ~~~~~~ } ~ } /* switch (c) */ ~~~~~~~~~~~~~~~~~~ } /* while p != pend */ ~~~~~~~~~~~~~~~~~~~~~~~ /* Through the pattern now. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (fixup_alt_jump) ~~~~~~~~~~~~~~~~~~~ STORE_JUMP (jump_past_alt, fixup_alt_jump, buf_end); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!COMPILE_STACK_EMPTY) ~~~~~~~~~~~~~~~~~~~~~~~~~ FREE_STACK_RETURN (REG_EPAREN); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* If we don't want backtracking, force success ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the first time we reach the end of the compiled pattern. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (syntax & RE_NO_POSIX_BACKTRACKING) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ BUF_PUSH (succeed); ~~~~~~~~~~~~~~~~~~~ xfree (compile_stack.stack); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* We have succeeded; set the length of the buffer. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->used = buf_end - bufp->buffer; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef DEBUG ~~~~~~~~~~~~ if (debug_regexps & RE_DEBUG_COMPILATION) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ DEBUG_PRINT1 ("\nCompiled pattern: \n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ print_compiled_pattern (bufp); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ #endif /* DEBUG */ ~~~~~~~~~~~~~~~~~~ #ifndef MATCH_MAY_ALLOCATE ~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Initialize the failure stack to the largest possible stack. This ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ isn't necessary unless we're trying to avoid calling alloca in ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the search and match routines. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ int num_regs = bufp->re_ngroups + 1; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Since DOUBLE_FAIL_STACK refuses to double only if the current size ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ is strictly greater than re_max_failures, the largest possible stack ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ is 2 * re_max_failures failure points. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (fail_stack.size < (2 * re_max_failures * MAX_FAILURE_ITEMS)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ fail_stack.size = (2 * re_max_failures * MAX_FAILURE_ITEMS); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (! fail_stack.stack) ~~~~~~~~~~~~~~~~~~~~~~~ fail_stack.stack ~~~~~~~~~~~~~~~~ = (fail_stack_elt_t *) xmalloc (fail_stack.size ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ * sizeof (fail_stack_elt_t)); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ else ~~~~ fail_stack.stack ~~~~~~~~~~~~~~~~ = (fail_stack_elt_t *) xrealloc (fail_stack.stack, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (fail_stack.size ~~~~~~~~~~~~~~~~ * sizeof (fail_stack_elt_t))); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ regex_grow_registers (num_regs); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ #endif /* not MATCH_MAY_ALLOCATE */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return REG_NOERROR; ~~~~~~~~~~~~~~~~~~~ } /* regex_compile */ ~~~~~~~~~~~~~~~~~~~~~ ~ /* Subroutines for `regex_compile'. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Store OP at LOC followed by two-byte integer parameter ARG. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ static void ~~~~~~~~~~~ store_op1 (re_opcode_t op, unsigned char *loc, int arg) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ *loc = (unsigned char) op; ~~~~~~~~~~~~~~~~~~~~~~~~~~ STORE_NUMBER (loc + 1, arg); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* Like `store_op1', but for two two-byte parameters ARG1 and ARG2. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ static void ~~~~~~~~~~~ store_op2 (re_opcode_t op, unsigned char *loc, int arg1, int arg2) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ *loc = (unsigned char) op; ~~~~~~~~~~~~~~~~~~~~~~~~~~ STORE_NUMBER (loc + 1, arg1); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ STORE_NUMBER (loc + 3, arg2); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* Copy the bytes from LOC to END to open up three bytes of space at LOC ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for OP followed by two-byte integer parameter ARG. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ static void ~~~~~~~~~~~ insert_op1 (re_opcode_t op, unsigned char *loc, int arg, unsigned char *end) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ REGISTER unsigned char *pfrom = end; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ REGISTER unsigned char *pto = end + 3; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ while (pfrom != loc) ~~~~~~~~~~~~~~~~~~~~ *--pto = *--pfrom; ~~~~~~~~~~~~~~~~~~ store_op1 (op, loc, arg); ~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* Like `insert_op1', but for two two-byte parameters ARG1 and ARG2. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ static void ~~~~~~~~~~~ insert_op2 (re_opcode_t op, unsigned char *loc, int arg1, int arg2, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ unsigned char *end) ~~~~~~~~~~~~~~~~~~~ { ~ REGISTER unsigned char *pfrom = end; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ REGISTER unsigned char *pto = end + 5; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ while (pfrom != loc) ~~~~~~~~~~~~~~~~~~~~ *--pto = *--pfrom; ~~~~~~~~~~~~~~~~~~ store_op2 (op, loc, arg1, arg2); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* P points to just after a ^ in PATTERN. Return true if that ^ comes ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ after an alternative or a begin-subexpression. We assume there is at ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ least one character before the ^. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ static re_bool ~~~~~~~~~~~~~~ at_begline_loc_p (re_char *pattern, re_char *p, reg_syntax_t syntax) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ re_char *prev = p - 2; ~~~~~~~~~~~~~~~~~~~~~~ re_bool prev_prev_backslash = prev > pattern && prev[-1] == '\\'; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return ~~~~~~ /* After a subexpression? */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (*prev == '(' && (syntax & RE_NO_BK_PARENS || prev_prev_backslash)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* After an alternative? */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || (*prev == '|' && (syntax & RE_NO_BK_VBAR || prev_prev_backslash)); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* The dual of at_begline_loc_p. This one is for $. We assume there is ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ at least one character after the $, i.e., `P < PEND'. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ static re_bool ~~~~~~~~~~~~~~ at_endline_loc_p (re_char *p, re_char *pend, int syntax) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ re_char *next = p; ~~~~~~~~~~~~~~~~~~ re_bool next_backslash = *next == '\\'; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_char *next_next = p + 1 < pend ? p + 1 : 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return ~~~~~~ /* Before a subexpression? */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (syntax & RE_NO_BK_PARENS ? *next == ')' ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ : next_backslash && next_next && *next_next == ')') ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Before an alternative? */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || (syntax & RE_NO_BK_VBAR ? *next == '|' ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ : next_backslash && next_next && *next_next == '|'); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* Returns true if REGNUM is in one of COMPILE_STACK's elements and ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ false if it's not. */ ~~~~~~~~~~~~~~~~~~~~~~ static re_bool ~~~~~~~~~~~~~~ group_in_compile_stack (compile_stack_type compile_stack, regnum_t regnum) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ int this_element; ~~~~~~~~~~~~~~~~~ for (this_element = compile_stack.avail - 1; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ this_element >= 0; ~~~~~~~~~~~~~~~~~~ this_element--) ~~~~~~~~~~~~~~~ if (compile_stack.stack[this_element].regnum == regnum) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return true; ~~~~~~~~~~~~ return false; ~~~~~~~~~~~~~ } ~ /* Read the ending character of a range (in a bracket expression) from the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ uncompiled pattern *P_PTR (which ends at PEND). We assume the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ starting character is in `P[-2]'. (`P[-1]' is the character `-'.) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Then we set the translation of all bits between the starting and ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ending characters (inclusive) in the compiled pattern B. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Return an error code. ~~~~~~~~~~~~~~~~~~~~~ We use these short variable names so we can use the same macros as ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `regex_compile' itself. ~~~~~~~~~~~~~~~~~~~~~~~ Under Mule, this is only called when both chars of the range are ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ASCII. */ ~~~~~~~~~ static reg_errcode_t ~~~~~~~~~~~~~~~~~~~~ compile_range (re_char **p_ptr, re_char *pend, RE_TRANSLATE_TYPE translate, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ reg_syntax_t syntax, unsigned char *buf_end) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ Ichar this_char; ~~~~~~~~~~~~~~~~ re_char *p = *p_ptr; ~~~~~~~~~~~~~~~~~~~~ int range_start, range_end; ~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (p == pend) ~~~~~~~~~~~~~~ return REG_ERANGE; ~~~~~~~~~~~~~~~~~~ /* Even though the pattern is a signed `char *', we need to fetch ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ with unsigned char *'s; if the high bit of the pattern character ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ is set, the range endpoints will be negative if we fetch using a ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ signed char *. ~~~~~~~~~~~~~~ We also want to fetch the endpoints without translating them; the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ appropriate translation is done in the bit-setting loop below. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* The SVR4 compiler on the 3B2 had trouble with unsigned const char *. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ range_start = ((const unsigned char *) p)[-2]; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ range_end = ((const unsigned char *) p)[0]; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Have to increment the pointer into the pattern string, so the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ caller isn't still at the ending character. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (*p_ptr)++; ~~~~~~~~~~~ /* If the start is after the end, the range is empty. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (range_start > range_end) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return syntax & RE_NO_EMPTY_RANGES ? REG_ERANGE : REG_NOERROR; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Here we see why `this_char' has to be larger than an `unsigned ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ char' -- the range is inclusive, so if `range_end' == 0xff ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (assuming 8-bit characters), we would otherwise go into an infinite ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ loop, since all characters <= 0xff. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (this_char = range_start; this_char <= range_end; this_char++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ SET_LIST_BIT (RE_TRANSLATE (this_char)); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ return REG_NOERROR; ~~~~~~~~~~~~~~~~~~~ } ~ #ifdef MULE ~~~~~~~~~~~ static reg_errcode_t ~~~~~~~~~~~~~~~~~~~~ compile_extended_range (re_char **p_ptr, re_char *pend, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RE_TRANSLATE_TYPE translate, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ reg_syntax_t syntax, Lisp_Object rtab) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ Ichar this_char, range_start, range_end; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ const Ibyte *p; ~~~~~~~~~~~~~~~ if (*p_ptr == pend) ~~~~~~~~~~~~~~~~~~~ return REG_ERANGE; ~~~~~~~~~~~~~~~~~~ p = (const Ibyte *) *p_ptr; ~~~~~~~~~~~~~~~~~~~~~~~~~~~ range_end = itext_ichar (p); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ p--; /* back to '-' */ ~~~~~~~~~~~~~~~~~~~~~~ DEC_IBYTEPTR (p); /* back to start of range */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* We also want to fetch the endpoints without translating them; the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ appropriate translation is done in the bit-setting loop below. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ range_start = itext_ichar (p); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ INC_IBYTEPTR (*p_ptr); ~~~~~~~~~~~~~~~~~~~~~~ /* If the start is after the end, the range is empty. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (range_start > range_end) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return syntax & RE_NO_EMPTY_RANGES ? REG_ERANGE : REG_NOERROR; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifndef UNICODE_INTERNAL ~~~~~~~~~~~~~~~~~~~~~~~~ /* Can't have ranges spanning different charsets, except maybe for ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ranges entirely within the first 256 chars. (The intent of this is that ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the effect of such a range would be unpredictable, since there is no ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ well-defined ordering over charsets and the particular assignment of ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ charset ID's is arbitrary.) This does not apply to Unicode, with ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ well-defined character values. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if ((range_start >= 0x100 || range_end >= 0x100) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ && !EQ (old_mule_ichar_charset (range_start), ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ old_mule_ichar_charset (range_end))) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return REG_ERANGESPAN; ~~~~~~~~~~~~~~~~~~~~~~ #endif /* not UNICODE_INTERNAL */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (TRANSLATE_P (translate)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ /* #### This might be way inefficient if the range encompasses 10,000 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ chars or something. To be efficient, you'd have to do something like ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ this: ~~~~~ range_table a ~~~~~~~~~~~~~ range_table b; ~~~~~~~~~~~~~~ map_char_table (translation table, [range_start, range_end]) of ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ lambda (ch, translation): ~~~~~~~~~~~~~~~~~~~~~~~~~ put (ch, Qt) in a ~~~~~~~~~~~~~~~~~ put (translation, Qt) in b ~~~~~~~~~~~~~~~~~~~~~~~~~~ invert the range in a and truncate to [range_start, range_end] ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ put the union of a, b in rtab ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ This is to say, we want to map every character that has a translation ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ to its translation, and other characters to themselves. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ This assumes, as is reasonable in practice, that a translation ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ table maps individual characters to their translation, and does ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ not generally map multiple characters to the same translation. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ ~~ for (this_char = range_start; this_char <= range_end; this_char++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ SET_RANGETAB_BIT (RE_TRANSLATE (this_char)); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ } ~ else ~~~~ put_range_table (rtab, range_start, range_end, Qt); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return REG_NOERROR; ~~~~~~~~~~~~~~~~~~~ } ~ #endif /* MULE */ ~~~~~~~~~~~~~~~~~ #ifdef emacs ~~~~~~~~~~~~ reg_errcode_t ~~~~~~~~~~~~~ compile_char_class (re_wctype_t cc, Lisp_Object rtab, Bitbyte *flags_out) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ *flags_out |= re_wctype_to_bit (cc); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ switch (cc) ~~~~~~~~~~~ { ~ case RECC_ASCII: ~~~~~~~~~~~~~~~~ put_range_table (rtab, 0, 0x7f, Qt); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case RECC_XDIGIT: ~~~~~~~~~~~~~~~~~ put_range_table (rtab, 'a', 'f', Qt); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ put_range_table (rtab, 'A', 'f', Qt); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* fallthrough */ ~~~~~~~~~~~~~~~~~ case RECC_DIGIT: ~~~~~~~~~~~~~~~~ put_range_table (rtab, '0', '9', Qt); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case RECC_BLANK: ~~~~~~~~~~~~~~~~ put_range_table (rtab, ' ', ' ', Qt); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ put_range_table (rtab, '\t', '\t', Qt); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case RECC_PRINT: ~~~~~~~~~~~~~~~~ put_range_table (rtab, ' ', 0x7e, Qt); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ put_range_table (rtab, 0x80, CHAR_CODE_LIMIT, Qt); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case RECC_GRAPH: ~~~~~~~~~~~~~~~~ put_range_table (rtab, '!', 0x7e, Qt); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ put_range_table (rtab, 0x80, CHAR_CODE_LIMIT, Qt); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case RECC_NONASCII: ~~~~~~~~~~~~~~~~~~~ case RECC_MULTIBYTE: ~~~~~~~~~~~~~~~~~~~~ put_range_table (rtab, 0x80, CHAR_CODE_LIMIT, Qt); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case RECC_CNTRL: ~~~~~~~~~~~~~~~~ put_range_table (rtab, 0x00, 0x1f, Qt); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case RECC_UNIBYTE: ~~~~~~~~~~~~~~~~~~ /* Never true in XEmacs. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ /* The following all have their own bits in the class_bits argument to ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ charset_mule and charset_mule_not, they don't use the range table ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ information. */ ~~~~~~~~~~~~~~~ case RECC_ALPHA: ~~~~~~~~~~~~~~~~ case RECC_WORD: ~~~~~~~~~~~~~~~ case RECC_ALNUM: /* Equivalent to RECC_WORD */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_LOWER: ~~~~~~~~~~~~~~~~ case RECC_PUNCT: ~~~~~~~~~~~~~~~~ case RECC_SPACE: ~~~~~~~~~~~~~~~~ case RECC_UPPER: ~~~~~~~~~~~~~~~~ break; ~~~~~~ } ~ return REG_NOERROR; ~~~~~~~~~~~~~~~~~~~ } ~ #endif /* MULE */ ~~~~~~~~~~~~~~~~~ ~ /* re_compile_fastmap computes a ``fastmap'' for the compiled pattern in ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ BUFP. A fastmap records which of the (1 << BYTEWIDTH) possible ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ characters can start a string that matches the pattern. This fastmap ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ is used by re_search to skip quickly over impossible starting points. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ The caller must supply the address of a (1 << BYTEWIDTH)-byte data ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ area as BUFP->fastmap. ~~~~~~~~~~~~~~~~~~~~~~ We set the `fastmap', `fastmap_accurate', and `can_be_null' fields in ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the pattern buffer. ~~~~~~~~~~~~~~~~~~~ Returns 0 if we succeed, -2 if an internal error. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int ~~~ re_compile_fastmap (struct re_pattern_buffer *bufp ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RE_LISP_SHORT_CONTEXT_ARGS_DECL) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ int j, k; ~~~~~~~~~ #ifdef MATCH_MAY_ALLOCATE ~~~~~~~~~~~~~~~~~~~~~~~~~ fail_stack_type fail_stack; ~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ /* We don't push any register information onto the failure stack. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* &&#### this should be changed for 8-bit-fixed, for efficiency. see ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ comment marked with &&#### in re_search_2. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ REGISTER char *fastmap = bufp->fastmap; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ unsigned char *pattern = bufp->buffer; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ long size = bufp->used; ~~~~~~~~~~~~~~~~~~~~~~~ re_char *p = pattern; ~~~~~~~~~~~~~~~~~~~~~ REGISTER re_char *pend = pattern + size; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef REGEX_REL_ALLOC ~~~~~~~~~~~~~~~~~~~~~~ /* This holds the pointer to the failure stack, when ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ it is allocated relocatably. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fail_stack_elt_t *failure_stack_ptr; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ /* Assume that each path through the pattern can be null until ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ proven otherwise. We set this false at the bottom of switch ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ statement, to which we get only if a particular path doesn't ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ match the empty string. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_bool path_can_be_null = true; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* We aren't doing a `succeed_n' to begin with. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_bool succeed_n_p = false; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef ERROR_CHECK_MALLOC ~~~~~~~~~~~~~~~~~~~~~~~~~ /* The pattern comes from string data, not buffer data. We don't access ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ any buffer data, so we don't have to worry about malloc() (but the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ disallowed flag may have been set by a caller). */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int depth = bind_regex_malloc_disallowed (0); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ assert (fastmap != NULL && p != NULL); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ INIT_FAIL_STACK (); ~~~~~~~~~~~~~~~~~~~ memset (fastmap, 0, 1 << BYTEWIDTH); /* Assume nothing's valid. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->fastmap_accurate = 1; /* It will be when we're done. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->can_be_null = 0; ~~~~~~~~~~~~~~~~~~~~~~ while (1) ~~~~~~~~~ { ~ if (p == pend || *p == succeed) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ /* We have reached the (effective) end of pattern. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!FAIL_STACK_EMPTY ()) ~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ bufp->can_be_null |= path_can_be_null; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Reset for next path. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~ path_can_be_null = true; ~~~~~~~~~~~~~~~~~~~~~~~~ p = (unsigned char *) fail_stack.stack[--fail_stack.avail].pointer; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ continue; ~~~~~~~~~ } ~ else ~~~~ break; ~~~~~~ } ~ /* We should never be about to go beyond the end of the pattern. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ assert (p < pend); ~~~~~~~~~~~~~~~~~~ switch ((re_opcode_t) *p++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ /* I guess the idea here is to simply not bother with a fastmap ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if a backreference is used, since it's too hard to figure out ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the fastmap for the corresponding group. Setting ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `can_be_null' stops `re_search_2' from using the fastmap, so ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ that is all we do. */ ~~~~~~~~~~~~~~~~~~~~~~ case duplicate: ~~~~~~~~~~~~~~~ bufp->can_be_null = 1; ~~~~~~~~~~~~~~~~~~~~~~ goto done; ~~~~~~~~~~ /* Following are the cases which match a character. These end ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ with `break'. */ ~~~~~~~~~~~~~~~~~ case exactn: ~~~~~~~~~~~~ fastmap[p[1]] = 1; ~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case charset: ~~~~~~~~~~~~~ /* XEmacs: Under Mule, these bit vectors will ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ only contain values for characters below 0x80. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH))) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[j] = 1; ~~~~~~~~~~~~~~~ break; ~~~~~~ case charset_not: ~~~~~~~~~~~~~~~~~ /* Chars beyond end of map must be allowed. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef MULE ~~~~~~~~~~~ for (j = *p * BYTEWIDTH; j < 0x80; j++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[j] = 1; ~~~~~~~~~~~~~~~ /* And all extended characters must be allowed, too. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (j = 0x80; j < 0x100; j++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (ibyte_first_byte_p (j)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[j] = 1; ~~~~~~~~~~~~~~~ #else /* not MULE */ ~~~~~~~~~~~~~~~~~~~~ for (j = *p * BYTEWIDTH; j < (1 << BYTEWIDTH); j++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[j] = 1; ~~~~~~~~~~~~~~~ #endif /* MULE */ ~~~~~~~~~~~~~~~~~ for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!(p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH)))) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[j] = 1; ~~~~~~~~~~~~~~~ break; ~~~~~~ #ifdef MULE ~~~~~~~~~~~ case charset_mule: ~~~~~~~~~~~~~~~~~~ { ~ int nentries; ~~~~~~~~~~~~~ Bitbyte flags = *p++; ~~~~~~~~~~~~~~~~~~~~~ if (flags) ~~~~~~~~~~ { ~ /* We need to consult the syntax table, fastmap won't ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ work. */ ~~~~~~~~ bufp->can_be_null = 1; ~~~~~~~~~~~~~~~~~~~~~~ goto done; ~~~~~~~~~~ } ~ nentries = unified_range_table_nentries ((void *) p); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (j = 0; j < nentries; j++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ EMACS_INT first, last; ~~~~~~~~~~~~~~~~~~~~~~ Lisp_Object dummy_val; ~~~~~~~~~~~~~~~~~~~~~~ int jj; ~~~~~~~ Ibyte strr[MAX_ICHAR_LEN]; ~~~~~~~~~~~~~~~~~~~~~~~~~~ unified_range_table_get_range ((void *) p, j, &first, &last, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ &dummy_val); ~~~~~~~~~~~~ #ifndef UNICODE_INTERNAL ~~~~~~~~~~~~~~~~~~~~~~~~ for (jj = first; jj <= last && jj < 0x80; jj++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[jj] = 1; ~~~~~~~~~~~~~~~~ /* Ranges below 0x100 can span charsets, but there ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ are only two (Control-1 and Latin-1), and ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ either first or last has to be in them. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ set_itext_ichar (strr, first); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[*strr] = 1; ~~~~~~~~~~~~~~~~~~~ if (last < 0x100) ~~~~~~~~~~~~~~~~~ { ~ set_itext_ichar (strr, last); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[*strr] = 1; ~~~~~~~~~~~~~~~~~~~ } ~ else if (CHAR_CODE_LIMIT == last) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ /* This is RECC_MULTIBYTE or RECC_NONASCII; true for all ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ non-ASCII characters. */ ~~~~~~~~~~~~~~~~~~~~~~~~ jj = 0x80; ~~~~~~~~~~ while (jj < 0xA0) ~~~~~~~~~~~~~~~~~ { ~ fastmap[jj++] = 1; ~~~~~~~~~~~~~~~~~~ } ~ } ~ #else ~~~~~ /* Ranges can span charsets. We depend on the fact that ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ lead bytes are monotonically non-decreasing as ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ character values increase. @@#### This is a fairly ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ reasonable assumption in general (but DOES NOT WORK in ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ old Mule due to the ordering of private dimension-1 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ chars before official dimension-2 chars), and introduces ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ a dependency on the particular representation. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ Ibyte strrlast[MAX_ICHAR_LEN]; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ set_itext_ichar (strr, first); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ set_itext_ichar (strrlast, min (last, CHAR_CODE_LIMIT - 1)); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (jj = *strr; jj <= *strrlast; jj++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[jj] = 1; ~~~~~~~~~~~~~~~~ } ~ #endif /* not UNICODE_INTERNAL */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* If it's not a possible first byte, it can't be in the fastmap. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ In UTF-8, lead bytes are not contiguous with ASCII, so a ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ range spanning the ASCII/non-ASCII boundary will put ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ extraneous bytes in the range [0x80 - 0xBF] in the fastmap. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (j = 0x80; j < 0x100; j++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!ibyte_first_byte_p (j)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[j] = 0; ~~~~~~~~~~~~~~~ } ~ break; ~~~~~~ case charset_mule_not: ~~~~~~~~~~~~~~~~~~~~~~ { ~ int nentries; ~~~~~~~~~~~~~ int smallest_prev = 0; ~~~~~~~~~~~~~~~~~~~~~~ Bitbyte flags = *p++; ~~~~~~~~~~~~~~~~~~~~~ if (flags) ~~~~~~~~~~ { ~ /* We need to consult the syntax table, fastmap won't ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ work. */ ~~~~~~~~ bufp->can_be_null = 1; ~~~~~~~~~~~~~~~~~~~~~~ goto done; ~~~~~~~~~~ } ~ nentries = unified_range_table_nentries ((void *) p); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifndef UNICODE_INTERNAL ~~~~~~~~~~~~~~~~~~~~~~~~ for (j = 0; j < nentries; j++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ EMACS_INT first, last; ~~~~~~~~~~~~~~~~~~~~~~ Lisp_Object dummy_val; ~~~~~~~~~~~~~~~~~~~~~~ int jj; ~~~~~~~ unified_range_table_get_range ((void *) p, j, &first, &last, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ &dummy_val); ~~~~~~~~~~~~ for (jj = smallest_prev; jj < first && jj < 0x80; jj++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[jj] = 1; ~~~~~~~~~~~~~~~~ smallest_prev = last + 1; ~~~~~~~~~~~~~~~~~~~~~~~~~ if (smallest_prev >= 0x80) ~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ } ~ /* Also set lead bytes after the end */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (j = smallest_prev; j < 0x80; j++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[j] = 1; ~~~~~~~~~~~~~~~ /* Calculating which lead bytes are actually allowed ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ here is rather difficult, so we just punt and allow ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ all of them. ~~~~~~~~~~~~ */ ~~ for (j = 0x80; j < 0x100; j++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (ibyte_first_byte_p (j)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[j] = 1; ~~~~~~~~~~~~~~~ #else ~~~~~ for (j = 0; j < nentries; j++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ EMACS_INT first, last; ~~~~~~~~~~~~~~~~~~~~~~ /* This denotes a range of lead bytes that are not ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ in the fastmap. */ ~~~~~~~~~~~~~~~~~~ int firstlead, lastlead; ~~~~~~~~~~~~~~~~~~~~~~~~ Lisp_Object dummy_val; ~~~~~~~~~~~~~~~~~~~~~~ int jj; ~~~~~~~ unified_range_table_get_range ((void *) p, j, &first, &last, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ &dummy_val); ~~~~~~~~~~~~ /* With Unicode-internal, lead bytes that are entirely ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ within the range and not including the beginning or end ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ are definitely not in the fastmap. Leading bytes that ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ include the beginning or ending characters will be in ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the fastmap unless the beginning or ending characters ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ are the first or last character, respectively, that uses ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ this lead byte. ~~~~~~~~~~~~~~~ @@#### WARNING! In order to determine whether we are the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ first or last character using a lead byte we use and ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ embed in the code some knowledge of how UTF-8 works -- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ at least, the fact that the the first character using a ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ particular lead byte has the minimum-numbered trailing ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ byte in all its trailing bytes, and the last character ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ using a particular lead byte has the maximum-numbered ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ trailing byte in all its trailing bytes. We abstract ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ away the actual minimum/maximum trailing byte numbers, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ at least. We could perhaps do this more portably by ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ just looking at the representation of the character one ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ higher or lower and seeing if the lead byte changes, but ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ you'd run into the problem of invalid characters, e.g. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if you're at the edge of the range of surrogates or are ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the top-most allowed character. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ ~~ if (first < 0x80) ~~~~~~~~~~~~~~~~~ firstlead = first; ~~~~~~~~~~~~~~~~~~ else ~~~~ { ~ Ibyte strr[MAX_ICHAR_LEN]; ~~~~~~~~~~~~~~~~~~~~~~~~~~ Bytecount slen = set_itext_ichar (strr, first); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int kk; ~~~~~~~ /* Determine if we're the first character using our ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ leading byte. */ ~~~~~~~~~~~~~~~~ for (kk = 1; kk < slen; kk++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (strr[kk] != FIRST_TRAILING_BYTE) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ /* If not, this leading byte might occur, so ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ make sure it gets added to the fastmap. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ firstlead = *strr + 1; ~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ } ~ /* Otherwise, we're the first character using our ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ leading byte, and we don't need to add the leading ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ byte to the fastmap. (If our range doesn't ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ completely cover the leading byte, it will get added ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ anyway by the code handling the other end of the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ range.) */ ~~~~~~~~~~ firstlead = *strr; ~~~~~~~~~~~~~~~~~~ } ~ if (last < 0x80) ~~~~~~~~~~~~~~~~ lastlead = last; ~~~~~~~~~~~~~~~~ else ~~~~ { ~ Ibyte strr[MAX_ICHAR_LEN]; ~~~~~~~~~~~~~~~~~~~~~~~~~~ Bytecount slen ~~~~~~~~~~~~~~ = set_itext_ichar (strr, ~~~~~~~~~~~~~~~~~~~~~~~~ min (last, ~~~~~~~~~~ CHAR_CODE_LIMIT - 1)); ~~~~~~~~~~~~~~~~~~~~~~ int kk; ~~~~~~~ /* Same as above but for the last character using ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ our leading byte. */ ~~~~~~~~~~~~~~~~~~~~ for (kk = 1; kk < slen; kk++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (strr[kk] != LAST_TRAILING_BYTE) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ lastlead = *strr - 1; ~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ } ~ lastlead = *strr; ~~~~~~~~~~~~~~~~~ } ~ /* Now, FIRSTLEAD and LASTLEAD are set to the beginning and ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ end, inclusive, of a range of lead bytes that cannot be ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ in the fastmap. Essentially, we want to set all the other ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bytes to be in the fastmap. Here we handle those after ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the previous range and before this one. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (jj = smallest_prev; jj < firstlead; jj++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[jj] = 1; ~~~~~~~~~~~~~~~~ smallest_prev = lastlead + 1; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* Also set lead bytes after the end of the final range. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (j = smallest_prev; j < 0x100; j++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[j] = 1; ~~~~~~~~~~~~~~~ /* If it's not a possible first byte, it can't be in the fastmap. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ In UTF-8, lead bytes are not contiguous with ASCII, so a ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ range spanning the ASCII/non-ASCII boundary will put ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ extraneous bytes in the range [0x80 - 0xBF] in the fastmap. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (j = 0x80; j < 0x100; j++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!ibyte_first_byte_p (j)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[j] = 0; ~~~~~~~~~~~~~~~ #endif /* UNICODE_INTERNAL */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ break; ~~~~~~ #endif /* MULE */ ~~~~~~~~~~~~~~~~~ case anychar: ~~~~~~~~~~~~~ { ~ int fastmap_newline = fastmap['\n']; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* `.' matches anything ... */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef MULE ~~~~~~~~~~~ /* "anything" only includes bytes that can be the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ first byte of a character. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (j = 0; j < 0x100; j++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (ibyte_first_byte_p (j)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[j] = 1; ~~~~~~~~~~~~~~~ #else ~~~~~ for (j = 0; j < (1 << BYTEWIDTH); j++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[j] = 1; ~~~~~~~~~~~~~~~ #endif ~~~~~~ /* ... except perhaps newline. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!(bufp->syntax & RE_DOT_NEWLINE)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap['\n'] = fastmap_newline; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Return if we have already set `can_be_null'; if we have, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ then the fastmap is irrelevant. Something's wrong here. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ else if (bufp->can_be_null) ~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto done; ~~~~~~~~~~ /* Otherwise, have to check alternative paths. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ } ~ #ifndef emacs ~~~~~~~~~~~~~ case wordchar: ~~~~~~~~~~~~~~ for (j = 0; j < (1 << BYTEWIDTH); j++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (SYNTAX (ignored, j) == Sword) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[j] = 1; ~~~~~~~~~~~~~~~ break; ~~~~~~ case notwordchar: ~~~~~~~~~~~~~~~~~ for (j = 0; j < (1 << BYTEWIDTH); j++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (SYNTAX (ignored, j) != Sword) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[j] = 1; ~~~~~~~~~~~~~~~ break; ~~~~~~ #else /* emacs */ ~~~~~~~~~~~~~~~~~ case wordchar: ~~~~~~~~~~~~~~ case notwordchar: ~~~~~~~~~~~~~~~~~ case wordbound: ~~~~~~~~~~~~~~~ case notwordbound: ~~~~~~~~~~~~~~~~~~ case wordbeg: ~~~~~~~~~~~~~ case wordend: ~~~~~~~~~~~~~ case notsyntaxspec: ~~~~~~~~~~~~~~~~~~~ case syntaxspec: ~~~~~~~~~~~~~~~~ /* This match depends on text properties. These end with ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ aborting optimizations. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->can_be_null = 1; ~~~~~~~~~~~~~~~~~~~~~~ goto done; ~~~~~~~~~~ #if 0 /* all of the following code is unused now that the `syntax-table' ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ property exists -- it's trickier to do this than just look in ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the buffer. &&#### but we could just use the syntax-cache stuff ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ instead; why don't we? --ben */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case wordchar: ~~~~~~~~~~~~~~ k = (int) Sword; ~~~~~~~~~~~~~~~~ goto matchsyntax; ~~~~~~~~~~~~~~~~~ case notwordchar: ~~~~~~~~~~~~~~~~~ k = (int) Sword; ~~~~~~~~~~~~~~~~ goto matchnotsyntax; ~~~~~~~~~~~~~~~~~~~~ case syntaxspec: ~~~~~~~~~~~~~~~~ k = *p++; ~~~~~~~~~ matchsyntax: ~~~~~~~~~~~~ #ifdef MULE ~~~~~~~~~~~ for (j = 0; j < 0x80; j++) ~~~~~~~~~~~~~~~~~~~~~~~~~~ if (SYNTAX ~~~~~~~~~~ (XCHAR_TABLE (BUFFER_MIRROR_SYNTAX_TABLE (lispbuf)), j) == ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (enum syntaxcode) k) ~~~~~~~~~~~~~~~~~~~~ fastmap[j] = 1; ~~~~~~~~~~~~~~~ /* @@#### To be correct, we need to set the fastmap for any ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ lead byte any of whose characters can have this syntax code. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ This is hard to calculate so we just punt for now. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (j = 0x80; j < 0x100; j++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (ibyte_first_byte_p (j)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[j] = 1; ~~~~~~~~~~~~~~~ #else /* not MULE */ ~~~~~~~~~~~~~~~~~~~~ for (j = 0; j < (1 << BYTEWIDTH); j++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (SYNTAX ~~~~~~~~~~ (XCHAR_TABLE (BUFFER_MIRROR_SYNTAX_TABLE (lispbuf)), j) == ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (enum syntaxcode) k) ~~~~~~~~~~~~~~~~~~~~ fastmap[j] = 1; ~~~~~~~~~~~~~~~ #endif /* MULE */ ~~~~~~~~~~~~~~~~~ break; ~~~~~~ case notsyntaxspec: ~~~~~~~~~~~~~~~~~~~ k = *p++; ~~~~~~~~~ matchnotsyntax: ~~~~~~~~~~~~~~~ #ifdef MULE ~~~~~~~~~~~ for (j = 0; j < 0x80; j++) ~~~~~~~~~~~~~~~~~~~~~~~~~~ if (SYNTAX ~~~~~~~~~~ (XCHAR_TABLE ~~~~~~~~~~~~ (BUFFER_MIRROR_SYNTAX_TABLE (lispbuf)), j) != ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (enum syntaxcode) k) ~~~~~~~~~~~~~~~~~~~~ fastmap[j] = 1; ~~~~~~~~~~~~~~~ /* @@#### To be correct, we need to set the fastmap for any ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ lead byte all of whose characters do not have this syntax code. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ This is hard to calculate so we just punt for now. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (j = 0x80; j < 0x100; j++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (ibyte_first_byte_p (j)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[j] = 1; ~~~~~~~~~~~~~~~ #else /* not MULE */ ~~~~~~~~~~~~~~~~~~~~ for (j = 0; j < (1 << BYTEWIDTH); j++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (SYNTAX ~~~~~~~~~~ (XCHAR_TABLE ~~~~~~~~~~~~ (BUFFER_MIRROR_SYNTAX_TABLE (lispbuf)), j) != ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (enum syntaxcode) k) ~~~~~~~~~~~~~~~~~~~~ fastmap[j] = 1; ~~~~~~~~~~~~~~~ #endif /* MULE */ ~~~~~~~~~~~~~~~~~ break; ~~~~~~ #endif /* 0 */ ~~~~~~~~~~~~~~ #ifdef MULE ~~~~~~~~~~~ /* 97/2/17 jhod category patch */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case categoryspec: ~~~~~~~~~~~~~~~~~~ case notcategoryspec: ~~~~~~~~~~~~~~~~~~~~~ bufp->can_be_null = 1; ~~~~~~~~~~~~~~~~~~~~~~ UNBIND_REGEX_MALLOC_CHECK (); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return 0; ~~~~~~~~~ /* end if category patch */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif /* MULE */ ~~~~~~~~~~~~~~~~~ /* All cases after this match the empty string. These end with ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `continue'. */ ~~~~~~~~~~~~~~~ case before_dot: ~~~~~~~~~~~~~~~~ case at_dot: ~~~~~~~~~~~~ case after_dot: ~~~~~~~~~~~~~~~ continue; ~~~~~~~~~ #endif /* emacs */ ~~~~~~~~~~~~~~~~~~ case no_op: ~~~~~~~~~~~ case begline: ~~~~~~~~~~~~~ case endline: ~~~~~~~~~~~~~ case begbuf: ~~~~~~~~~~~~ case endbuf: ~~~~~~~~~~~~ #ifndef emacs ~~~~~~~~~~~~~ case wordbound: ~~~~~~~~~~~~~~~ case notwordbound: ~~~~~~~~~~~~~~~~~~ case wordbeg: ~~~~~~~~~~~~~ case wordend: ~~~~~~~~~~~~~ #endif ~~~~~~ case push_dummy_failure: ~~~~~~~~~~~~~~~~~~~~~~~~ continue; ~~~~~~~~~ case jump_n: ~~~~~~~~~~~~ case pop_failure_jump: ~~~~~~~~~~~~~~~~~~~~~~ case maybe_pop_jump: ~~~~~~~~~~~~~~~~~~~~ case jump: ~~~~~~~~~~ case jump_past_alt: ~~~~~~~~~~~~~~~~~~~ case dummy_failure_jump: ~~~~~~~~~~~~~~~~~~~~~~~~ EXTRACT_NUMBER_AND_INCR (j, p); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ p += j; ~~~~~~~ if (j > 0) ~~~~~~~~~~ continue; ~~~~~~~~~ /* Jump backward implies we just went through the body of a ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ loop and matched nothing. Opcode jumped to should be ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `on_failure_jump' or `succeed_n'. Just treat it like an ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ordinary jump. For a * loop, it has pushed its failure ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ point already; if so, discard that as redundant. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if ((re_opcode_t) *p != on_failure_jump ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ && (re_opcode_t) *p != succeed_n) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ continue; ~~~~~~~~~ p++; ~~~~ EXTRACT_NUMBER_AND_INCR (j, p); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ p += j; ~~~~~~~ /* If what's on the stack is where we are now, pop it. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!FAIL_STACK_EMPTY () ~~~~~~~~~~~~~~~~~~~~~~~~ && fail_stack.stack[fail_stack.avail - 1].pointer == p) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fail_stack.avail--; ~~~~~~~~~~~~~~~~~~~ continue; ~~~~~~~~~ case on_failure_jump: ~~~~~~~~~~~~~~~~~~~~~ case on_failure_keep_string_jump: ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ handle_on_failure_jump: ~~~~~~~~~~~~~~~~~~~~~~~ EXTRACT_NUMBER_AND_INCR (j, p); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* For some patterns, e.g., `(a?)?', `p+j' here points to the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ end of the pattern. We don't want to push such a point, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ since when we restore it above, entering the switch will ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ increment `p' past the end of the pattern. We don't need ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ to push such a point since we obviously won't find any more ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap entries beyond `pend'. Such a pattern can match ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the null string, though. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (p + j < pend) ~~~~~~~~~~~~~~~~~ { ~ if (!PUSH_PATTERN_OP (p + j, fail_stack)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ RESET_FAIL_STACK (); ~~~~~~~~~~~~~~~~~~~~ UNBIND_REGEX_MALLOC_CHECK (); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return -2; ~~~~~~~~~~ } ~ } ~ else ~~~~ bufp->can_be_null = 1; ~~~~~~~~~~~~~~~~~~~~~~ if (succeed_n_p) ~~~~~~~~~~~~~~~~ { ~ EXTRACT_NUMBER_AND_INCR (k, p); /* Skip the n. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ succeed_n_p = false; ~~~~~~~~~~~~~~~~~~~~ } ~ continue; ~~~~~~~~~ case succeed_n: ~~~~~~~~~~~~~~~ /* Get to the number of times to succeed. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ p += 2; ~~~~~~~ /* Increment p past the n for when k != 0. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ EXTRACT_NONNEGATIVE_AND_INCR (k, p); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (k == 0) ~~~~~~~~~~~ { ~ p -= 4; ~~~~~~~ succeed_n_p = true; /* Spaghetti code alert. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto handle_on_failure_jump; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ continue; ~~~~~~~~~ case set_number_at: ~~~~~~~~~~~~~~~~~~~ p += 4; ~~~~~~~ continue; ~~~~~~~~~ case start_memory: ~~~~~~~~~~~~~~~~~~ case stop_memory: ~~~~~~~~~~~~~~~~~ p += 4; ~~~~~~~ continue; ~~~~~~~~~ default: ~~~~~~~~ ABORT (); /* We have listed all the cases. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } /* switch *p++ */ ~~~~~~~~~~~~~~~~~~~ /* Getting here means we have found the possible starting ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ characters for one path of the pattern -- and that the empty ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ string does not match. We need not follow this path further. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Instead, look at the next alternative (remembered on the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ stack), or quit if no more. The test at the top of the loop ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ does these things. */ ~~~~~~~~~~~~~~~~~~~~~~ path_can_be_null = false; ~~~~~~~~~~~~~~~~~~~~~~~~~ p = pend; ~~~~~~~~~ } /* while p */ ~~~~~~~~~~~~~~~ /* Set `can_be_null' for the last path (also the first path, if the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ pattern is empty). */ ~~~~~~~~~~~~~~~~~~~~~~ bufp->can_be_null |= path_can_be_null; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ done: ~~~~~ RESET_FAIL_STACK (); ~~~~~~~~~~~~~~~~~~~~ UNBIND_REGEX_MALLOC_CHECK (); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return 0; ~~~~~~~~~ } /* re_compile_fastmap */ ~~~~~~~~~~~~~~~~~~~~~~~~~~ ~ /* Set REGS to hold NUM_REGS registers, storing them in STARTS and ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ENDS. Subsequent matches using PATTERN_BUFFER and REGS will use ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ this memory for recording register information. STARTS and ENDS ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ must be allocated using the malloc library routine, and must each ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ be at least NUM_REGS * sizeof (regoff_t) bytes long. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ If NUM_REGS == 0, then subsequent matches should allocate their own ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ register data. ~~~~~~~~~~~~~~ Unless this function is called, the first search or match using ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PATTERN_BUFFER will allocate its own register data, without ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ freeing the old data. */ ~~~~~~~~~~~~~~~~~~~~~~~~~ void ~~~~ re_set_registers (struct re_pattern_buffer *bufp, struct re_registers *regs, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int num_regs, regoff_t *starts, regoff_t *ends) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ if (num_regs) ~~~~~~~~~~~~~ { ~ bufp->regs_allocated = REGS_REALLOCATE; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regs->num_regs = num_regs; ~~~~~~~~~~~~~~~~~~~~~~~~~~ regs->start = starts; ~~~~~~~~~~~~~~~~~~~~~ regs->end = ends; ~~~~~~~~~~~~~~~~~ } ~ else ~~~~ { ~ bufp->regs_allocated = REGS_UNALLOCATED; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regs->num_regs = 0; ~~~~~~~~~~~~~~~~~~~ regs->start = regs->end = (regoff_t *) 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ } ~ ~ /* Searching routines. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Like re_search_2, below, but only one string is specified, and ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ doesn't let you say where to stop matching. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int ~~~ re_search (struct re_pattern_buffer *bufp, const char *string, int size, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int startpos, int range, struct re_registers *regs ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RE_LISP_CONTEXT_ARGS_DECL) ~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ return re_search_2 (bufp, NULL, 0, string, size, startpos, range, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regs, size RE_LISP_CONTEXT_ARGS); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* Using the compiled pattern in BUFP->buffer, first tries to match the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ virtual concatenation of STRING1 and STRING2, starting first at index ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ STARTPOS, then at STARTPOS + 1, and so on. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ STRING1 and STRING2 have length SIZE1 and SIZE2, respectively. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RANGE is how far to scan while trying to match. RANGE = 0 means try ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ only at STARTPOS; in general, the last start tried is STARTPOS + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RANGE. ~~~~~~ All sizes and positions refer to bytes (not chars); under Mule, the code ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ knows about the format of the text and will only check at positions ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ where a character starts. ~~~~~~~~~~~~~~~~~~~~~~~~~ With MULE, RANGE is a byte position, not a char position. The last ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ start tried is the character starting <= STARTPOS + RANGE. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ In REGS, return the indices of the virtual concatenation of STRING1 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ and STRING2 that matched the entire BUFP->buffer and its contained ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ subexpressions. ~~~~~~~~~~~~~~~ Do not consider matching one past the index STOP in the virtual ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ concatenation of STRING1 and STRING2. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ We return either the position in the strings at which the match was ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ found, -1 if no match, or -2 if error (such as failure ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ stack overflow). */ ~~~~~~~~~~~~~~~~~~~~ int ~~~ re_search_2 (struct re_pattern_buffer *bufp, const char *str1, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int size1, const char *str2, int size2, int startpos, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int range, struct re_registers *regs, int stop ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RE_LISP_CONTEXT_ARGS_DECL) ~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ int val; ~~~~~~~~ re_char *string1 = (re_char *) str1; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_char *string2 = (re_char *) str2; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ REGISTER char *fastmap = bufp->fastmap; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ REGISTER RE_TRANSLATE_TYPE translate = bufp->translate; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int total_size = size1 + size2; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int endpos = startpos + range; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef REGEX_BEGLINE_CHECK ~~~~~~~~~~~~~~~~~~~~~~~~~~ int anchored_at_begline = 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ re_char *d; ~~~~~~~~~~~ #ifdef emacs ~~~~~~~~~~~~ Internal_Format fmt = buffer_or_other_internal_format (lispobj); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef REL_ALLOC ~~~~~~~~~~~~~~~~ const Ibyte *orig_buftext = ~~~~~~~~~~~~~~~~~~~~~~~~~~~ BUFFERP (lispobj) ? ~~~~~~~~~~~~~~~~~~~ BYTE_BUF_BYTE_ADDRESS (XBUFFER (lispobj), ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ BYTE_BUF_BEG (XBUFFER (lispobj))) : ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 0; ~~ #endif ~~~~~~ #ifdef ERROR_CHECK_MALLOC ~~~~~~~~~~~~~~~~~~~~~~~~~ int depth; ~~~~~~~~~~ #endif ~~~~~~ #endif /* emacs */ ~~~~~~~~~~~~~~~~~~ int forward_search_p; ~~~~~~~~~~~~~~~~~~~~~ /* Check for out-of-range STARTPOS. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (startpos < 0 || startpos > total_size) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return -1; ~~~~~~~~~~ /* Fix up RANGE if it might eventually take us outside ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the virtual concatenation of STRING1 and STRING2. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (endpos < 0) ~~~~~~~~~~~~~~~ range = 0 - startpos; ~~~~~~~~~~~~~~~~~~~~~ else if (endpos > total_size) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ range = total_size - startpos; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ forward_search_p = range > 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (void) (forward_search_p); /* This is only used with assertions, silence the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ compiler warning when they're turned off. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* If the search isn't to be a backwards one, don't waste time in a ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ search for a pattern that must be anchored. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (bufp->used > 0 && (re_opcode_t) bufp->buffer[0] == begbuf && range > 0) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ if (startpos > 0) ~~~~~~~~~~~~~~~~~ return -1; ~~~~~~~~~~ else ~~~~ { ~ d = ((const unsigned char *) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (startpos >= size1 ? string2 - size1 : string1) + startpos); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ range = itext_ichar_len_fmt (d, fmt); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ } ~ #ifdef emacs ~~~~~~~~~~~~ /* In a forward search for something that starts with \=. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ don't keep searching past point. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (bufp->used > 0 && (re_opcode_t) bufp->buffer[0] == at_dot && range > 0) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ if (!BUFFERP (lispobj)) ~~~~~~~~~~~~~~~~~~~~~~~ return -1; ~~~~~~~~~~ range = (BYTE_BUF_PT (XBUFFER (lispobj)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - BYTE_BUF_BEGV (XBUFFER (lispobj)) - startpos); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (range < 0) ~~~~~~~~~~~~~~ return -1; ~~~~~~~~~~ } ~ #endif /* emacs */ ~~~~~~~~~~~~~~~~~~ #ifdef ERROR_CHECK_MALLOC ~~~~~~~~~~~~~~~~~~~~~~~~~ /* Do this after the above return()s. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ depth = bind_regex_malloc_disallowed (1); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ /* Update the fastmap now if not correct already. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ BEGIN_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~~~ if (fastmap && !bufp->fastmap_accurate) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (re_compile_fastmap (bufp RE_LISP_SHORT_CONTEXT_ARGS) == -2) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ END_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~ UNBIND_REGEX_MALLOC_CHECK (); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return -2; ~~~~~~~~~~ } ~ END_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~ RE_SEARCH_RELOCATE_MOVEABLE_DATA_POINTERS (); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef REGEX_BEGLINE_CHECK ~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ long i = 0; ~~~~~~~~~~~ while (i < bufp->used) ~~~~~~~~~~~~~~~~~~~~~~ { ~ if (bufp->buffer[i] == start_memory || ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->buffer[i] == stop_memory) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ i += 4; ~~~~~~~ else ~~~~ break; ~~~~~~ } ~ anchored_at_begline = i < bufp->used && bufp->buffer[i] == begline; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ #endif ~~~~~~ #ifdef emacs ~~~~~~~~~~~~ BEGIN_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~~~ /* Update the mirror syntax table if it's used and dirty. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ SYNTAX_CODE (BUFFER_MIRROR_SYNTAX_TABLE (lispbuf), 'a'); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ scache = setup_syntax_cache (scache, lispobj, lispbuf, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ offset_to_bytexpos (lispobj, startpos), ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 1); ~~~ END_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~ RE_SEARCH_RELOCATE_MOVEABLE_DATA_POINTERS (); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ /* Loop through the string, looking for a place to start matching. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (;;) ~~~~~~~~ { ~ #ifdef REGEX_BEGLINE_CHECK ~~~~~~~~~~~~~~~~~~~~~~~~~~ /* If the regex is anchored at the beginning of a line (i.e. with a ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ^), then we can speed things up by skipping to the next ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ beginning-of-line. However, to determine "beginning of line" we ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ need to look at the previous char, so can't do this check if at ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ beginning of either string. (Well, we could if at the beginning of ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the second string, but it would require additional code, and this ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ is just an optimization.) */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (anchored_at_begline && startpos > 0 && startpos != size1) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ if (range > 0) ~~~~~~~~~~~~~~ { ~ /* whose stupid idea was it anyway to make this ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ function take two strings to match?? */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int lim = 0; ~~~~~~~~~~~~ re_char *orig_d; ~~~~~~~~~~~~~~~~ re_char *stop_d; ~~~~~~~~~~~~~~~~ /* Compute limit as below in fastmap code, so we are guaranteed ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ to remain within a single string. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (startpos < size1 && startpos + range >= size1) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ lim = range - (size1 - startpos); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ d = ((const unsigned char *) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (startpos >= size1 ? string2 - size1 : string1) + startpos); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ orig_d = d; ~~~~~~~~~~~ stop_d = d + range - lim; ~~~~~~~~~~~~~~~~~~~~~~~~~ /* We want to find the next location (including the current ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ one) where the previous char is a newline, so back up one ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ and search forward for a newline. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEC_IBYTEPTR_FMT (d, fmt); /* Ok, since startpos != size1. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Written out as an if-else to avoid testing `translate' ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ inside the loop. */ ~~~~~~~~~~~~~~~~~~~~ if (TRANSLATE_P (translate)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ while (d < stop_d && ~~~~~~~~~~~~~~~~~~~~ RE_TRANSLATE_1 (itext_ichar_fmt (d, fmt, lispobj)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ != '\n') ~~~~~~~~ INC_IBYTEPTR_FMT (d, fmt); ~~~~~~~~~~~~~~~~~~~~~~~~~~ else ~~~~ while (d < stop_d && ~~~~~~~~~~~~~~~~~~~~ itext_ichar_ascii_fmt (d, fmt, lispobj) != '\n') ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ INC_IBYTEPTR_FMT (d, fmt); ~~~~~~~~~~~~~~~~~~~~~~~~~~ /* If we were stopped by a newline, skip forward over it. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Otherwise we will get in an infloop when our start position ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ was at begline. */ ~~~~~~~~~~~~~~~~~~ if (d < stop_d) ~~~~~~~~~~~~~~~ INC_IBYTEPTR_FMT (d, fmt); ~~~~~~~~~~~~~~~~~~~~~~~~~~ range -= d - orig_d; ~~~~~~~~~~~~~~~~~~~~ startpos += d - orig_d; ~~~~~~~~~~~~~~~~~~~~~~~ assert (!forward_search_p || range >= 0); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ else if (range < 0) ~~~~~~~~~~~~~~~~~~~ { ~ /* We're lazy, like in the fastmap code below */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Ichar c; ~~~~~~~~ d = ((const unsigned char *) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (startpos >= size1 ? string2 - size1 : string1) + startpos); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEC_IBYTEPTR_FMT (d, fmt); ~~~~~~~~~~~~~~~~~~~~~~~~~~ c = itext_ichar_fmt (d, fmt, lispobj); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ c = RE_TRANSLATE (c); ~~~~~~~~~~~~~~~~~~~~~ if (c != '\n') ~~~~~~~~~~~~~~ goto advance; ~~~~~~~~~~~~~ } ~ } ~ #endif /* REGEX_BEGLINE_CHECK */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* If a fastmap is supplied, skip quickly over characters that ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ cannot be the start of a match. If the pattern can match the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ null string, however, we don't need to skip characters; we want ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the first null string. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~ if (fastmap && startpos < total_size && !bufp->can_be_null) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ /* For the moment, fastmap always works as if buffer ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ is in default format, so convert chars in the search strings ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ into default format as we go along, if necessary. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ &&#### fastmap needs rethinking for 8-bit-fixed so ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ it's faster. We need it to reflect the raw ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 8-bit-fixed values. That isn't so hard if we assume ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ that the top 96 bytes represent a single 1-byte ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ charset. For 16-bit/32-bit stuff it's probably not ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ worth it to make the fastmap represent the raw, due to ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ its nature -- we'd have to use the LSB for the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap, and that causes lots of problems with Mule ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ chars, where it essentially wipes out the usefulness ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ of the fastmap entirely. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (range > 0) /* Searching forwards. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ int lim = 0; ~~~~~~~~~~~~ int irange = range; ~~~~~~~~~~~~~~~~~~~ if (startpos < size1 && startpos + range >= size1) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ lim = range - (size1 - startpos); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ d = ((const unsigned char *) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (startpos >= size1 ? string2 - size1 : string1) + startpos); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Written out as an if-else to avoid testing `translate' ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ inside the loop. */ ~~~~~~~~~~~~~~~~~~~~ if (TRANSLATE_P (translate)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ while (range > lim) ~~~~~~~~~~~~~~~~~~~ { ~ re_char *old_d = d; ~~~~~~~~~~~~~~~~~~~ #ifdef MULE ~~~~~~~~~~~ Ibyte tempch[MAX_ICHAR_LEN]; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Ichar buf_ch = ~~~~~~~~~~~~~~ RE_TRANSLATE_1 (itext_ichar_fmt (d, fmt, lispobj)); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ set_itext_ichar (tempch, buf_ch); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (fastmap[*tempch]) ~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ #else ~~~~~ if (fastmap[(unsigned char) RE_TRANSLATE_1 (*d)]) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ #endif /* MULE */ ~~~~~~~~~~~~~~~~~ INC_IBYTEPTR_FMT (d, fmt); ~~~~~~~~~~~~~~~~~~~~~~~~~~ range -= (d - old_d); ~~~~~~~~~~~~~~~~~~~~~ assert (!forward_search_p || range >= 0); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ } ~ #ifdef MULE ~~~~~~~~~~~ else if (fmt != FORMAT_DEFAULT) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ while (range > lim) ~~~~~~~~~~~~~~~~~~~ { ~ re_char *old_d = d; ~~~~~~~~~~~~~~~~~~~ Ibyte tempch[MAX_ICHAR_LEN]; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Ichar buf_ch = itext_ichar_fmt (d, fmt, lispobj); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ set_itext_ichar (tempch, buf_ch); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (fastmap[*tempch]) ~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ INC_IBYTEPTR_FMT (d, fmt); ~~~~~~~~~~~~~~~~~~~~~~~~~~ range -= (d - old_d); ~~~~~~~~~~~~~~~~~~~~~ assert (!forward_search_p || range >= 0); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ } ~ #endif /* MULE */ ~~~~~~~~~~~~~~~~~ else ~~~~ { ~ while (range > lim && !fastmap[*d]) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ re_char *old_d = d; ~~~~~~~~~~~~~~~~~~~ INC_IBYTEPTR (d); ~~~~~~~~~~~~~~~~~ range -= (d - old_d); ~~~~~~~~~~~~~~~~~~~~~ assert (!forward_search_p || range >= 0); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ } ~ startpos += irange - range; ~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ else /* Searching backwards. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ /* #### It's not clear why we don't just write a loop, like ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for the moving-forward case. Perhaps the writer got lazy, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ since backward searches aren't so common. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ d = ((const unsigned char *) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (startpos >= size1 ? string2 - size1 : string1) + startpos); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef MULE ~~~~~~~~~~~ { ~ Ibyte tempch[MAX_ICHAR_LEN]; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Ichar buf_ch = ~~~~~~~~~~~~~~ RE_TRANSLATE (itext_ichar_fmt (d, fmt, lispobj)); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ set_itext_ichar (tempch, buf_ch); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!fastmap[*tempch]) ~~~~~~~~~~~~~~~~~~~~~~ goto advance; ~~~~~~~~~~~~~ } ~ #else ~~~~~ if (!fastmap[(unsigned char) RE_TRANSLATE (*d)]) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto advance; ~~~~~~~~~~~~~ #endif /* MULE */ ~~~~~~~~~~~~~~~~~ } ~ } ~ /* If can't match the null string, and that's all we have left, fail. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (range >= 0 && startpos == total_size && fastmap ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ && !bufp->can_be_null) ~~~~~~~~~~~~~~~~~~~~~~ { ~ UNBIND_REGEX_MALLOC_CHECK (); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return -1; ~~~~~~~~~~ } ~ #ifdef emacs /* XEmacs added, w/removal of immediate_quit */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!no_quit_in_re_search) ~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ BEGIN_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~~~ QUIT; ~~~~~ END_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~ RE_SEARCH_RELOCATE_MOVEABLE_DATA_POINTERS (); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ #endif ~~~~~~ BEGIN_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~~~ val = re_match_2_internal (bufp, string1, size1, string2, size2, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ startpos, regs, stop ~~~~~~~~~~~~~~~~~~~~ RE_LISP_CONTEXT_ARGS); ~~~~~~~~~~~~~~~~~~~~~~ #ifndef REGEX_MALLOC ~~~~~~~~~~~~~~~~~~~~ ALLOCA_GARBAGE_COLLECT (); ~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ END_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~ RE_SEARCH_RELOCATE_MOVEABLE_DATA_POINTERS (); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (val >= 0) ~~~~~~~~~~~~~ { ~ UNBIND_REGEX_MALLOC_CHECK (); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return startpos; ~~~~~~~~~~~~~~~~ } ~ if (val == -2) ~~~~~~~~~~~~~~ { ~ UNBIND_REGEX_MALLOC_CHECK (); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return -2; ~~~~~~~~~~ } ~ RE_SEARCH_RELOCATE_MOVEABLE_DATA_POINTERS (); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ advance: ~~~~~~~~ if (!range) ~~~~~~~~~~~ break; ~~~~~~ else if (range > 0) ~~~~~~~~~~~~~~~~~~~ { ~ Bytecount d_size; ~~~~~~~~~~~~~~~~~ d = ((const unsigned char *) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (startpos >= size1 ? string2 - size1 : string1) + startpos); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ d_size = itext_ichar_len_fmt (d, fmt); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ range -= d_size; ~~~~~~~~~~~~~~~~ assert (!forward_search_p || range >= 0); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ startpos += d_size; ~~~~~~~~~~~~~~~~~~~ } ~ else ~~~~ { ~ Bytecount d_size; ~~~~~~~~~~~~~~~~~ /* Note startpos > size1 not >=. If we are on the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ string1/string2 boundary, we want to backup into string1. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ d = ((const unsigned char *) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (startpos > size1 ? string2 - size1 : string1) + startpos); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEC_IBYTEPTR_FMT (d, fmt); ~~~~~~~~~~~~~~~~~~~~~~~~~~ d_size = itext_ichar_len_fmt (d, fmt); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ range += d_size; ~~~~~~~~~~~~~~~~ assert (!forward_search_p || range >= 0); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ startpos -= d_size; ~~~~~~~~~~~~~~~~~~~ } ~ } ~ UNBIND_REGEX_MALLOC_CHECK (); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return -1; ~~~~~~~~~~ } /* re_search_2 */ ~~~~~~~~~~~~~~~~~~~ ~ /* Declarations and macros for re_match_2. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* This converts PTR, a pointer into one of the search strings `string1' ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ and `string2' into an offset from the beginning of that string. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define POINTER_TO_OFFSET(ptr) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (FIRST_STRING_P (ptr) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~ ? ((regoff_t) ((ptr) - string1)) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ : ((regoff_t) ((ptr) - string2 + size1))) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Macros for dealing with the split strings in re_match_2. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define MATCHING_IN_FIRST_STRING (dend == end_match_1) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Call before fetching a character with *d. This switches over to ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ string2 if necessary. */ ~~~~~~~~~~~~~~~~~~~~~~~~~ #define REGEX_PREFETCH() \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ while (d == dend) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { \ ~~~~~~~~~~~ /* End of string2 => fail. */ \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (dend == end_match_2) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto fail; \ ~~~~~~~~~~~~~~~~~~ /* End of string1 => advance to string2. */ \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ d = string2; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~ dend = end_match_2; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* Test if at very beginning or at very end of the virtual concatenation ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ of `string1' and `string2'. If only one string, it's `string2'. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define AT_STRINGS_BEG(d) ((d) == (size1 ? string1 : string2) || !size2) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define AT_STRINGS_END(d) ((d) == end2) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* XEmacs change: ~~~~~~~~~~~~~~~~~ If the given position straddles the string gap, return the equivalent ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ position that is before or after the gap, respectively; otherwise, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return the same position. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define POS_BEFORE_GAP_UNSAFE(d) ((d) == string2 ? end1 : (d)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define POS_AFTER_GAP_UNSAFE(d) ((d) == end1 ? string2 : (d)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Test if CH is a word-constituent character. (XEmacs change) */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define WORDCHAR_P(ch) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (SYNTAX (BUFFER_MIRROR_SYNTAX_TABLE (lispbuf), ch) == Sword) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Free everything we malloc. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef MATCH_MAY_ALLOCATE ~~~~~~~~~~~~~~~~~~~~~~~~~ #define FREE_VAR(var,type) if (var) REGEX_FREE (var, type); var = NULL ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define FREE_VARIABLES() \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ do { \ ~~~~~~~~~~~~~~ UNBIND_REGEX_MALLOC_CHECK (); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ REGEX_FREE_STACK (fail_stack.stack); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ FREE_VAR (regstart, re_char **); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ FREE_VAR (regend, re_char **); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ FREE_VAR (old_regstart, re_char **); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ FREE_VAR (old_regend, re_char **); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ FREE_VAR (best_regstart, re_char **); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ FREE_VAR (best_regend, re_char **); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ FREE_VAR (reg_info, register_info_type *); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ FREE_VAR (reg_dummy, re_char **); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ FREE_VAR (reg_info_dummy, register_info_type *); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } while (0) ~~~~~~~~~~~ #else /* not MATCH_MAY_ALLOCATE */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define FREE_VARIABLES() \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ do { \ ~~~~~~~~~~~ UNBIND_REGEX_MALLOC_CHECK (); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } while (0) ~~~~~~~~~~~ #endif /* MATCH_MAY_ALLOCATE */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* These values must meet several constraints. They must not be valid ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ register values, which means we can use numbers larger than MAX_REGNUM. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ They must differ by 1, because of NUM_FAILURE_ITEMS above. And the value ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for the lowest register must be larger than the value for the highest ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ register, so we do not try to actually save any registers when none are ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ active. */ ~~~~~~~~~~~ #define NO_HIGHEST_ACTIVE_REG (MAX_REGNUM + 1) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define NO_LOWEST_ACTIVE_REG (NO_HIGHEST_ACTIVE_REG + 1) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ~ /* Matching routines. */ ~~~~~~~~~~~~~~~~~~~~~~~~~ #ifndef emacs /* XEmacs never uses this. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* re_match is like re_match_2 except it takes only a single string. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int ~~~ re_match (struct re_pattern_buffer *bufp, const char *string, int size, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int pos, struct re_registers *regs ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RE_LISP_CONTEXT_ARGS_DECL) ~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ int result = re_match_2_internal (bufp, NULL, 0, (re_char *) string, size, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ pos, regs, size ~~~~~~~~~~~~~~~ RE_LISP_CONTEXT_ARGS); ~~~~~~~~~~~~~~~~~~~~~~ ALLOCA_GARBAGE_COLLECT (); ~~~~~~~~~~~~~~~~~~~~~~~~~~ return result; ~~~~~~~~~~~~~~ } ~ #endif /* not emacs */ ~~~~~~~~~~~~~~~~~~~~~~ /* re_match_2 matches the compiled pattern in BUFP against the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (virtual) concatenation of STRING1 and STRING2 (of length SIZE1 and ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ SIZE2, respectively). We start matching at POS, and stop matching ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ at STOP. ~~~~~~~~ If REGS is non-null and the `no_sub' field of BUFP is nonzero, we ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ store offsets for the substring each group matched in REGS. See the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ documentation for exactly how many groups we fill. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ We return -1 if no match, -2 if an internal error (such as the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ failure stack overflowing). Otherwise, we return the length of the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ matched substring. */ ~~~~~~~~~~~~~~~~~~~~~~ int ~~~ re_match_2 (struct re_pattern_buffer *bufp, const char *string1, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int size1, const char *string2, int size2, int pos, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ struct re_registers *regs, int stop ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RE_LISP_CONTEXT_ARGS_DECL) ~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ int result; ~~~~~~~~~~~ #ifdef emacs ~~~~~~~~~~~~ /* Update the mirror syntax table if it's dirty now, this would otherwise ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ cause a malloc() in charset_mule in re_match_2_internal() when checking ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ characters' syntax. */ ~~~~~~~~~~~~~~~~~~~~~~ SYNTAX_CODE (BUFFER_MIRROR_SYNTAX_TABLE (lispbuf), 'a'); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ scache = setup_syntax_cache (scache, lispobj, lispbuf, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ offset_to_bytexpos (lispobj, pos), ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 1); ~~~ #endif ~~~~~~ result = re_match_2_internal (bufp, (re_char *) string1, size1, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (re_char *) string2, size2, ~~~~~~~~~~~~~~~~~~~~~~~~~~~ pos, regs, stop ~~~~~~~~~~~~~~~ RE_LISP_CONTEXT_ARGS); ~~~~~~~~~~~~~~~~~~~~~~ ALLOCA_GARBAGE_COLLECT (); ~~~~~~~~~~~~~~~~~~~~~~~~~~ return result; ~~~~~~~~~~~~~~ } ~ /* This is a separate function so that we can force an alloca cleanup ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ afterwards. */ ~~~~~~~~~~~~~~~ static int ~~~~~~~~~~ re_match_2_internal (struct re_pattern_buffer *bufp, re_char *string1, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int size1, re_char *string2, int size2, int pos, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ struct re_registers *regs, int stop ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RE_LISP_CONTEXT_ARGS_MULE_DECL) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ /* General temporaries. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~ int mcnt; ~~~~~~~~~ re_char *p1; ~~~~~~~~~~~~ int should_succeed; /* XEmacs change */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Just past the end of the corresponding string. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_char *end1, *end2; ~~~~~~~~~~~~~~~~~~~~~ /* Pointers into string1 and string2, just past the last characters in ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ each to consider matching. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_char *end_match_1, *end_match_2; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Where we are in the data, and the end of the current string. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_char *d, *dend; ~~~~~~~~~~~~~~~~~~ /* Where we are in the pattern, and the end of the pattern. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ unsigned char *p; ~~~~~~~~~~~~~~~~~ re_char *pstart; ~~~~~~~~~~~~~~~~ REGISTER re_char *pend; ~~~~~~~~~~~~~~~~~~~~~~~ /* Mark the opcode just after a start_memory, so we can test for an ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ empty subpattern when we get to the stop_memory. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_char *just_past_start_mem = 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* We use this to map every character in the string. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RE_TRANSLATE_TYPE translate = bufp->translate; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Failure point stack. Each place that can handle a failure further ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ down the line pushes a failure point on this stack. It consists of ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ restart, regend, and reg_info for all registers corresponding to ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the subexpressions we're currently inside, plus the number of such ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ registers, and, finally, two char *'s. The first char * is where ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ to resume scanning the pattern; the second one is where to resume ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ scanning the strings. If the latter is zero, the failure point is ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ a ``dummy''; if a failure happens and the failure point is a dummy, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ it gets discarded and the next one is tried. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef MATCH_MAY_ALLOCATE /* otherwise, this is global. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fail_stack_type fail_stack; ~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ #ifdef DEBUG ~~~~~~~~~~~~ static int failure_id; ~~~~~~~~~~~~~~~~~~~~~~ int nfailure_points_pushed = 0, nfailure_points_popped = 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ #ifdef REGEX_REL_ALLOC ~~~~~~~~~~~~~~~~~~~~~~ /* This holds the pointer to the failure stack, when ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ it is allocated relocatably. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fail_stack_elt_t *failure_stack_ptr; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ /* We fill all the registers internally, independent of what we ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return, for use in backreferences. The number here includes ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ an element for register zero. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regnum_t num_regs = bufp->re_ngroups + 1; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* The currently active registers. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regnum_t lowest_active_reg = NO_LOWEST_ACTIVE_REG; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regnum_t highest_active_reg = NO_HIGHEST_ACTIVE_REG; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Information on the contents of registers. These are pointers into ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the input strings; they record just what was matched (on this ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ attempt) by a subexpression part of the pattern, that is, the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regnum-th regstart pointer points to where in the pattern we began ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ matching and the regnum-th regend points to right after where we ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ stopped matching the regnum-th subexpression. (The zeroth register ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ keeps track of what the whole pattern matches.) */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_char **regstart, **regend; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ /* If a group that's operated upon by a repetition operator fails to ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ match anything, then the register for its start will need to be ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ restored because it will have been set to wherever in the string we ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ are when we last see its open-group operator. Similarly for a ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ register's end. */ ~~~~~~~~~~~~~~~~~~~ #ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_char **old_regstart, **old_regend; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ /* The is_active field of reg_info helps us keep track of which (possibly ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ nested) subexpressions we are currently in. The matched_something ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ field of reg_info[reg_num] helps us tell whether or not we have ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ matched any of the pattern so far this time through the reg_num-th ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ subexpression. These two fields get reset each time through any ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ loop their register is in. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef MATCH_MAY_ALLOCATE /* otherwise, this is global. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ register_info_type *reg_info; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ /* The following record the register info as found in the above ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ variables when we find a match better than any we've seen before. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ This happens as we backtrack through the failure points, which in ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ turn happens only if we have not yet matched the entire string. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int best_regs_set = false; ~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_char **best_regstart, **best_regend; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ /* Logically, this is `best_regend[0]'. But we don't want to have to ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ allocate space for that if we're not allocating space for anything ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ else (see below). Also, we never need info about register 0 for ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ any of the other register vectors, and it seems rather a kludge to ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ treat `best_regend' differently than the rest. So we keep track of ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the end of the best match so far in a separate variable. We ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ initialize this to NULL so that when we backtrack the first time ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ and need to test it, it's not garbage. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_char *match_end = NULL; ~~~~~~~~~~~~~~~~~~~~~~~~~~ /* This helps SET_REGS_MATCHED avoid doing redundant work. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int set_regs_matched_done = 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Used when we pop values we don't care about. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_char **reg_dummy; ~~~~~~~~~~~~~~~~~~~~ register_info_type *reg_info_dummy; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ #ifdef DEBUG ~~~~~~~~~~~~ /* Counts the total number of registers pushed. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int num_regs_pushed = 0; ~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ /* 1 if this match ends in the same string (string1 or string2) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ as the best previous match. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_bool same_str_p; ~~~~~~~~~~~~~~~~~~~ /* 1 if this match is the best seen so far. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_bool best_match_p; ~~~~~~~~~~~~~~~~~~~~~ #ifdef emacs ~~~~~~~~~~~~ Internal_Format fmt = buffer_or_other_internal_format (lispobj); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef REL_ALLOC ~~~~~~~~~~~~~~~~ const Ibyte *orig_buftext = ~~~~~~~~~~~~~~~~~~~~~~~~~~~ BUFFERP (lispobj) ? ~~~~~~~~~~~~~~~~~~~ BYTE_BUF_BYTE_ADDRESS (XBUFFER (lispobj), ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ BYTE_BUF_BEG (XBUFFER (lispobj))) : ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 0; ~~ #endif ~~~~~~ #ifdef ERROR_CHECK_MALLOC ~~~~~~~~~~~~~~~~~~~~~~~~~ int depth = bind_regex_malloc_disallowed (1); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ #endif /* emacs */ ~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT1 ("\n\nEntering re_match_2.\n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ BEGIN_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~~~ INIT_FAIL_STACK (); ~~~~~~~~~~~~~~~~~~~ p = (unsigned char *) ALLOCA (bufp->used); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ END_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~ /* re_match_2_internal() modifies the compiled pattern (see the succeed_n, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ jump_n, set_number_at opcodes), make it re-entrant by working on a ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ copy. This should also give better locality of reference. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ memcpy (p, bufp->buffer, bufp->used); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ pstart = (re_char *) p; ~~~~~~~~~~~~~~~~~~~~~~~ pend = pstart + bufp->used; ~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef MATCH_MAY_ALLOCATE ~~~~~~~~~~~~~~~~~~~~~~~~~ /* Do not bother to initialize all the register variables if there are ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ no groups in the pattern, as it takes a fair amount of time. If ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ there are groups, we include space for register 0 (the whole ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ pattern), even though we never use it, since it simplifies the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ array indexing. We should fix this. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (bufp->re_ngroups) ~~~~~~~~~~~~~~~~~~~~~ { ~ BEGIN_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~~~ regstart = REGEX_TALLOC (num_regs, re_char *); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regend = REGEX_TALLOC (num_regs, re_char *); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ old_regstart = REGEX_TALLOC (num_regs, re_char *); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ old_regend = REGEX_TALLOC (num_regs, re_char *); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ best_regstart = REGEX_TALLOC (num_regs, re_char *); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ best_regend = REGEX_TALLOC (num_regs, re_char *); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ reg_info = REGEX_TALLOC (num_regs, register_info_type); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ reg_dummy = REGEX_TALLOC (num_regs, re_char *); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ reg_info_dummy = REGEX_TALLOC (num_regs, register_info_type); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ END_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~ if (!(regstart && regend && old_regstart && old_regend && reg_info ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ && best_regstart && best_regend && reg_dummy && reg_info_dummy)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ FREE_VARIABLES (); ~~~~~~~~~~~~~~~~~~ return -2; ~~~~~~~~~~ } ~ } ~ else ~~~~ { ~ /* We must initialize all our variables to NULL, so that ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `FREE_VARIABLES' doesn't try to free them. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regstart = regend = old_regstart = old_regend = best_regstart ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ = best_regend = reg_dummy = NULL; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ reg_info = reg_info_dummy = (register_info_type *) NULL; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ #endif /* MATCH_MAY_ALLOCATE */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #if defined (emacs) && defined (REL_ALLOC) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ /* If the allocations above (or the call to setup_syntax_cache() in ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_match_2) caused a rel-alloc relocation, then fix up the data ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ pointers */ ~~~~~~~~~~~ Bytecount offset = offset_post_relocation (lispobj, orig_buftext); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (offset) ~~~~~~~~~~~ { ~ string1 += offset; ~~~~~~~~~~~~~~~~~~ string2 += offset; ~~~~~~~~~~~~~~~~~~ } ~ } ~ #endif /* defined (emacs) && defined (REL_ALLOC) */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* The starting position is bogus. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (pos < 0 || pos > size1 + size2) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ FREE_VARIABLES (); ~~~~~~~~~~~~~~~~~~ return -1; ~~~~~~~~~~ } ~ /* Initialize subexpression text positions to our sentinel to mark ones that ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ no start_memory/stop_memory has been seen for. Also initialize the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ register information struct. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (mcnt = 1; mcnt < num_regs; mcnt++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ regstart[mcnt] = regend[mcnt] = old_regstart[mcnt] = old_regend[mcnt] ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ = best_regstart[mcnt] = best_regend[mcnt] = REG_UNSET_VALUE; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ REG_MATCH_NULL_STRING_P (reg_info[mcnt]) = MATCH_NULL_UNSET_VALUE; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ IS_ACTIVE (reg_info[mcnt]) = 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ MATCHED_SOMETHING (reg_info[mcnt]) = 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ EVER_MATCHED_SOMETHING (reg_info[mcnt]) = 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* We move `string1' into `string2' if the latter's empty -- but not if ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `string1' is null. */ ~~~~~~~~~~~~~~~~~~~~~~ if (size2 == 0 && string1 != NULL) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ string2 = string1; ~~~~~~~~~~~~~~~~~~ size2 = size1; ~~~~~~~~~~~~~~ string1 = 0; ~~~~~~~~~~~~ size1 = 0; ~~~~~~~~~~ } ~ end1 = string1 + size1; ~~~~~~~~~~~~~~~~~~~~~~~ end2 = string2 + size2; ~~~~~~~~~~~~~~~~~~~~~~~ /* Compute where to stop matching, within the two strings. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (stop <= size1) ~~~~~~~~~~~~~~~~~~ { ~ end_match_1 = string1 + stop; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ end_match_2 = string2; ~~~~~~~~~~~~~~~~~~~~~~ } ~ else ~~~~ { ~ end_match_1 = end1; ~~~~~~~~~~~~~~~~~~~ end_match_2 = string2 + stop - size1; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* `p' scans through the pattern as `d' scans through the data. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `dend' is the end of the input string that `d' points within. `d' ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ is advanced into the following input string whenever necessary, but ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ this happens before fetching; therefore, at the beginning of the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ loop, `d' can be pointing at the end of a string, but it cannot ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ equal `string2'. */ ~~~~~~~~~~~~~~~~~~~~ if (size1 > 0 && pos <= size1) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ d = string1 + pos; ~~~~~~~~~~~~~~~~~~ dend = end_match_1; ~~~~~~~~~~~~~~~~~~~ } ~ else ~~~~ { ~ d = string2 + pos - size1; ~~~~~~~~~~~~~~~~~~~~~~~~~~ dend = end_match_2; ~~~~~~~~~~~~~~~~~~~ } ~ DEBUG_MATCH_PRINT1 ("The compiled pattern is: \n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT_COMPILED_PATTERN (bufp, p, pend); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT1 ("The string to match is: `"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT_DOUBLE_STRING (d, string1, size1, string2, size2); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT1 ("'\n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* This loops over pattern commands. It exits by returning from the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ function if the match is complete, or it drops through if the match ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fails at this starting point in the input data. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (;;) ~~~~~~~~ { ~ DEBUG_MATCH_PRINT2 ("\n0x%zx: ", (Bytecount) p); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef emacs /* XEmacs added, w/removal of immediate_quit */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!no_quit_in_re_search) ~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ BEGIN_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~~~ QUIT; ~~~~~ END_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~ RE_MATCH_RELOCATE_MOVEABLE_DATA_POINTERS (); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ #endif ~~~~~~ if (p == pend) ~~~~~~~~~~~~~~ { /* End of pattern means we might have succeeded. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT1 ("end of pattern ... "); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* If we haven't matched the entire string, and we want the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ longest match, try backtracking. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (d != end_match_2) ~~~~~~~~~~~~~~~~~~~~~ { ~ same_str_p = (FIRST_STRING_P (match_end) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ == MATCHING_IN_FIRST_STRING); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* AIX compiler got confused when this was combined ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ with the previous declaration. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (same_str_p) ~~~~~~~~~~~~~~~ best_match_p = d > match_end; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ else ~~~~ best_match_p = !MATCHING_IN_FIRST_STRING; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT1 ("backtracking.\n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!FAIL_STACK_EMPTY ()) ~~~~~~~~~~~~~~~~~~~~~~~~~ { /* More failure points to try. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* If exceeds best match so far, save it. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!best_regs_set || best_match_p) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ best_regs_set = true; ~~~~~~~~~~~~~~~~~~~~~ match_end = d; ~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT1 ("\nSAVING match as best so far.\n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (mcnt = 1; mcnt < num_regs; mcnt++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ best_regstart[mcnt] = regstart[mcnt]; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ best_regend[mcnt] = regend[mcnt]; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ } ~ goto fail; ~~~~~~~~~~ } ~ /* If no failure points, don't restore garbage. And if ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ last match is real best match, don't restore second ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ best one. */ ~~~~~~~~~~~~ else if (best_regs_set && !best_match_p) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ restore_best_regs: ~~~~~~~~~~~~~~~~~~ /* Restore best match. It may happen that `dend == ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ end_match_1' while the restored d is in string2. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ For example, the pattern `x.*y.*z' against the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ strings `x-' and `y-z-', if the two strings are ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ not consecutive in memory. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT1 ("Restoring best registers.\n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ d = match_end; ~~~~~~~~~~~~~~ dend = ((d >= string1 && d <= end1) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ? end_match_1 : end_match_2); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (mcnt = 1; mcnt < num_regs; mcnt++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ regstart[mcnt] = best_regstart[mcnt]; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regend[mcnt] = best_regend[mcnt]; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ } ~ } /* d != end_match_2 */ ~~~~~~~~~~~~~~~~~~~~~~~~ succeed_label: ~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT1 ("Accepting match.\n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* If caller wants register contents data back, do it. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ int num_nonshy_regs = bufp->re_nsub + 1; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (regs && !bufp->no_sub) ~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ /* Have the register data arrays been allocated? */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (bufp->regs_allocated == REGS_UNALLOCATED) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { /* No. So allocate them with malloc. We need one ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ extra element beyond `num_regs' for the `-1' marker ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ GNU code uses. */ ~~~~~~~~~~~~~~~~~~ regs->num_regs = MAX (RE_NREGS, num_nonshy_regs + 1); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ BEGIN_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~~~ regs->start = TALLOC (regs->num_regs, regoff_t); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regs->end = TALLOC (regs->num_regs, regoff_t); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ END_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~ RE_MATCH_RELOCATE_MOVEABLE_DATA_POINTERS (); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (regs->start == NULL || regs->end == NULL) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ FREE_VARIABLES (); ~~~~~~~~~~~~~~~~~~ return -2; ~~~~~~~~~~ } ~ bufp->regs_allocated = REGS_REALLOCATE; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ else if (bufp->regs_allocated == REGS_REALLOCATE) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { /* Yes. If we need more elements than were already ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ allocated, reallocate them. If we need fewer, just ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ leave it alone. */ ~~~~~~~~~~~~~~~~~~~ if (regs->num_regs < num_nonshy_regs + 1) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ regs->num_regs = num_nonshy_regs + 1; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ BEGIN_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~~~ RETALLOC (regs->start, regs->num_regs, regoff_t); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RETALLOC (regs->end, regs->num_regs, regoff_t); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ END_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~ RE_MATCH_RELOCATE_MOVEABLE_DATA_POINTERS (); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (regs->start == NULL || regs->end == NULL) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ FREE_VARIABLES (); ~~~~~~~~~~~~~~~~~~ return -2; ~~~~~~~~~~ } ~ } ~ } ~ else ~~~~ { ~ /* The braces fend off a "empty body in an else-statement" ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ warning under GCC when assert expands to nothing. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ assert (bufp->regs_allocated == REGS_FIXED); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* Convert the pointer data in `regstart' and `regend' to ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ indices. Register zero has to be set differently, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ since we haven't kept track of any info for it. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (regs->num_regs > 0) ~~~~~~~~~~~~~~~~~~~~~~~ { ~ regs->start[0] = pos; ~~~~~~~~~~~~~~~~~~~~~ regs->end[0] = (MATCHING_IN_FIRST_STRING ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ? ((regoff_t) (d - string1)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ : ((regoff_t) (d - string2 + size1))); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* Map over the NUM_NONSHY_REGS non-shy internal registers. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Copy each into the corresponding external register. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ MCNT indexes external registers. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (mcnt = 1; mcnt < MIN (num_nonshy_regs, regs->num_regs); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ mcnt++) ~~~~~~~ { ~ int internal_reg = bufp->external_to_internal_register[mcnt]; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if ((int)0xDEADBEEF == internal_reg ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || REG_UNSET (regstart[internal_reg]) || ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ REG_UNSET (regend[internal_reg])) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regs->start[mcnt] = regs->end[mcnt] = -1; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ else ~~~~ { ~ regs->start[mcnt] = ~~~~~~~~~~~~~~~~~~~ (regoff_t) POINTER_TO_OFFSET (regstart[internal_reg]); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regs->end[mcnt] = ~~~~~~~~~~~~~~~~~ (regoff_t) POINTER_TO_OFFSET (regend[internal_reg]); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ } ~ } /* regs && !bufp->no_sub */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* If we have regs and the regs structure has more elements than ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ were in the pattern, set the extra elements starting with ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ NUM_NONSHY_REGS to -1. If we (re)allocated the registers, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ this is the case, because we always allocate enough to have ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ at least one -1 at the end. ~~~~~~~~~~~~~~~~~~~~~~~~~~~ We do this even when no_sub is set because some applications ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (XEmacs) reuse register structures which may contain stale ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ information, and permit attempts to access those registers. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ It would be possible to require the caller to do this, but we'd ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ have to change the API for this function to reflect that, and ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ audit all callers. Note: as of 2003-04-17 callers in XEmacs ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ do clear the registers, but it's safer to leave this code in ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ because of reallocation. ~~~~~~~~~~~~~~~~~~~~~~~~ */ ~~ if (regs && regs->num_regs > 0) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (mcnt = num_nonshy_regs; mcnt < regs->num_regs; mcnt++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regs->start[mcnt] = regs->end[mcnt] = -1; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ DEBUG_MATCH_PRINT4 ("%u failure points pushed, %u popped (%u remain).\n", ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ nfailure_points_pushed, nfailure_points_popped, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ nfailure_points_pushed - nfailure_points_popped); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT2 ("%u registers pushed.\n", num_regs_pushed); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ mcnt = d - pos - (MATCHING_IN_FIRST_STRING ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ? string1 ~~~~~~~~~ : string2 - size1); ~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT2 ("Returning %d from re_match_2.\n", mcnt); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ FREE_VARIABLES (); ~~~~~~~~~~~~~~~~~~ return mcnt; ~~~~~~~~~~~~ } ~ /* Otherwise match next pattern command. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ switch ((re_opcode_t) *p++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ /* Ignore these. Used to ignore the n of succeed_n's which ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ currently have n == 0. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~ case no_op: ~~~~~~~~~~~ DEBUG_MATCH_PRINT1 ("EXECUTING no_op.\n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case succeed: ~~~~~~~~~~~~~ DEBUG_MATCH_PRINT1 ("EXECUTING succeed.\n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto succeed_label; ~~~~~~~~~~~~~~~~~~~ /* Match exactly a string of length n in the pattern. The ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ following byte in the pattern defines n, and the n bytes after ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ that make up the string to match. (Under Mule, this will be in ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the default internal format.) */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case exactn: ~~~~~~~~~~~~ mcnt = *p++; ~~~~~~~~~~~~ DEBUG_MATCH_PRINT2 ("EXECUTING exactn %d.\n", mcnt); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* This is written out as an if-else so we don't waste time ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ testing `translate' inside the loop. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (TRANSLATE_P (translate)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ do ~~ { ~ #ifdef MULE ~~~~~~~~~~~ Bytecount pat_len; ~~~~~~~~~~~~~~~~~~ REGEX_PREFETCH (); ~~~~~~~~~~~~~~~~~~ if (RE_TRANSLATE_1 (itext_ichar_fmt (d, fmt, lispobj)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ != itext_ichar (p)) ~~~~~~~~~~~~~~~~~~~ goto fail; ~~~~~~~~~~ pat_len = itext_ichar_len (p); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ p += pat_len; ~~~~~~~~~~~~~ INC_IBYTEPTR_FMT (d, fmt); ~~~~~~~~~~~~~~~~~~~~~~~~~~ mcnt -= pat_len; ~~~~~~~~~~~~~~~~ #else /* not MULE */ ~~~~~~~~~~~~~~~~~~~~ REGEX_PREFETCH (); ~~~~~~~~~~~~~~~~~~ if ((unsigned char) RE_TRANSLATE_1 (*d++) != *p++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto fail; ~~~~~~~~~~ mcnt--; ~~~~~~~ #endif ~~~~~~ } ~ while (mcnt > 0); ~~~~~~~~~~~~~~~~~ } ~ else ~~~~ { ~ #ifdef MULE ~~~~~~~~~~~ /* If buffer format is default, then we can shortcut and just ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ compare the text directly, byte by byte. Otherwise, we ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ need to go character by character. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (fmt != FORMAT_DEFAULT) ~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ do ~~ { ~ Bytecount pat_len; ~~~~~~~~~~~~~~~~~~ REGEX_PREFETCH (); ~~~~~~~~~~~~~~~~~~ if (itext_ichar_fmt (d, fmt, lispobj) != ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ itext_ichar (p)) ~~~~~~~~~~~~~~~~ goto fail; ~~~~~~~~~~ pat_len = itext_ichar_len (p); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ p += pat_len; ~~~~~~~~~~~~~ INC_IBYTEPTR_FMT (d, fmt); ~~~~~~~~~~~~~~~~~~~~~~~~~~ mcnt -= pat_len; ~~~~~~~~~~~~~~~~ } ~ while (mcnt > 0); ~~~~~~~~~~~~~~~~~ } ~ else ~~~~ #endif ~~~~~~ { ~ do ~~ { ~ REGEX_PREFETCH (); ~~~~~~~~~~~~~~~~~~ if (*d++ != *p++) goto fail; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ mcnt--; ~~~~~~~ } ~ while (mcnt > 0); ~~~~~~~~~~~~~~~~~ } ~ } ~ SET_REGS_MATCHED (); ~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ /* Match any character except possibly a newline or a null. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case anychar: ~~~~~~~~~~~~~ DEBUG_MATCH_PRINT1 ("EXECUTING anychar.\n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ REGEX_PREFETCH (); ~~~~~~~~~~~~~~~~~~ if ((!(bufp->syntax & RE_DOT_NEWLINE) && ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RE_TRANSLATE (itext_ichar_fmt (d, fmt, lispobj)) == '\n') ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || (bufp->syntax & RE_DOT_NOT_NULL && ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RE_TRANSLATE (itext_ichar_fmt (d, fmt, lispobj)) == ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ '\000')) ~~~~~~~~ goto fail; ~~~~~~~~~~ SET_REGS_MATCHED (); ~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT2 (" Matched `%c'.\n", *d); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ INC_IBYTEPTR_FMT (d, fmt); /* XEmacs change */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case charset: ~~~~~~~~~~~~~ case charset_not: ~~~~~~~~~~~~~~~~~ { ~ REGISTER Ichar c; ~~~~~~~~~~~~~~~~~ re_bool not_p = (re_opcode_t) *(p - 1) == charset_not; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT2 ("EXECUTING charset%s.\n", not_p ? "_not" : ""); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ REGEX_PREFETCH (); ~~~~~~~~~~~~~~~~~~ c = itext_ichar_fmt (d, fmt, lispobj); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ c = RE_TRANSLATE (c); /* The character to match. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Cast to `unsigned int' instead of `unsigned char' in case the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bit list is a full 32 bytes long. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if ((unsigned int)c < (unsigned int) (*p * BYTEWIDTH) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ && p[1 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH))) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ not_p = !not_p; ~~~~~~~~~~~~~~~ p += 1 + *p; ~~~~~~~~~~~~ if (!not_p) goto fail; ~~~~~~~~~~~~~~~~~~~~~~ SET_REGS_MATCHED (); ~~~~~~~~~~~~~~~~~~~~ INC_IBYTEPTR_FMT (d, fmt); /* XEmacs change */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ } ~ #ifdef MULE ~~~~~~~~~~~ case charset_mule: ~~~~~~~~~~~~~~~~~~ case charset_mule_not: ~~~~~~~~~~~~~~~~~~~~~~ { ~ REGISTER Ichar c; ~~~~~~~~~~~~~~~~~ re_bool not_p = (re_opcode_t) *(p - 1) == charset_mule_not; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Bitbyte class_bits = *p++; ~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT2 ("EXECUTING charset_mule%s.\n", ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ not_p ? "_not" : ""); ~~~~~~~~~~~~~~~~~~~~~ REGEX_PREFETCH (); ~~~~~~~~~~~~~~~~~~ c = itext_ichar_fmt (d, fmt, lispobj); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ c = RE_TRANSLATE (c); /* The character to match. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if ((class_bits && ~~~~~~~~~~~~~~~~~~ ((class_bits & BIT_WORD && ISWORD (c)) /* = ALNUM */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || (class_bits & BIT_ALPHA && ISALPHA (c)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || (class_bits & BIT_SPACE && ISSPACE (c)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || (class_bits & BIT_PUNCT && ISPUNCT (c)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || (TRANSLATE_P (translate) ? ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (class_bits & (BIT_UPPER | BIT_LOWER) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ && !NOCASEP (lispbuf, c)) ~~~~~~~~~~~~~~~~~~~~~~~~~ : ((class_bits & BIT_UPPER && ISUPPER (c)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || (class_bits & BIT_LOWER && ISLOWER (c)))))) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || EQ (Qt, unified_range_table_lookup ((void *) p, c, Qnil))) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ not_p = !not_p; ~~~~~~~~~~~~~~~ } ~ p += unified_range_table_bytes_used ((void *) p); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!not_p) goto fail; ~~~~~~~~~~~~~~~~~~~~~~ SET_REGS_MATCHED (); ~~~~~~~~~~~~~~~~~~~~ INC_IBYTEPTR_FMT (d, fmt); ~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ } ~ #endif /* MULE */ ~~~~~~~~~~~~~~~~~ /* The beginning of a group is represented by start_memory. The ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ arguments are the register number in the next two bytes, and the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ number of groups inner to this one in the two bytes thereafter. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ The text matched within the group is recorded (in the internal ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ registers data structure) under the register number. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case start_memory: ~~~~~~~~~~~~~~~~~~ { ~ regnum_t regno; ~~~~~~~~~~~~~~~ /* Find out if this group can match the empty string. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ p1 = p; /* To send to group_match_null_string_p. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ EXTRACT_NONNEGATIVE_AND_INCR (regno, p); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT3 ("EXECUTING start_memory %d (%d):\n", ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regno, extract_number (p)); ~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (REG_MATCH_NULL_STRING_P (reg_info[regno]) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ == MATCH_NULL_UNSET_VALUE) ~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ REG_MATCH_NULL_STRING_P (reg_info[regno]) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ = group_match_null_string_p (&p1, pend, reg_info); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ DEBUG_MATCH_PRINT2 (" group CAN%s match null string\n", ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ REG_MATCH_NULL_STRING_P (reg_info[regno]) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ? "NOT" : ""); ~~~~~~~~~~~~~~ /* Save the position in the string where we were the last time ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ we were at this open-group operator in case the group is ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ operated upon by a repetition operator, e.g., with `(a*)*b' ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ against `ab'; then we want to ignore where we are now in the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ string in case this attempt to match fails. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ old_regstart[regno] = REG_MATCH_NULL_STRING_P (reg_info[regno]) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ? REG_UNSET (regstart[regno]) ? d : regstart[regno] ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ : regstart[regno]; ~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT2 (" old_regstart: %zd\n", ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ POINTER_TO_OFFSET (old_regstart[regno])); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regstart[regno] = d; ~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT2 (" regstart: %zd\n", ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ POINTER_TO_OFFSET (regstart[regno])); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ IS_ACTIVE (reg_info[regno]) = 1; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ MATCHED_SOMETHING (reg_info[regno]) = 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Clear this whenever we change the register activity status. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ set_regs_matched_done = 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~ /* This is the new highest active register. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ highest_active_reg = regno; ~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* If nothing was active before, this is the new lowest active ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ register. */ ~~~~~~~~~~~~~ if (lowest_active_reg == NO_LOWEST_ACTIVE_REG) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ lowest_active_reg = regno; ~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Move past the inner group count. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ p += 2; ~~~~~~~ just_past_start_mem = p; ~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ } ~ /* The stop_memory opcode represents the end of a group. Its ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ arguments are the same as start_memory's: the register ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ number, and the number of inner groups. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case stop_memory: ~~~~~~~~~~~~~~~~~ { ~ regnum_t regno, inner_groups; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ EXTRACT_NONNEGATIVE_AND_INCR (regno, p); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ EXTRACT_NONNEGATIVE_AND_INCR (inner_groups, p); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT3 ("EXECUTING stop_memory %d (%d):\n", ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regno, inner_groups); ~~~~~~~~~~~~~~~~~~~~~ /* We need to save the string position the last time we were at ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ this close-group operator in case the group is operated ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ upon by a repetition operator, e.g., with `((a*)*(b*)*)*' ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ against `aba'; then we want to ignore where we are now in ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the string in case this attempt to match fails. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ old_regend[regno] = REG_MATCH_NULL_STRING_P (reg_info[regno]) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ? REG_UNSET (regend[regno]) ? d : regend[regno] ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ : regend[regno]; ~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT2 (" old_regend: %zd\n", ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ POINTER_TO_OFFSET (old_regend[regno])); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regend[regno] = d; ~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT2 (" regend: %zd\n", ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ POINTER_TO_OFFSET (regend[regno])); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* This register isn't active anymore. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ IS_ACTIVE (reg_info[regno]) = 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Clear this whenever we change the register activity status. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ set_regs_matched_done = 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~ /* If this was the only register active, nothing is active ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ anymore. */ ~~~~~~~~~~~~ if (lowest_active_reg == highest_active_reg) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ lowest_active_reg = NO_LOWEST_ACTIVE_REG; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ highest_active_reg = NO_HIGHEST_ACTIVE_REG; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ else ~~~~ { ~ /* We must scan for the new highest active register, since it ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ isn't necessarily one less than now: consider ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (a(b)c(d(e)f)g). When group 3 ends, after the f), the new ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ highest active register is 1. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regnum_t r = regno - 1; ~~~~~~~~~~~~~~~~~~~~~~~ while (r > 0 && !IS_ACTIVE (reg_info[r])) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ r--; ~~~~ /* If we end up at register zero, that means that we saved ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the registers as the result of an `on_failure_jump', not ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ a `start_memory', and we jumped to past the innermost ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `stop_memory'. For example, in ((.)*) we save registers ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 1 and 2 as a result of the *, but when we pop back to the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ second ), we are at the stop_memory 1. Thus, nothing is ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ active. */ ~~~~~~~~~~~ if (r == 0) ~~~~~~~~~~~ { ~ lowest_active_reg = NO_LOWEST_ACTIVE_REG; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ highest_active_reg = NO_HIGHEST_ACTIVE_REG; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ else ~~~~ { ~ highest_active_reg = r; ~~~~~~~~~~~~~~~~~~~~~~~ /* 98/9/21 jhod: We've also gotta set lowest_active_reg, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ don't we? */ ~~~~~~~~~~~~ r = 1; ~~~~~~ while (r < highest_active_reg && !IS_ACTIVE(reg_info[r])) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ r++; ~~~~ lowest_active_reg = r; ~~~~~~~~~~~~~~~~~~~~~~ } ~ } ~ /* If just failed to match something this time around with a ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ group that's operated on by a repetition operator, try to ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ force exit from the ``loop'', and restore the register ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ information for this group that we had before trying this ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ last match. */ ~~~~~~~~~~~~~~~ if ((!MATCHED_SOMETHING (reg_info[regno]) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || just_past_start_mem == p - 4) && p < pend) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ re_bool is_a_jump_n = false; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ p1 = p; ~~~~~~~ mcnt = 0; ~~~~~~~~~ switch ((re_opcode_t) *p1++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ case jump_n: ~~~~~~~~~~~~ is_a_jump_n = true; ~~~~~~~~~~~~~~~~~~~ case pop_failure_jump: ~~~~~~~~~~~~~~~~~~~~~~ case maybe_pop_jump: ~~~~~~~~~~~~~~~~~~~~ case jump: ~~~~~~~~~~ case dummy_failure_jump: ~~~~~~~~~~~~~~~~~~~~~~~~ EXTRACT_NUMBER_AND_INCR (mcnt, p1); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (is_a_jump_n) ~~~~~~~~~~~~~~~~ p1 += 2; ~~~~~~~~ break; ~~~~~~ default: ~~~~~~~~ /* do nothing */ ; ~~~~~~~~~~~~~~~~~~ } ~ p1 += mcnt; ~~~~~~~~~~~ /* If the next operation is a jump backwards in the pattern ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ to an on_failure_jump right before the start_memory ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ corresponding to this stop_memory, exit from the loop ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ by forcing a failure after pushing on the stack the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ on_failure_jump's jump in the pattern, and d. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (mcnt < 0 && (re_opcode_t) *p1 == on_failure_jump ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ && (re_opcode_t) p1[3] == start_memory && ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regno == extract_nonnegative (p1 + 4)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ /* If this group ever matched anything, then restore ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ what its registers were before trying this last ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ failed match, e.g., with `(a*)*b' against `ab' for ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regstart[1], and, e.g., with `((a*)*(b*)*)*' against ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `aba' for regend[3]. ~~~~~~~~~~~~~~~~~~~~ Also restore the registers for inner groups for, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ e.g., `((a*)(b*))*' against `aba' (register 3 would ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ otherwise get trashed). */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (EVER_MATCHED_SOMETHING (reg_info[regno])) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ int r; ~~~~~~ EVER_MATCHED_SOMETHING (reg_info[regno]) = 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Restore this and inner groups' (if any) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ registers. */ ~~~~~~~~~~~~~~ for (r = regno; r < regno + inner_groups; r++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ regstart[r] = old_regstart[r]; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* xx why this test? */ ~~~~~~~~~~~~~~~~~~~~~~~~ if (old_regend[r] >= regstart[r]) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regend[r] = old_regend[r]; ~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ } ~ p1++; ~~~~~ EXTRACT_NUMBER_AND_INCR (mcnt, p1); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PUSH_FAILURE_POINT (p1 + mcnt, d, -2); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regex.c:774:55: note: in definition of macro 'DEBUG_FAIL_PRINT2' if (debug_regexps & RE_DEBUG_FAILURE_POINT) printf (x1, x2) ^~ regex.c:6370:7: note: in expansion of macro 'PUSH_FAILURE_POINT' PUSH_FAILURE_POINT (p1 + mcnt, d, -2); ^~~~~~~~~~~~~~~~~~ regex.c:6534:31: warning: format '%zx' expects argument of type 'size_t', but argument 3 has type 'long int' [-Wformat=] DEBUG_MATCH_PRINT3 (" %d (to 0x%zx):\n", mcnt, ^ (Bytecount) (p + mcnt)); ~~~~~~~~~~~~~~~~~~~~~~ regex.c:791:50: note: in definition of macro 'DEBUG_MATCH_PRINT3' if (debug_regexps & RE_DEBUG_MATCHING) printf (x1, x2, x3) ^~ regex.c:1731:26: warning: format '%zd' expects argument of type 'signed size_t', but argument 2 has type 'long int' [-Wformat=] DEBUG_FAIL_PRINT2 (" Before push, next avail: %zd\n", \ ^ (Bytecount) (fail_stack).avail); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~ regex.c:774:55: note: in definition of macro 'DEBUG_FAIL_PRINT2' if (debug_regexps & RE_DEBUG_FAILURE_POINT) printf (x1, x2) ^~ regex.c:6537:11: note: in expansion of macro 'PUSH_FAILURE_POINT' PUSH_FAILURE_POINT (p + mcnt, (unsigned char *) 0, -2); ^~~~~~~~~~~~~~~~~~ regex.c:1733:26: warning: format '%zd' expects argument of type 'signed size_t', but argument 2 has type 'long int' [-Wformat=] DEBUG_FAIL_PRINT2 (" size: %zd\n", \ ^ (Bytecount) (fail_stack).size); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~ regex.c:774:55: note: in definition of macro 'DEBUG_FAIL_PRINT2' if (debug_regexps & RE_DEBUG_FAILURE_POINT) printf (x1, x2) ^~ regex.c:6537:11: note: in expansion of macro 'PUSH_FAILURE_POINT' PUSH_FAILURE_POINT (p + mcnt, (unsigned char *) 0, -2); ^~~~~~~~~~~~~~~~~~ regex.c:1737:26: warning: format '%zd' expects argument of type 'signed size_t', but argument 2 has type 'long int' [-Wformat=] DEBUG_FAIL_PRINT2 (" available: %zd\n", \ ^ regex.c:774:55: note: in definition of macro 'DEBUG_FAIL_PRINT2' if (debug_regexps & RE_DEBUG_FAILURE_POINT) printf (x1, x2) ^~ regex.c:6537:11: note: in expansion of macro 'PUSH_FAILURE_POINT' PUSH_FAILURE_POINT (p + mcnt, (unsigned char *) 0, -2); ^~~~~~~~~~~~~~~~~~ regex.c:1756:23: warning: format '%zd' expects argument of type 'signed size_t', but argument 2 has type 'long int' [-Wformat=] DEBUG_FAIL_PRINT2 ("\n Doubled stack; size now: %zd\n", \ ^ (Bytecount) (fail_stack).size); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~ regex.c:774:55: note: in definition of macro 'DEBUG_FAIL_PRINT2' if (debug_regexps & RE_DEBUG_FAILURE_POINT) printf (x1, x2) ^~ regex.c:6537:11: note: in expansion of macro 'PUSH_FAILURE_POINT' PUSH_FAILURE_POINT (p + mcnt, (unsigned char *) 0, -2); ^~~~~~~~~~~~~~~~~~ regex.c:1758:23: warning: format '%zd' expects argument of type 'signed size_t', but argument 2 has type 'long int' [-Wformat=] DEBUG_FAIL_PRINT2 (" slots available: %zd\n", \ ^ regex.c:774:55: note: in definition of macro 'DEBUG_FAIL_PRINT2' if (debug_regexps & RE_DEBUG_FAILURE_POINT) printf (x1, x2) ^~ regex.c:6537:11: note: in expansion of macro 'PUSH_FAILURE_POINT' PUSH_FAILURE_POINT (p + mcnt, (unsigned char *) 0, -2); ^~~~~~~~~~~~~~~~~~ regex.c:1777:23: warning: format '%zx' expects argument of type 'size_t', but argument 2 has type 'long int' [-Wformat=] DEBUG_FAIL_PRINT2 (" start: 0x%zx\n", \ ^ (Bytecount) regstart[this_reg]); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regex.c:774:55: note: in definition of macro 'DEBUG_FAIL_PRINT2' if (debug_regexps & RE_DEBUG_FAILURE_POINT) printf (x1, x2) ^~ regex.c:6537:11: note: in expansion of macro 'PUSH_FAILURE_POINT' PUSH_FAILURE_POINT (p + mcnt, (unsigned char *) 0, -2); ^~~~~~~~~~~~~~~~~~ regex.c:1779:23: warning: format '%zx' expects argument of type 'size_t', but argument 2 has type 'long int' [-Wformat=] DEBUG_FAIL_PRINT2 (" end: 0x%zx\n", \ ^ (Bytecount) regend[this_reg]); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regex.c:774:55: note: in definition of macro 'DEBUG_FAIL_PRINT2' if (debug_regexps & RE_DEBUG_FAILURE_POINT) printf (x1, x2) ^~ regex.c:6537:11: note: in expansion of macro 'PUSH_FAILURE_POINT' PUSH_FAILURE_POINT (p + mcnt, (unsigned char *) 0, -2); ^~~~~~~~~~~~~~~~~~ regex.c:1781:23: warning: format '%zx' expects argument of type 'size_t', but argument 2 has type 'long int' [-Wformat=] DEBUG_FAIL_PRINT2 (" info: 0x%zx\n ", \ ^ * (long *) (®_info[this_reg])); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regex.c:774:55: note: in definition of macro 'DEBUG_FAIL_PRINT2' if (debug_regexps & RE_DEBUG_FAILURE_POINT) printf (x1, x2) ^~ regex.c:6537:11: note: in expansion of macro 'PUSH_FAILURE_POINT' PUSH_FAILURE_POINT (p + mcnt, (unsigned char *) 0, -2); ^~~~~~~~~~~~~~~~~~ regex.c:1814:26: warning: format '%zx' expects argument of type 'size_t', but argument 2 has type 'long int' [-Wformat=] DEBUG_FAIL_PRINT2 (" Pushing pattern 0x%zx: \n", \ ^ (Bytecount) pattern_place); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_FAIL_PRINT_COMPILED_PATTERN (bufp, pattern_place, pend); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_FAIL_PRINT2 (" Pushing string 0x%zx: `", \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (Bytecount) string_place); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_FAIL_PRINT_DOUBLE_STRING (string_place, string1, size1, \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ string2, size2); \ ~~~~~~~~~~~~~~~~~~~~ DEBUG_FAIL_PRINT1 ("'\n"); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_FAIL_PRINT2 (" Pushing failure id: %u\n", failure_id); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } \ ~~~~~~~~~~~ } while (0) ~~~~~~~~~~~ /* This is the number of items that are pushed and popped on the stack ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for each register. */ ~~~~~~~~~~~~~~~~~~~~~~ #define NUM_REG_ITEMS 3 ~~~~~~~~~~~~~~~~~~~~~~~~ /* Individual items aside from the registers. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef DEBUG ~~~~~~~~~~~~ #define NUM_NONREG_ITEMS 5 /* Includes failure point id. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #else ~~~~~ #define NUM_NONREG_ITEMS 4 ~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ /* We push at most this many items on the stack. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* We used to use (num_regs - 1), which is the number of registers ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ this regexp will save; but that was changed to 5 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ to avoid stack overflow for a regexp with lots of parens. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define MAX_FAILURE_ITEMS (5 * NUM_REG_ITEMS + NUM_NONREG_ITEMS) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* We actually push this many items. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define NUM_FAILURE_ITEMS \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ((highest_active_reg - lowest_active_reg + 1) * NUM_REG_ITEMS \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + NUM_NONREG_ITEMS) ~~~~~~~~~~~~~~~~~~~ /* How many items can still be added to the stack without overflowing it. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define REMAINING_AVAIL_SLOTS ((fail_stack).size - (fail_stack).avail) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Pops what PUSH_FAIL_STACK pushes. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ We restore into the following parameters, all of which should be lvalues: ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ STR -- the saved data position. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PAT -- the saved pattern position. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ LOW_REG, HIGH_REG -- the highest and lowest active registers. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ REGSTART, REGEND -- arrays of string positions. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ REG_INFO -- array of information about each subexpression. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Also assumes the variables `fail_stack' and (if debugging), `bufp', ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `pend', `string1', `size1', `string2', and `size2'. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define POP_FAILURE_POINT(str, pat, low_reg, high_reg, \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regstart, regend, reg_info) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ do { \ ~~~~~~~~~~~~~~ DEBUG_STATEMENT (int ffailure_id;) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int this_reg; \ ~~~~~~~~~~~~~~~~~~~~~~ const unsigned char *string_temp; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ \ ~ /* Remove failure points and point to how many regs pushed. */ \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ assert (fail_stack.avail >= NUM_NONREG_ITEMS); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ \ ~ if (DEBUG_RUNTIME_FLAGS & RE_DEBUG_FAILURE_POINT) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { \ ~~~~~~~~~~~ DEBUG_FAIL_PRINT1 ("POP_FAILURE_POINT:\n"); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_FAIL_PRINT2 (" Before pop, next avail: %zd\n", \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (Bytecount) fail_stack.avail); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_FAIL_PRINT2 (" size: %zd\n", \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (Bytecount) fail_stack.size); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } \ ~~~~~~~~~~~ \ ~ DEBUG_STATEMENT (ffailure_id = POP_FAILURE_INT()); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ \ ~ /* If the saved string location is NULL, it came from an \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ on_failure_keep_string_jump opcode, and we want to throw away the \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ saved NULL, thus retaining our current position in the string. */ \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ string_temp = POP_FAILURE_RELOCATABLE (); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (string_temp != NULL) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ str = string_temp; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~ \ ~ pat = (unsigned char *) POP_FAILURE_POINTER (); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ \ ~ /* Restore register info. */ \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ high_reg = POP_FAILURE_INT (); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ low_reg = POP_FAILURE_INT (); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ \ ~ if (DEBUG_RUNTIME_FLAGS & RE_DEBUG_FAILURE_POINT) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { \ ~~~~~~~~~~~ DEBUG_FAIL_PRINT2 (" Popping failure id: %d\n", ffailure_id); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_FAIL_PRINT2 (" Popping string 0x%zx: `", (Bytecount) str); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_FAIL_PRINT_DOUBLE_STRING (str, string1, size1, \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ string2, size2); \ ~~~~~~~~~~~~~~~~~~~~ DEBUG_FAIL_PRINT1 ("'\n"); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_FAIL_PRINT2 (" Popping pattern 0x%zx: ", (Bytecount) pat); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_FAIL_PRINT_COMPILED_PATTERN (bufp, pat, pend); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_FAIL_PRINT2 (" Popping high active reg: %d\n", high_reg); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_FAIL_PRINT2 (" Popping low active reg: %d\n", low_reg); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } \ ~~~~~~~~~~~ \ ~ for (this_reg = high_reg; this_reg >= low_reg; this_reg--) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { \ ~~~~~~~~~~~ reg_info[this_reg].word = POP_FAILURE_ELT (); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regend[this_reg] = POP_FAILURE_RELOCATABLE (); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regstart[this_reg] = POP_FAILURE_RELOCATABLE (); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ \ ~ if (DEBUG_RUNTIME_FLAGS & RE_DEBUG_FAILURE_POINT) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { \ ~~~~~~~~~~ DEBUG_FAIL_PRINT2 (" Popping reg: %d\n", this_reg); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_FAIL_PRINT2 (" info: 0x%zx\n", \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ * (Bytecount *) ®_info[this_reg]); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_FAIL_PRINT2 (" end: 0x%zx\n", \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (Bytecount) regend[this_reg]); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_FAIL_PRINT2 (" start: 0x%zx\n", \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (Bytecount) regstart[this_reg]); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } \ ~~~~~~~~~~ } \ ~~~~~~~~~~~ \ ~ set_regs_matched_done = 0; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_STATEMENT (nfailure_points_popped++); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } while (0) /* POP_FAILURE_POINT */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ~ /* Structure for per-register (a.k.a. per-group) information. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Other register information, such as the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ starting and ending positions (which are addresses), and the list of ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ inner groups (which is a bits list) are maintained in separate ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ variables. ~~~~~~~~~~ We are making a (strictly speaking) nonportable assumption here: that ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the compiler will pack our bit fields into something that fits into ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the type of `word', i.e., is something that fits into one item on the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ failure stack. */ ~~~~~~~~~~~~~~~~~~ typedef union ~~~~~~~~~~~~~ { ~ fail_stack_elt_t word; ~~~~~~~~~~~~~~~~~~~~~~ struct ~~~~~~ { ~ /* This field is one if this group can match the empty string, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ zero if not. If not yet determined, `MATCH_NULL_UNSET_VALUE'. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define MATCH_NULL_UNSET_VALUE 3 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ unsigned int match_null_string_p : 2; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ unsigned int is_active : 1; ~~~~~~~~~~~~~~~~~~~~~~~~~~~ unsigned int matched_something : 1; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ unsigned int ever_matched_something : 1; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } bits; ~~~~~~~ } register_info_type; ~~~~~~~~~~~~~~~~~~~~~ #define REG_MATCH_NULL_STRING_P(R) ((R).bits.match_null_string_p) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define IS_ACTIVE(R) ((R).bits.is_active) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define MATCHED_SOMETHING(R) ((R).bits.matched_something) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define EVER_MATCHED_SOMETHING(R) ((R).bits.ever_matched_something) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Call this when have matched a real character; it sets `matched' flags ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for the subexpressions which we are currently inside. Also records ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ that those subexprs have matched. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define SET_REGS_MATCHED() \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ do \ ~~~~~~~~~~~~ { \ ~~~~~~~~~~~ if (!set_regs_matched_done) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { \ ~~~~~~~~~~ int r; \ ~~~~~~~~~~~~~~ set_regs_matched_done = 1; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (r = lowest_active_reg; r <= highest_active_reg; r++) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { \ ~~~~~~~~~~ MATCHED_SOMETHING (reg_info[r]) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ = EVER_MATCHED_SOMETHING (reg_info[r]) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ = 1; \ ~~~~~~~~~~~~ } \ ~~~~~~~~~~ } \ ~~~~~~~~~~ } \ ~~~~~~~~~~~ while (0) ~~~~~~~~~ ~ /* Subroutine declarations and macros for regex_compile. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Fetch the next character in the uncompiled pattern---translating it ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if necessary. */ ~~~~~~~~~~~~~~~~~ #define PATFETCH(c) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~ do { \ ~~~~~~~~~~~~~~ PATFETCH_RAW (c); \ ~~~~~~~~~~~~~~~~~~~~~~~~~ c = RE_TRANSLATE (c); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } while (0) ~~~~~~~~~~~ /* Fetch the next character in the uncompiled pattern, with no ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ translation. */ ~~~~~~~~~~~~~~~~ #define PATFETCH_RAW(c) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ do {if (p == pend) return REG_EEND; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ assert (p < pend); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~ c = itext_ichar (p); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ INC_IBYTEPTR (p); \ ~~~~~~~~~~~~~~~~~~~~~~~~~ } while (0) ~~~~~~~~~~~ /* Go backwards one character in the pattern. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define PATUNFETCH DEC_IBYTEPTR (p) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* If `translate' is non-null, return translate[D], else just D. We ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ cast the subscript to translate because some data is declared as ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `char *', to avoid warnings when a string constant is passed. But ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ when we use a character as a subscript we must make it unsigned. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define RE_TRANSLATE(d) \ ~~~~~~~~~~~~~~~~~~~~~~~~~ (TRANSLATE_P (translate) ? RE_TRANSLATE_1 (d) : (d)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Macros for outputting the compiled pattern into `buffer'. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* If the buffer isn't allocated when it comes in, use this. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define INIT_BUF_SIZE 32 ~~~~~~~~~~~~~~~~~~~~~~~~~ /* Make sure we have at least N more bytes of space in buffer. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define GET_BUFFER_SPACE(n) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ while (buf_end - bufp->buffer + (n) > (ptrdiff_t) bufp->allocated) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ EXTEND_BUFFER () ~~~~~~~~~~~~~~~~ /* Make sure we have one more byte of buffer space and then add C to it. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define BUF_PUSH(c) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~ do { \ ~~~~~~~~~~~~~~ GET_BUFFER_SPACE (1); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ *buf_end++ = (unsigned char) (c); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } while (0) ~~~~~~~~~~~ /* Ensure we have two more bytes of buffer space and then append C1 and C2. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define BUF_PUSH_2(c1, c2) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ do { \ ~~~~~~~~~~~~~~ GET_BUFFER_SPACE (2); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ *buf_end++ = (unsigned char) (c1); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ *buf_end++ = (unsigned char) (c2); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } while (0) ~~~~~~~~~~~ /* As with BUF_PUSH_2, except for three bytes. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define BUF_PUSH_3(c1, c2, c3) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ do { \ ~~~~~~~~~~~~~~ GET_BUFFER_SPACE (3); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ *buf_end++ = (unsigned char) (c1); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ *buf_end++ = (unsigned char) (c2); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ *buf_end++ = (unsigned char) (c3); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } while (0) ~~~~~~~~~~~ /* Store a jump with opcode OP at LOC to location TO. We store a ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ relative address offset by the three bytes the jump itself occupies. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define STORE_JUMP(op, loc, to) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ store_op1 (op, loc, (to) - (loc) - 3) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Likewise, for a two-argument jump. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define STORE_JUMP2(op, loc, to, arg) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ store_op2 (op, loc, (to) - (loc) - 3, arg) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Like `STORE_JUMP', but for inserting. Assume `buf_end' is the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ buffer end. */ ~~~~~~~~~~~~~~~ #define INSERT_JUMP(op, loc, to) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ insert_op1 (op, loc, (to) - (loc) - 3, buf_end) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Like `STORE_JUMP2', but for inserting. Assume `buf_end' is the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ buffer end. */ ~~~~~~~~~~~~~~~ #define INSERT_JUMP2(op, loc, to, arg) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ insert_op2 (op, loc, (to) - (loc) - 3, arg, buf_end) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Extend the buffer by twice its current size via realloc and ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ reset the pointers that pointed into the old block to point to the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ correct places in the new one. If extending the buffer results in it ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ being larger than RE_MAX_BUF_SIZE, then flag memory exhausted. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define EXTEND_BUFFER() \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ do { \ ~~~~~~~~~~~~~~~ re_char *old_buffer = bufp->buffer; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (bufp->allocated == RE_MAX_BUF_SIZE) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return REG_ESIZE; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->allocated <<= 1; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (bufp->allocated > RE_MAX_BUF_SIZE) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->allocated = RE_MAX_BUF_SIZE; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->buffer = \ ~~~~~~~~~~~~~~~~~~~~~~~ (unsigned char *) xrealloc (bufp->buffer, bufp->allocated); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (bufp->buffer == NULL) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return REG_ESPACE; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~ /* If the buffer moved, move all the pointers into it. */ \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (old_buffer != bufp->buffer) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { \ ~~~~~~~~~~~~ buf_end = (buf_end - old_buffer) + bufp->buffer; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ begalt = (begalt - old_buffer) + bufp->buffer; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (fixup_alt_jump) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~ fixup_alt_jump = (fixup_alt_jump - old_buffer) + bufp->buffer; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (laststart) \ ~~~~~~~~~~~~~~~~~~~~~~~ laststart = (laststart - old_buffer) + bufp->buffer; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (pending_exact) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~ pending_exact = (pending_exact - old_buffer) + bufp->buffer; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } \ ~~~~~~~~~~~~ } while (0) ~~~~~~~~~~~ #define INIT_REG_TRANSLATE_SIZE 5 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Macros for the compile stack. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Since offsets can go either forwards or backwards, this type needs to be ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ able to hold values from -(RE_MAX_BUF_SIZE - 1) to RE_MAX_BUF_SIZE - 1. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ typedef int pattern_offset_t; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ typedef struct ~~~~~~~~~~~~~~ { ~ pattern_offset_t begalt_offset; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ pattern_offset_t fixup_alt_jump; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ pattern_offset_t inner_group_offset; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ pattern_offset_t laststart_offset; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regnum_t regnum; ~~~~~~~~~~~~~~~~ } compile_stack_elt_t; ~~~~~~~~~~~~~~~~~~~~~~ typedef struct ~~~~~~~~~~~~~~ { ~ compile_stack_elt_t *stack; ~~~~~~~~~~~~~~~~~~~~~~~~~~~ int size; ~~~~~~~~~ int avail; /* Offset of next open position. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } compile_stack_type; ~~~~~~~~~~~~~~~~~~~~~ #define INIT_COMPILE_STACK_SIZE 32 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define COMPILE_STACK_EMPTY (compile_stack.avail == 0) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define COMPILE_STACK_FULL (compile_stack.avail == compile_stack.size) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* The next available element. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define COMPILE_STACK_TOP (compile_stack.stack[compile_stack.avail]) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Set the bit for character C in a bit vector. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define SET_LIST_BIT(c) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (buf_end[((unsigned char) (c)) / BYTEWIDTH] \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ |= 1 << (((unsigned char) c) % BYTEWIDTH)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef MULE ~~~~~~~~~~~ /* Set the "bit" for character C in a range table. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define SET_RANGETAB_BIT(c) put_range_table (rtab, c, c, Qt) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ #ifdef emacs ~~~~~~~~~~~~ /* Parse the longest number we can, but don't produce a bignum, that can't ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ correspond to anything we're interested in and would needlessly complicate ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ code. Also avoid the silent overflow issues of the non-emacs code below. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ If the string at P is not exhausted, leave P pointing at the next ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (probable-)non-digit byte encountered. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define GET_UNSIGNED_NUMBER(num) do \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Ibyte *_gus_numend = NULL; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Lisp_Object _gus_numno; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* most-positive-fixnum on 32 bit XEmacs is 10 decimal digits, \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ nine will keep us in fixnum territory no matter our \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ architecture */ \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Bytecount limit = min (pend - p, 9); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ \ ~ /* Require that any digits are ASCII. We already require that \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the user type ASCII in order to type {,(,|, etc, and there is \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the potential for security holes in the future if we allow \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ non-ASCII digits to specify groups in regexps and other \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ code that parses regexps is not aware of this. */ \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ _gus_numno = parse_integer (p, &_gus_numend, limit, 10, 1, \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Vdigit_fixnum_ascii); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (FIXNUMP (_gus_numno) && XREALFIXNUM (_gus_numno) >= 0) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ num = XREALFIXNUM (_gus_numno); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ p = _gus_numend; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } while (0) ~~~~~~~~~~~ #else ~~~~~ /* Get the next unsigned number in the uncompiled pattern. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define GET_UNSIGNED_NUMBER(num) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { if (p != pend) \ ~~~~~~~~~~~~~~~~~~~~~~~~ { \ ~~~~~~~~~~~ int _gun_do_unfetch = 1; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PATFETCH (c); \ ~~~~~~~~~~~~~~~~~~~~~~ while (ISDIGIT (c)) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~ { \ ~~~~~~~~~~~ if (num < 0) \ ~~~~~~~~~~~~~~~~~~~~ num = 0; \ ~~~~~~~~~~~~~~~~ num = num * 10 + c - '0'; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (p == pend) \ ~~~~~~~~~~~~~~~~~~~~~~ { \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ _gun_do_unfetch = 0; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; \ ~~~~~~~~~~~~~~~ } \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PATFETCH (c); \ ~~~~~~~~~~~~~~~~~~~~ } \ ~~~~~~~~~~~ if (_gun_do_unfetch) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Make sure P points to the next non-digit character. */ \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PATUNFETCH; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ #endif ~~~~~~ /* Map a string to the char class it names (if any). BEG points to the string ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ to be parsed and LIMIT is the length, in bytes, of that string. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ XEmacs; this only handles the NAME part of the [:NAME:] specification of a ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ character class name. The GNU emacs version of this function attempts to ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ handle the string from [: onwards, and is called re_wctype_parse. Our ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ approach means the function doesn't need to be called with every character ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ class encountered. ~~~~~~~~~~~~~~~~~~ LENGTH would be a Bytecount if this function didn't need to be compiled ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ also for executables that don't include lisp.h ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Return RECC_ERROR if STRP doesn't match a known character class. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_wctype_t ~~~~~~~~~~~ re_wctype (const unsigned char *beg, int limit) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ /* Sort tests in the length=five case by frequency the classes to minimize ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ number of times we fail the comparison. The frequencies of character class ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ names used in Emacs sources as of 2016-07-27: ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ $ find \( -name \*.c -o -name \*.el \) -exec grep -h '\[:[a-z]*:]' {} + | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ sed 's/]/]\n/g' |grep -o '\[:[a-z]*:]' |sort |uniq -c |sort -nr ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 213 [:alnum:] ~~~~~~~~~~~~~ 104 [:alpha:] ~~~~~~~~~~~~~ 62 [:space:] ~~~~~~~~~~~~ 39 [:digit:] ~~~~~~~~~~~~ 36 [:blank:] ~~~~~~~~~~~~ 26 [:word:] ~~~~~~~~~~~ 26 [:upper:] ~~~~~~~~~~~~ 21 [:lower:] ~~~~~~~~~~~~ 10 [:xdigit:] ~~~~~~~~~~~~~ 10 [:punct:] ~~~~~~~~~~~~ 10 [:ascii:] ~~~~~~~~~~~~ 4 [:nonascii:] ~~~~~~~~~~~~~~ 4 [:graph:] ~~~~~~~~~~~ 2 [:print:] ~~~~~~~~~~~ 2 [:cntrl:] ~~~~~~~~~~~ 1 [:ff:] ~~~~~~~~ If you update this list, consider also updating chain of or'ed conditions ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ in execute_charset function. XEmacs; our equivalent is the condition ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ checking class_bits in the charset_mule and charset_mule_not opcodes. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ ~~ switch (limit) { ~~~~~~~~~~~~~~~~ case 4: ~~~~~~~ if (!memcmp (beg, "word", 4)) return RECC_WORD; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case 5: ~~~~~~~ if (!memcmp (beg, "alnum", 5)) return RECC_ALNUM; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!memcmp (beg, "alpha", 5)) return RECC_ALPHA; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!memcmp (beg, "space", 5)) return RECC_SPACE; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!memcmp (beg, "digit", 5)) return RECC_DIGIT; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!memcmp (beg, "blank", 5)) return RECC_BLANK; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!memcmp (beg, "upper", 5)) return RECC_UPPER; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!memcmp (beg, "lower", 5)) return RECC_LOWER; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!memcmp (beg, "punct", 5)) return RECC_PUNCT; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!memcmp (beg, "ascii", 5)) return RECC_ASCII; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!memcmp (beg, "graph", 5)) return RECC_GRAPH; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!memcmp (beg, "print", 5)) return RECC_PRINT; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!memcmp (beg, "cntrl", 5)) return RECC_CNTRL; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case 6: ~~~~~~~ if (!memcmp (beg, "xdigit", 6)) return RECC_XDIGIT; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case 7: ~~~~~~~ if (!memcmp (beg, "unibyte", 7)) return RECC_UNIBYTE; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case 8: ~~~~~~~ if (!memcmp (beg, "nonascii", 8)) return RECC_NONASCII; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case 9: ~~~~~~~ if (!memcmp (beg, "multibyte", 9)) return RECC_MULTIBYTE; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ } ~ return RECC_ERROR; ~~~~~~~~~~~~~~~~~~ } ~ /* True if CH is in the char class CC. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int ~~~ re_iswctype (int ch, re_wctype_t cc ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RE_ISWCTYPE_ARG_DECL) ~~~~~~~~~~~~~~~~~~~~~ { ~ switch (cc) ~~~~~~~~~~~ { ~ case RECC_ALNUM: return ISALNUM (ch) != 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_ALPHA: return ISALPHA (ch) != 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_BLANK: return ISBLANK (ch) != 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_CNTRL: return ISCNTRL (ch) != 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_DIGIT: return ISDIGIT (ch) != 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_GRAPH: return ISGRAPH (ch) != 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_PRINT: return ISPRINT (ch) != 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_PUNCT: return ISPUNCT (ch) != 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_SPACE: return ISSPACE (ch) != 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef emacs ~~~~~~~~~~~~ case RECC_UPPER: ~~~~~~~~~~~~~~~~ return NILP (lispbuf->case_fold_search) ? ISUPPER (ch) != 0 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ : !NOCASEP (lispbuf, ch); ~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_LOWER: ~~~~~~~~~~~~~~~~ return NILP (lispbuf->case_fold_search) ? ISLOWER (ch) != 0 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ : !NOCASEP (lispbuf, ch); ~~~~~~~~~~~~~~~~~~~~~~~~~ #else ~~~~~ case RECC_UPPER: return ISUPPER (ch) != 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_LOWER: return ISLOWER (ch) != 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ case RECC_XDIGIT: return ISXDIGIT (ch) != 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_ASCII: return ISASCII (ch) != 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_NONASCII: case RECC_MULTIBYTE: return !ISASCII (ch); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_UNIBYTE: return ISUNIBYTE (ch) != 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_WORD: return ISWORD (ch) != 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_ERROR: return false; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ assert (0); ~~~~~~~~~~~ return -1; ~~~~~~~~~~ } ~ #ifdef MULE ~~~~~~~~~~~ static re_bool ~~~~~~~~~~~~~~ re_wctype_can_match_non_ascii (re_wctype_t cc) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ switch (cc) ~~~~~~~~~~~ { ~ case RECC_ASCII: ~~~~~~~~~~~~~~~~ case RECC_UNIBYTE: ~~~~~~~~~~~~~~~~~~ case RECC_CNTRL: ~~~~~~~~~~~~~~~~ case RECC_DIGIT: ~~~~~~~~~~~~~~~~ case RECC_XDIGIT: ~~~~~~~~~~~~~~~~~ case RECC_BLANK: ~~~~~~~~~~~~~~~~ return false; ~~~~~~~~~~~~~ default: ~~~~~~~~ return true; ~~~~~~~~~~~~ } ~ } ~ #endif /* MULE */ ~~~~~~~~~~~~~~~~~ #ifdef emacs ~~~~~~~~~~~~ /* Return a bit-pattern to use in the range-table bits to match multibyte ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ chars of class CC. */ ~~~~~~~~~~~~~~~~~~~~~~ static unsigned char ~~~~~~~~~~~~~~~~~~~~ re_wctype_to_bit (re_wctype_t cc) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ switch (cc) ~~~~~~~~~~~ { ~ case RECC_PRINT: case RECC_GRAPH: ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_ALPHA: return BIT_ALPHA; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_ALNUM: case RECC_WORD: return BIT_WORD; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_LOWER: return BIT_LOWER; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_UPPER: return BIT_UPPER; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_PUNCT: return BIT_PUNCT; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_SPACE: return BIT_SPACE; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_MULTIBYTE: case RECC_NONASCII: ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_ASCII: case RECC_DIGIT: case RECC_XDIGIT: case RECC_CNTRL: ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_BLANK: case RECC_UNIBYTE: case RECC_ERROR: return 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ default: ~~~~~~~~ ABORT (); ~~~~~~~~~ return 0; ~~~~~~~~~ } ~ } ~ #endif /* emacs */ ~~~~~~~~~~~~~~~~~~ ~ static void store_op1 (re_opcode_t op, unsigned char *loc, int arg); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ static void store_op2 (re_opcode_t op, unsigned char *loc, int arg1, int arg2); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ static void insert_op1 (re_opcode_t op, unsigned char *loc, int arg, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ unsigned char *end); ~~~~~~~~~~~~~~~~~~~~ static void insert_op2 (re_opcode_t op, unsigned char *loc, int arg1, int arg2, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ unsigned char *end); ~~~~~~~~~~~~~~~~~~~~ static re_bool at_begline_loc_p (re_char *pattern, re_char *p, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ reg_syntax_t syntax); ~~~~~~~~~~~~~~~~~~~~~ static re_bool at_endline_loc_p (re_char *p, re_char *pend, int syntax); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ static re_bool group_in_compile_stack (compile_stack_type compile_stack, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regnum_t regnum); ~~~~~~~~~~~~~~~~~ static reg_errcode_t compile_range (re_char **p_ptr, re_char *pend, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RE_TRANSLATE_TYPE translate, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ reg_syntax_t syntax, ~~~~~~~~~~~~~~~~~~~~ unsigned char *b); ~~~~~~~~~~~~~~~~~~ #ifdef MULE ~~~~~~~~~~~ static reg_errcode_t compile_extended_range (re_char **p_ptr, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_char *pend, ~~~~~~~~~~~~~~ RE_TRANSLATE_TYPE translate, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ reg_syntax_t syntax, ~~~~~~~~~~~~~~~~~~~~ Lisp_Object rtab); ~~~~~~~~~~~~~~~~~~ #endif /* MULE */ ~~~~~~~~~~~~~~~~~ #ifdef emacs ~~~~~~~~~~~~ reg_errcode_t compile_char_class (re_wctype_t cc, Lisp_Object rtab, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Bitbyte *flags_out); ~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ static re_bool group_match_null_string_p (re_char **p, re_char *end, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ register_info_type *reg_info); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ static re_bool alt_match_null_string_p (re_char *p, re_char *end, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ register_info_type *reg_info); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ static re_bool common_op_match_null_string_p (re_char **p, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_char *end, ~~~~~~~~~~~~~ register_info_type *reg_info); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ static int bcmp_translate (re_char *s1, re_char *s2, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ REGISTER int len, RE_TRANSLATE_TYPE translate ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef emacs ~~~~~~~~~~~~ , Internal_Format fmt, Lisp_Object lispobj ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ ); ~~ static int re_match_2_internal (struct re_pattern_buffer *bufp, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_char *string1, int size1, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_char *string2, int size2, int pos, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ struct re_registers *regs, int stop ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RE_LISP_CONTEXT_ARGS_DECL); ~~~~~~~~~~~~~~~~~~~~~~~~~~~ ~ #ifndef MATCH_MAY_ALLOCATE ~~~~~~~~~~~~~~~~~~~~~~~~~~ /* If we cannot allocate large objects within re_match_2_internal, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ we make the fail stack and register vectors global. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ The fail stack, we grow to the maximum size when a regexp ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ is compiled. ~~~~~~~~~~~~ The register vectors, we adjust in size each time we ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ compile a regexp, according to the number of registers it needs. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ static fail_stack_type fail_stack; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Size with which the following vectors are currently allocated. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ That is so we can make them bigger as needed, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ but never make them smaller. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ static int regs_allocated_size; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ static re_char ** regstart, ** regend; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ static re_char ** old_regstart, ** old_regend; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ static re_char **best_regstart, **best_regend; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ static register_info_type *reg_info; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ static re_char **reg_dummy; ~~~~~~~~~~~~~~~~~~~~~~~~~~~ static register_info_type *reg_info_dummy; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Make the register vectors big enough for NUM_REGS registers, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ but don't make them smaller. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ static ~~~~~~ regex_grow_registers (int num_regs) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ if (num_regs > regs_allocated_size) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ RETALLOC (regstart, num_regs, re_char *); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RETALLOC (regend, num_regs, re_char *); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RETALLOC (old_regstart, num_regs, re_char *); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RETALLOC (old_regend, num_regs, re_char *); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RETALLOC (best_regstart, num_regs, re_char *); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RETALLOC (best_regend, num_regs, re_char *); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RETALLOC (reg_info, num_regs, register_info_type); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RETALLOC (reg_dummy, num_regs, re_char *); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RETALLOC (reg_info_dummy, num_regs, register_info_type); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regs_allocated_size = num_regs; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ } ~ #endif /* not MATCH_MAY_ALLOCATE */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ~ /* `regex_compile' compiles PATTERN (of length SIZE) according to SYNTAX. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Returns one of error codes defined in `regex.h', or zero for success. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Assumes the `allocated' (and perhaps `buffer') and `translate' ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fields are set in BUFP on entry. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ If it succeeds, results are put in BUFP (if it returns an error, the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ contents of BUFP are undefined): ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `buffer' is the compiled pattern; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `syntax' is set to SYNTAX; ~~~~~~~~~~~~~~~~~~~~~~~~~~ `used' is set to the length of the compiled pattern; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `fastmap_accurate' is zero; ~~~~~~~~~~~~~~~~~~~~~~~~~~~ `re_ngroups' is the number of groups/subexpressions (including shy ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ groups) in PATTERN; ~~~~~~~~~~~~~~~~~~~ `re_nsub' is the number of non-shy groups in PATTERN; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `not_bol' and `not_eol' are zero; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ The `fastmap' and `newline_anchor' fields are neither ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ examined nor set. */ ~~~~~~~~~~~~~~~~~~~~~ /* Return, freeing storage we allocated. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define FREE_STACK_RETURN(value) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ do \ ~~~~~~~~~~ { \ ~~~~~~~~~ xfree (compile_stack.stack); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return value; \ ~~~~~~~~~~~~~~~~~~~~ } while (0) ~~~~~~~~~~~ static reg_errcode_t ~~~~~~~~~~~~~~~~~~~~ regex_compile (re_char *pattern, int size, reg_syntax_t syntax, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ struct re_pattern_buffer *bufp) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ /* We fetch characters from PATTERN here. We declare these as int ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (or possibly long) so that chars above 127 can be used as ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ array indices. The macros that fetch a character from the pattern ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ make sure to coerce to unsigned char before assigning, so we won't ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ get bitten by negative numbers here. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* XEmacs change: used to be unsigned char. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ REGISTER EMACS_INT c, c1; ~~~~~~~~~~~~~~~~~~~~~~~~~ /* A random temporary spot in PATTERN. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_char *p1; ~~~~~~~~~~~~ /* Points to the end of the buffer, where we should append. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ REGISTER unsigned char *buf_end; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Keeps track of unclosed groups. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ compile_stack_type compile_stack; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Points to the current (ending) position in the pattern. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_char *p = pattern; ~~~~~~~~~~~~~~~~~~~~~ re_char *pend = pattern + size; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* How to translate the characters in the pattern. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RE_TRANSLATE_TYPE translate = bufp->translate; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Address of the count-byte of the most recently inserted `exactn' ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ command. This makes it possible to tell if a new exact-match ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ character can be added to that command or if the character requires ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ a new `exactn' command. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~ unsigned char *pending_exact = 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Address of start of the most recently finished expression. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ This tells, e.g., postfix * where to find the start of its ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ operand. Reset at the beginning of groups and alternatives. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ unsigned char *laststart = 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Address of beginning of regexp, or inside of last group. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ unsigned char *begalt; ~~~~~~~~~~~~~~~~~~~~~~ /* Place in the uncompiled pattern (i.e., the {) to ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ which to go back if the interval is invalid. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_char *beg_interval; ~~~~~~~~~~~~~~~~~~~~~~ /* Address of the place where a forward jump should go to the end of ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the containing expression. Each alternative of an `or' -- except the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ last -- ends with a forward jump of this sort. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ unsigned char *fixup_alt_jump = 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Counts open-groups as they are encountered. Remembered for the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ matching close-group on the compile stack, so the same register ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ number is put in the stop_memory as the start_memory. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regnum_t regnum = 0; ~~~~~~~~~~~~~~~~~~~~ #ifdef DEBUG ~~~~~~~~~~~~ if (debug_regexps & RE_DEBUG_COMPILATION) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ int debug_count; ~~~~~~~~~~~~~~~~ DEBUG_PRINT1 ("\nCompiling pattern: "); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (debug_count = 0; debug_count < size; debug_count++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ putchar (pattern[debug_count]); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ putchar ('\n'); ~~~~~~~~~~~~~~~ } ~ #endif /* DEBUG */ ~~~~~~~~~~~~~~~~~~ /* Initialize the compile stack. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ compile_stack.stack = TALLOC (INIT_COMPILE_STACK_SIZE, compile_stack_elt_t); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (compile_stack.stack == NULL) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return REG_ESPACE; ~~~~~~~~~~~~~~~~~~ compile_stack.size = INIT_COMPILE_STACK_SIZE; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ compile_stack.avail = 0; ~~~~~~~~~~~~~~~~~~~~~~~~ /* Initialize the pattern buffer. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->syntax = syntax; ~~~~~~~~~~~~~~~~~~~~~~ bufp->fastmap_accurate = 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->not_bol = bufp->not_eol = 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Set `used' to zero, so that if we return an error, the pattern ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ printer (for debugging) will think there's no pattern. We reset it ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ at the end. */ ~~~~~~~~~~~~~~~ bufp->used = 0; ~~~~~~~~~~~~~~~ /* Always count groups, whether or not bufp->no_sub is set. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->re_nsub = 0; ~~~~~~~~~~~~~~~~~~ bufp->re_ngroups = 0; ~~~~~~~~~~~~~~~~~~~~~ bufp->warned_about_incompatible_back_references = 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (bufp->external_to_internal_register == 0) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ bufp->external_to_internal_register_size = INIT_REG_TRANSLATE_SIZE; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RETALLOC (bufp->external_to_internal_register, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->external_to_internal_register_size, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int); ~~~~~ } ~ { ~ int i; ~~~~~~ bufp->external_to_internal_register[0] = 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (i = 1; i < bufp->external_to_internal_register_size; i++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->external_to_internal_register[i] = (int) 0xDEADBEEF; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ #if !defined (emacs) && !defined (SYNTAX_TABLE) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Initialize the syntax table. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ init_syntax_once (); ~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ if (bufp->allocated == 0) ~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ if (bufp->buffer) ~~~~~~~~~~~~~~~~~ { /* If zero allocated, but buffer is non-null, try to realloc ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ enough space. This loses if buffer's address is bogus, but ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ that is the user's responsibility. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RETALLOC (bufp->buffer, INIT_BUF_SIZE, unsigned char); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ else ~~~~ { /* Caller did not allocate a buffer. Do it for them. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->buffer = TALLOC (INIT_BUF_SIZE, unsigned char); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ if (!bufp->buffer) FREE_STACK_RETURN (REG_ESPACE); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->allocated = INIT_BUF_SIZE; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ begalt = buf_end = bufp->buffer; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Loop through the uncompiled pattern until we're at the end. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ while (p != pend) ~~~~~~~~~~~~~~~~~ { ~ PATFETCH (c); ~~~~~~~~~~~~~ switch (c) ~~~~~~~~~~ { ~ case '^': ~~~~~~~~~ { ~ if ( /* If at start of pattern, it's an operator. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ p == pattern + 1 ~~~~~~~~~~~~~~~~ /* If context independent, it's an operator. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || syntax & RE_CONTEXT_INDEP_ANCHORS ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Otherwise, depends on what's come before. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || at_begline_loc_p (pattern, p, syntax)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ BUF_PUSH (begline); ~~~~~~~~~~~~~~~~~~~ else ~~~~ goto normal_char; ~~~~~~~~~~~~~~~~~ } ~ break; ~~~~~~ case '$': ~~~~~~~~~ { ~ if ( /* If at end of pattern, it's an operator. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ p == pend ~~~~~~~~~ /* If context independent, it's an operator. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || syntax & RE_CONTEXT_INDEP_ANCHORS ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Otherwise, depends on what's next. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || at_endline_loc_p (p, pend, syntax)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ BUF_PUSH (endline); ~~~~~~~~~~~~~~~~~~~ else ~~~~ goto normal_char; ~~~~~~~~~~~~~~~~~ } ~ break; ~~~~~~ case '+': ~~~~~~~~~ case '?': ~~~~~~~~~ if ((syntax & RE_BK_PLUS_QM) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || (syntax & RE_LIMITED_OPS)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto normal_char; ~~~~~~~~~~~~~~~~~ handle_plus: ~~~~~~~~~~~~ case '*': ~~~~~~~~~ /* If there is no previous pattern... */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!laststart) ~~~~~~~~~~~~~~~ { ~ if (syntax & RE_CONTEXT_INVALID_OPS) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ FREE_STACK_RETURN (REG_BADRPT); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ else if (!(syntax & RE_CONTEXT_INDEP_OPS)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto normal_char; ~~~~~~~~~~~~~~~~~ } ~ { ~ /* true means zero/many matches are allowed. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_bool zero_times_ok = c != '+'; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_bool many_times_ok = c != '?'; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* true means match shortest string possible. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_bool minimal = false; ~~~~~~~~~~~~~~~~~~~~~~~~ /* If there is a sequence of repetition chars, collapse it ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ down to just one (the right one). We can't combine ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ interval operators with these because of, e.g., `a{2}*', ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ which should only match an even number of `a's. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ while (p != pend) ~~~~~~~~~~~~~~~~~ { ~ PATFETCH (c); ~~~~~~~~~~~~~ if (c == '*' || (!(syntax & RE_BK_PLUS_QM) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ && (c == '+' || c == '?'))) ~~~~~~~~~~~~~~~~~~~~~~~~~~~ ; ~ else if (syntax & RE_BK_PLUS_QM && c == '\\') ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ if (p == pend) FREE_STACK_RETURN (REG_EESCAPE); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PATFETCH (c1); ~~~~~~~~~~~~~~ if (!(c1 == '+' || c1 == '?')) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ PATUNFETCH; ~~~~~~~~~~~ PATUNFETCH; ~~~~~~~~~~~ break; ~~~~~~ } ~ c = c1; ~~~~~~~ } ~ else ~~~~ { ~ PATUNFETCH; ~~~~~~~~~~~ break; ~~~~~~ } ~ /* If we get here, we found another repeat character. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!(syntax & RE_NO_MINIMAL_MATCHING)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ /* "*?" and "+?" and "??" are okay (and mean match ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ minimally), but other sequences (such as "*??" and ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ "+++") are rejected (reserved for future use). */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (minimal || c != '?') ~~~~~~~~~~~~~~~~~~~~~~~~ FREE_STACK_RETURN (REG_BADRPT); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ minimal = true; ~~~~~~~~~~~~~~~ } ~ else ~~~~ { ~ zero_times_ok |= c != '+'; ~~~~~~~~~~~~~~~~~~~~~~~~~~ many_times_ok |= c != '?'; ~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ } ~ /* Star, etc. applied to an empty pattern is equivalent ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ to an empty pattern. */ ~~~~~~~~~~~~~~~~~~~~~~~~ if (!laststart) ~~~~~~~~~~~~~~~ break; ~~~~~~ /* Now we know whether zero matches is allowed ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ and whether two or more matches is allowed ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ and whether we want minimal or maximal matching. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (minimal) ~~~~~~~~~~~~ { ~ if (!many_times_ok) ~~~~~~~~~~~~~~~~~~~ { ~ /* "a??" becomes: ~~~~~~~~~~~~~~~~~ 0: /on_failure_jump to 6 ~~~~~~~~~~~~~~~~~~~~~~~~ 3: /jump to 9 ~~~~~~~~~~~~~ 6: /exactn/1/A ~~~~~~~~~~~~~~ 9: end of pattern. ~~~~~~~~~~~~~~~~~~ */ ~~ GET_BUFFER_SPACE (6); ~~~~~~~~~~~~~~~~~~~~~ INSERT_JUMP (jump, laststart, buf_end + 3); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ buf_end += 3; ~~~~~~~~~~~~~ INSERT_JUMP (on_failure_jump, laststart, laststart + 6); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ buf_end += 3; ~~~~~~~~~~~~~ } ~ else if (zero_times_ok) ~~~~~~~~~~~~~~~~~~~~~~~ { ~ /* "a*?" becomes: ~~~~~~~~~~~~~~~~~ 0: /jump to 6 ~~~~~~~~~~~~~ 3: /exactn/1/A ~~~~~~~~~~~~~~ 6: /on_failure_jump to 3 ~~~~~~~~~~~~~~~~~~~~~~~~ 9: end of pattern. ~~~~~~~~~~~~~~~~~~ */ ~~ GET_BUFFER_SPACE (6); ~~~~~~~~~~~~~~~~~~~~~ INSERT_JUMP (jump, laststart, buf_end + 3); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ buf_end += 3; ~~~~~~~~~~~~~ STORE_JUMP (on_failure_jump, buf_end, laststart + 3); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ buf_end += 3; ~~~~~~~~~~~~~ } ~ else ~~~~ { ~ /* "a+?" becomes: ~~~~~~~~~~~~~~~~~ 0: /exactn/1/A ~~~~~~~~~~~~~~ 3: /on_failure_jump to 0 ~~~~~~~~~~~~~~~~~~~~~~~~ 6: end of pattern. ~~~~~~~~~~~~~~~~~~ */ ~~ GET_BUFFER_SPACE (3); ~~~~~~~~~~~~~~~~~~~~~ STORE_JUMP (on_failure_jump, buf_end, laststart); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ buf_end += 3; ~~~~~~~~~~~~~ } ~ } ~ else ~~~~ { ~ /* Are we optimizing this jump? */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_bool keep_string_p = false; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (many_times_ok) ~~~~~~~~~~~~~~~~~~ { /* More than one repetition is allowed, so put in ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ at the end a backward relative jump from ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `buf_end' to before the next jump we're going ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ to put in below (which jumps from laststart to ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ after this jump). ~~~~~~~~~~~~~~~~~ But if we are at the `*' in the exact sequence `.*\n', ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ insert an unconditional jump backwards to the ., ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ instead of the beginning of the loop. This way we only ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ push a failure point once, instead of every time ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ through the loop. */ ~~~~~~~~~~~~~~~~~~~~~ assert (p - 1 > pattern); ~~~~~~~~~~~~~~~~~~~~~~~~~ /* Allocate the space for the jump. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ GET_BUFFER_SPACE (3); ~~~~~~~~~~~~~~~~~~~~~ /* We know we are not at the first character of the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ pattern, because laststart was nonzero. And we've ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ already incremented `p', by the way, to be the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ character after the `*'. Do we have to do something ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ analogous here for null bytes, because of ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RE_DOT_NOT_NULL? */ ~~~~~~~~~~~~~~~~~~~ if (*(p - 2) == '.' ~~~~~~~~~~~~~~~~~~~ && zero_times_ok ~~~~~~~~~~~~~~~~ && p < pend && *p == '\n' ~~~~~~~~~~~~~~~~~~~~~~~~~ && !(syntax & RE_DOT_NEWLINE)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { /* We have .*\n. */ ~~~~~~~~~~~~~~~~~~~~~~ STORE_JUMP (jump, buf_end, laststart); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ keep_string_p = true; ~~~~~~~~~~~~~~~~~~~~~ } ~ else ~~~~ /* Anything else. */ ~~~~~~~~~~~~~~~~~~~~~ STORE_JUMP (maybe_pop_jump, buf_end, laststart - 3); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* We've added more stuff to the buffer. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ buf_end += 3; ~~~~~~~~~~~~~ } ~ /* On failure, jump from laststart to buf_end + 3, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ which will be the end of the buffer after this jump ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ is inserted. */ ~~~~~~~~~~~~~~~~ GET_BUFFER_SPACE (3); ~~~~~~~~~~~~~~~~~~~~~ INSERT_JUMP (keep_string_p ? on_failure_keep_string_jump ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ : on_failure_jump, ~~~~~~~~~~~~~~~~~~ laststart, buf_end + 3); ~~~~~~~~~~~~~~~~~~~~~~~~ buf_end += 3; ~~~~~~~~~~~~~ if (!zero_times_ok) ~~~~~~~~~~~~~~~~~~~ { ~ /* At least one repetition is required, so insert a ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `dummy_failure_jump' before the initial ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `on_failure_jump' instruction of the loop. This ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ effects a skip over that instruction the first time ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ we hit that loop. */ ~~~~~~~~~~~~~~~~~~~~~ GET_BUFFER_SPACE (3); ~~~~~~~~~~~~~~~~~~~~~ INSERT_JUMP (dummy_failure_jump, laststart, laststart + 6); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ buf_end += 3; ~~~~~~~~~~~~~ } ~ } ~ pending_exact = 0; ~~~~~~~~~~~~~~~~~~ } ~ break; ~~~~~~ case '.': ~~~~~~~~~ laststart = buf_end; ~~~~~~~~~~~~~~~~~~~~ BUF_PUSH (anychar); ~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ #ifdef MULE ~~~~~~~~~~~ #define MAYBE_START_OVER_WITH_EXTENDED(ch) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (ch >= 0x80) do \ ~~~~~~~~~~~~~~~~~~~~~~ { \ ~~~~~~~ goto start_over_with_extended; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } while (0) ~~~~~~~~~~~ #else ~~~~~ #define MAYBE_START_OVER_WITH_EXTENDED(ch) (void)(ch) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ case '[': ~~~~~~~~~ { ~ /* XEmacs change: this whole section */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_bool had_char_class = false; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (p == pend) FREE_STACK_RETURN (REG_EBRACK); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Ensure that we have enough space to push a charset: the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ opcode, the length count, and the bitset; 34 bytes in all. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ GET_BUFFER_SPACE (34); ~~~~~~~~~~~~~~~~~~~~~~ laststart = buf_end; ~~~~~~~~~~~~~~~~~~~~ /* We test `*p == '^' twice, instead of using an if ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ statement, so we only need one BUF_PUSH. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ BUF_PUSH (*p == '^' ? charset_not : charset); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (*p == '^') ~~~~~~~~~~~~~~ p++; ~~~~ /* Remember the first position in the bracket expression. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ p1 = p; ~~~~~~~ /* Push the number of bytes in the bitmap. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ BUF_PUSH ((1 << BYTEWIDTH) / BYTEWIDTH); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Clear the whole map. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~ memset (buf_end, 0, (1 << BYTEWIDTH) / BYTEWIDTH); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* charset_not matches newline according to a syntax bit. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if ((re_opcode_t) buf_end[-2] == charset_not ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ && (syntax & RE_HAT_LISTS_NOT_NEWLINE)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ SET_LIST_BIT ('\n'); ~~~~~~~~~~~~~~~~~~~~ /* Read in characters and ranges, setting map bits. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (;;) ~~~~~~~~ { ~ if (p == pend) FREE_STACK_RETURN (REG_EBRACK); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PATFETCH (c); ~~~~~~~~~~~~~ /* Frumble-bumble, we may have found some extended chars. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Need to start over, process everything using the general ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ extended-char mechanism, and need to use charset_mule and ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ charset_mule_not instead of charset and charset_not. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ MAYBE_START_OVER_WITH_EXTENDED (c); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* \ might escape characters inside [...] and [^...]. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if ((syntax & RE_BACKSLASH_ESCAPE_IN_LISTS) && c == '\\') ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ if (p == pend) FREE_STACK_RETURN (REG_EESCAPE); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PATFETCH (c1); ~~~~~~~~~~~~~~ MAYBE_START_OVER_WITH_EXTENDED (c1); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ SET_LIST_BIT (c1); ~~~~~~~~~~~~~~~~~~ continue; ~~~~~~~~~ } ~ /* Could be the end of the bracket expression. If it's ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ not (i.e., when the bracket expression is `[]' so ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ far), the ']' character bit gets set way below. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (c == ']' && p != p1 + 1) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ /* Look ahead to see if it's a range when the last thing ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ was a character class. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~ if (had_char_class && c == '-' && *p != ']') ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ FREE_STACK_RETURN (REG_ERANGE); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Look ahead to see if it's a range when the last thing ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ was a character: if this is a hyphen not at the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ beginning or the end of a list, then it's the range ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ operator. */ ~~~~~~~~~~~~~ if (c == '-' ~~~~~~~~~~~~ && !(p - 2 >= pattern && p[-2] == '[') ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ && !(p - 3 >= pattern && p[-3] == '[' && p[-2] == '^') ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ && *p != ']') ~~~~~~~~~~~~~ { ~ reg_errcode_t ret; ~~~~~~~~~~~~~~~~~~ MAYBE_START_OVER_WITH_EXTENDED (*(unsigned char *)p); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ret = compile_range (&p, pend, translate, syntax, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ buf_end); ~~~~~~~~~ if (ret != REG_NOERROR) FREE_STACK_RETURN (ret); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ else if (p[0] == '-' && p[1] != ']') ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { /* This handles ranges made up of characters only. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ reg_errcode_t ret; ~~~~~~~~~~~~~~~~~~ /* Move past the `-'. */ ~~~~~~~~~~~~~~~~~~~~~~~~~ PATFETCH (c1); ~~~~~~~~~~~~~~ MAYBE_START_OVER_WITH_EXTENDED (*(unsigned char *)p); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ret = compile_range (&p, pend, translate, syntax, buf_end); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (ret != REG_NOERROR) FREE_STACK_RETURN (ret); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* See if we're at the beginning of a possible character ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ class. */ ~~~~~~~~~~ else if (syntax & RE_CHAR_CLASSES && c == '[' && *p == ':') ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ re_char *str = p + 1; ~~~~~~~~~~~~~~~~~~~~~ PATFETCH (c); ~~~~~~~~~~~~~ c1 = 0; ~~~~~~~ /* If pattern is `[[:'. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (p == pend) FREE_STACK_RETURN (REG_EBRACK); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (;;) ~~~~~~~~ { ~ PATFETCH (c); ~~~~~~~~~~~~~ if ((c == ':' && *p == ']') || p == pend) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ break; ~~~~~~ } ~ c1++; ~~~~~ } ~ /* If isn't a word bracketed by `[:' and `:]': ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ undo the ending character, the letters, and leave ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the leading `:' and `[' (but set bits for them). */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (c == ':' && *p == ']') ~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ re_wctype_t cc = re_wctype (str, c1); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int ch; ~~~~~~~ if (cc == RECC_ERROR) ~~~~~~~~~~~~~~~~~~~~~ FREE_STACK_RETURN (REG_ECTYPE); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Throw away the ] at the end of the character ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ class. */ ~~~~~~~~~~ PATFETCH (c); ~~~~~~~~~~~~~ if (p == pend) FREE_STACK_RETURN (REG_EBRACK); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef MULE ~~~~~~~~~~~ if (re_wctype_can_match_non_ascii (cc)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ goto start_over_with_extended; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ #endif /* MULE */ ~~~~~~~~~~~~~~~~~ for (ch = 0; ch < (1 << BYTEWIDTH); ++ch) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ if (re_iswctype (ch, cc ~~~~~~~~~~~~~~~~~~~~~~~ RE_ISWCTYPE_ARG (current_buffer))) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ SET_LIST_BIT (ch); ~~~~~~~~~~~~~~~~~~ } ~ } ~ had_char_class = true; ~~~~~~~~~~~~~~~~~~~~~~ } ~ else ~~~~ { ~ c1++; ~~~~~ while (c1--) ~~~~~~~~~~~~ PATUNFETCH; ~~~~~~~~~~~ SET_LIST_BIT ('['); ~~~~~~~~~~~~~~~~~~~ SET_LIST_BIT (':'); ~~~~~~~~~~~~~~~~~~~ had_char_class = false; ~~~~~~~~~~~~~~~~~~~~~~~ } ~ } ~ else ~~~~ { ~ had_char_class = false; ~~~~~~~~~~~~~~~~~~~~~~~ SET_LIST_BIT (c); ~~~~~~~~~~~~~~~~~ } ~ } ~ /* Discard any (non)matching list bytes that are all 0 at the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ end of the map. Decrease the map-length byte too. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ while ((int) buf_end[-1] > 0 && buf_end[buf_end[-1] - 1] == 0) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ buf_end[-1]--; ~~~~~~~~~~~~~~ buf_end += buf_end[-1]; ~~~~~~~~~~~~~~~~~~~~~~~ } ~ break; ~~~~~~ #ifdef MULE ~~~~~~~~~~~ start_over_with_extended: ~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ REGISTER Lisp_Object rtab = Qnil; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Bitbyte flags = 0; ~~~~~~~~~~~~~~~~~~ int bytes_needed = sizeof (flags); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_bool had_char_class = false; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* There are extended chars here, which means we need to use the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ unified range-table format. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (buf_end[-2] == charset) ~~~~~~~~~~~~~~~~~~~~~~~~~~~ buf_end[-2] = charset_mule; ~~~~~~~~~~~~~~~~~~~~~~~~~~~ else ~~~~ buf_end[-2] = charset_mule_not; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ buf_end--; ~~~~~~~~~~ p = p1; /* go back to the beginning of the charset, after ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ a possible ^. */ ~~~~~~~~~~~~~~~~ rtab = Vthe_lisp_rangetab; ~~~~~~~~~~~~~~~~~~~~~~~~~~ Fclear_range_table (rtab); ~~~~~~~~~~~~~~~~~~~~~~~~~~ /* charset_not matches newline according to a syntax bit. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if ((re_opcode_t) buf_end[-1] == charset_mule_not ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ && (syntax & RE_HAT_LISTS_NOT_NEWLINE)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ SET_RANGETAB_BIT ('\n'); ~~~~~~~~~~~~~~~~~~~~~~~~ /* Read in characters and ranges, setting map bits. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (;;) ~~~~~~~~ { ~ if (p == pend) FREE_STACK_RETURN (REG_EBRACK); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PATFETCH (c); ~~~~~~~~~~~~~ /* \ might escape characters inside [...] and [^...]. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if ((syntax & RE_BACKSLASH_ESCAPE_IN_LISTS) && c == '\\') ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ if (p == pend) FREE_STACK_RETURN (REG_EESCAPE); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PATFETCH (c1); ~~~~~~~~~~~~~~ SET_RANGETAB_BIT (c1); ~~~~~~~~~~~~~~~~~~~~~~ continue; ~~~~~~~~~ } ~ /* Could be the end of the bracket expression. If it's ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ not (i.e., when the bracket expression is `[]' so ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ far), the ']' character bit gets set way below. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (c == ']' && p != p1 + 1) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ /* Look ahead to see if it's a range when the last thing ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ was a character class. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~ if (had_char_class && c == '-' && *p != ']') ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ FREE_STACK_RETURN (REG_ERANGE); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Look ahead to see if it's a range when the last thing ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ was a character: if this is a hyphen not at the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ beginning or the end of a list, then it's the range ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ operator. */ ~~~~~~~~~~~~~ if (c == '-' ~~~~~~~~~~~~ && !(p - 2 >= pattern && p[-2] == '[') ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ && !(p - 3 >= pattern && p[-3] == '[' && p[-2] == '^') ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ && *p != ']') ~~~~~~~~~~~~~ { ~ reg_errcode_t ret; ~~~~~~~~~~~~~~~~~~ ret = compile_extended_range (&p, pend, translate, syntax, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ rtab); ~~~~~~ if (ret != REG_NOERROR) FREE_STACK_RETURN (ret); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ else if (p[0] == '-' && p[1] != ']') ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { /* This handles ranges made up of characters only. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ reg_errcode_t ret; ~~~~~~~~~~~~~~~~~~ /* Move past the `-'. */ ~~~~~~~~~~~~~~~~~~~~~~~~~ PATFETCH (c1); ~~~~~~~~~~~~~~ ret = compile_extended_range (&p, pend, translate, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ syntax, rtab); ~~~~~~~~~~~~~~ if (ret != REG_NOERROR) FREE_STACK_RETURN (ret); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* See if we're at the beginning of a possible character ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ class. */ ~~~~~~~~~~ else if (syntax & RE_CHAR_CLASSES && c == '[' && *p == ':') ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ re_char *str = p + 1; ~~~~~~~~~~~~~~~~~~~~~ PATFETCH (c); ~~~~~~~~~~~~~ c1 = 0; ~~~~~~~ /* If pattern is `[[:'. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (p == pend) FREE_STACK_RETURN (REG_EBRACK); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (;;) ~~~~~~~~ { ~ PATFETCH (c); ~~~~~~~~~~~~~ if ((c == ':' && *p == ']') || p == pend) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ break; ~~~~~~ } ~ c1++; ~~~~~ } ~ /* If isn't a word bracketed by `[:' and `:]': ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ undo the ending character, the letters, and leave ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the leading `:' and `[' (but set bits for them). */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (c == ':' && *p == ']') ~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ re_wctype_t cc = re_wctype (str, c1); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ reg_errcode_t ret = REG_NOERROR; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (cc == RECC_ERROR) ~~~~~~~~~~~~~~~~~~~~~ FREE_STACK_RETURN (REG_ECTYPE); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Throw away the ] at the end of the character ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ class. */ ~~~~~~~~~~ PATFETCH (c); ~~~~~~~~~~~~~ if (p == pend) FREE_STACK_RETURN (REG_EBRACK); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ret = compile_char_class (cc, rtab, &flags); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (ret != REG_NOERROR) FREE_STACK_RETURN (ret); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ had_char_class = true; ~~~~~~~~~~~~~~~~~~~~~~ } ~ else ~~~~ { ~ c1++; ~~~~~ while (c1--) ~~~~~~~~~~~~ PATUNFETCH; ~~~~~~~~~~~ SET_RANGETAB_BIT ('['); ~~~~~~~~~~~~~~~~~~~~~~~ SET_RANGETAB_BIT (':'); ~~~~~~~~~~~~~~~~~~~~~~~ had_char_class = false; ~~~~~~~~~~~~~~~~~~~~~~~ } ~ } ~ else ~~~~ { ~ had_char_class = false; ~~~~~~~~~~~~~~~~~~~~~~~ SET_RANGETAB_BIT (c); ~~~~~~~~~~~~~~~~~~~~~ } ~ } ~ bytes_needed += unified_range_table_bytes_needed (rtab); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ GET_BUFFER_SPACE (bytes_needed); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ *buf_end++ = flags; ~~~~~~~~~~~~~~~~~~~ unified_range_table_copy_data (rtab, buf_end); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ buf_end += unified_range_table_bytes_used (buf_end); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ } ~ #endif /* MULE */ ~~~~~~~~~~~~~~~~~ case '(': ~~~~~~~~~ if (syntax & RE_NO_BK_PARENS) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto handle_open; ~~~~~~~~~~~~~~~~~ else ~~~~ goto normal_char; ~~~~~~~~~~~~~~~~~ case ')': ~~~~~~~~~ if (syntax & RE_NO_BK_PARENS) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto handle_close; ~~~~~~~~~~~~~~~~~~ else ~~~~ goto normal_char; ~~~~~~~~~~~~~~~~~ case '\n': ~~~~~~~~~~ if (syntax & RE_NEWLINE_ALT) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto handle_alt; ~~~~~~~~~~~~~~~~ else ~~~~ goto normal_char; ~~~~~~~~~~~~~~~~~ case '|': ~~~~~~~~~ if (syntax & RE_NO_BK_VBAR) ~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto handle_alt; ~~~~~~~~~~~~~~~~ else ~~~~ goto normal_char; ~~~~~~~~~~~~~~~~~ case '{': ~~~~~~~~~ if (syntax & RE_INTERVALS && syntax & RE_NO_BK_BRACES) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto handle_interval; ~~~~~~~~~~~~~~~~~~~~~ else ~~~~ goto normal_char; ~~~~~~~~~~~~~~~~~ case '\\': ~~~~~~~~~~ if (p == pend) FREE_STACK_RETURN (REG_EESCAPE); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Do not translate the character after the \, so that we can ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ distinguish, e.g., \B from \b, even if we normally would ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ translate, e.g., B to b. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PATFETCH_RAW (c); ~~~~~~~~~~~~~~~~~ switch (c) ~~~~~~~~~~ { ~ case '(': ~~~~~~~~~ if (syntax & RE_NO_BK_PARENS) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto normal_backslash; ~~~~~~~~~~~~~~~~~~~~~~ handle_open: ~~~~~~~~~~~~ { ~ regnum_t r = 0; ~~~~~~~~~~~~~~~ re_bool shy = 0, named_nonshy = 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!(syntax & RE_NO_SHY_GROUPS) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ && p != pend && itext_ichar_eql (p, '?')) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ INC_IBYTEPTR (p); /* Gobble up the '?'. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PATFETCH (c); /* Fetch the next character, which may be a ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ digit. */ ~~~~~~~~~ switch (c) ~~~~~~~~~~ { ~ case ':': /* shy groups */ ~~~~~~~~~~~~~~~~~~~~~~~~~~ shy = 1; ~~~~~~~~ break; ~~~~~~ case '1': case '2': case '3': case '4': ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case '5': case '6': case '7': case '8': case '9': ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PATUNFETCH; ~~~~~~~~~~~ GET_UNSIGNED_NUMBER (r); ~~~~~~~~~~~~~~~~~~~~~~~~ if (itext_ichar_eql (p, ':')) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ named_nonshy = 1; ~~~~~~~~~~~~~~~~~ INC_IBYTEPTR (p); /* Gobble up the ':'. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ /* Otherwise, fall through and error. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* An explicitly specified regnum must start with ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ non-0. */ ~~~~~~~~~ case '0': ~~~~~~~~~ default: ~~~~~~~~ FREE_STACK_RETURN (REG_BADPAT); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ } ~ ++regnum; ~~~~~~~~~ bufp->re_ngroups++; ~~~~~~~~~~~~~~~~~~~ if (bufp->re_ngroups > MAX_REGNUM) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ FREE_STACK_RETURN (REG_ESUBREG); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ if (!shy) ~~~~~~~~~ { ~ if (named_nonshy) ~~~~~~~~~~~~~~~~~ { ~ if (r < bufp->external_to_internal_register_size) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ if (group_in_compile_stack ~~~~~~~~~~~~~~~~~~~~~~~~~~ (compile_stack, ~~~~~~~~~~~~~~~ bufp->external_to_internal_register[r])) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ /* GNU errors in this context, which is ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ inconsistent; it otherwise has no problem ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ with named non-shy groups overriding ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ previously-assigned group numbers. I choose ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ to error here for consistency with GNU for ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ those writing code that should target ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ both. */ ~~~~~~~~ FREE_STACK_RETURN (REG_ESUBREG); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ } ~ if (r > bufp->re_nsub) ~~~~~~~~~~~~~~~~~~~~~~ { ~ bufp->re_nsub = r; ~~~~~~~~~~~~~~~~~~ } ~ } ~ else ~~~~ { ~ r = ++(bufp->re_nsub); ~~~~~~~~~~~~~~~~~~~~~~ } ~ while (bufp->external_to_internal_register_size <= ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->re_nsub) ~~~~~~~~~~~~~~ { ~ int i; ~~~~~~ int old_size = ~~~~~~~~~~~~~~ bufp->external_to_internal_register_size; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->external_to_internal_register_size ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ += max (old_size + 5, bufp->re_nsub + 5); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RETALLOC (bufp->external_to_internal_register, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->external_to_internal_register_size, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int); ~~~~~ for (i = old_size; ~~~~~~~~~~~~~~~~~~ i < bufp->external_to_internal_register_size; i++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->external_to_internal_register[i] = ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (int) 0xDEADBEEF; ~~~~~~~~~~~~~~~~~ } ~ /* This is explicitly [r] rather than [bufp->re_nsub] for ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the case that the named nonshy group references an ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ unused register number less than bufp->re_nsub. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->external_to_internal_register[r] = ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->re_ngroups; ~~~~~~~~~~~~~~~~~ } ~ if (COMPILE_STACK_FULL) ~~~~~~~~~~~~~~~~~~~~~~~ { ~ RETALLOC (compile_stack.stack, compile_stack.size << 1, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ compile_stack_elt_t); ~~~~~~~~~~~~~~~~~~~~~ if (compile_stack.stack == NULL) return REG_ESPACE; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ compile_stack.size <<= 1; ~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* These are the values to restore when we hit end of this ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ group. They are all relative offsets, so that if the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ whole pattern moves because of realloc, they will still ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ be valid. */ ~~~~~~~~~~~~~ COMPILE_STACK_TOP.begalt_offset = begalt - bufp->buffer; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ COMPILE_STACK_TOP.fixup_alt_jump ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ = fixup_alt_jump ? fixup_alt_jump - bufp->buffer + 1 : 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ COMPILE_STACK_TOP.laststart_offset = buf_end - bufp->buffer; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ COMPILE_STACK_TOP.regnum = bufp->re_ngroups; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ COMPILE_STACK_TOP.inner_group_offset ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ = buf_end - bufp->buffer + 3; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* We will eventually replace the 0 with the number of ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ groups inner to this one, using inner_group_offset, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ above. */ ~~~~~~~~~ GET_BUFFER_SPACE (5); ~~~~~~~~~~~~~~~~~~~~~ store_op2 (start_memory, buf_end, bufp->re_ngroups, 0); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ buf_end += 5; ~~~~~~~~~~~~~ compile_stack.avail++; ~~~~~~~~~~~~~~~~~~~~~~ fixup_alt_jump = 0; ~~~~~~~~~~~~~~~~~~~ laststart = 0; ~~~~~~~~~~~~~~ begalt = buf_end; ~~~~~~~~~~~~~~~~~ pending_exact = 0; ~~~~~~~~~~~~~~~~~~ } ~ break; ~~~~~~ case ')': ~~~~~~~~~ if (syntax & RE_NO_BK_PARENS) goto normal_backslash; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (COMPILE_STACK_EMPTY) ~~~~~~~~~~~~~~~~~~~~~~~~ { ~ if (syntax & RE_UNMATCHED_RIGHT_PAREN_ORD) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto normal_backslash; ~~~~~~~~~~~~~~~~~~~~~~ else ~~~~ FREE_STACK_RETURN (REG_ERPAREN); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ handle_close: ~~~~~~~~~~~~~ if (fixup_alt_jump) ~~~~~~~~~~~~~~~~~~~ { /* Push a dummy failure point at the end of the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ alternative for a possible future ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `pop_failure_jump' to pop. See comments at ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `push_dummy_failure' in `re_match_2'. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ BUF_PUSH (push_dummy_failure); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* We allocated space for this jump when we assigned ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ to `fixup_alt_jump', in the `handle_alt' case below. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ STORE_JUMP (jump_past_alt, fixup_alt_jump, buf_end - 1); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* See similar code for backslashed left paren above. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (COMPILE_STACK_EMPTY) ~~~~~~~~~~~~~~~~~~~~~~~~ { ~ if (syntax & RE_UNMATCHED_RIGHT_PAREN_ORD) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto normal_char; ~~~~~~~~~~~~~~~~~ else ~~~~ FREE_STACK_RETURN (REG_ERPAREN); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* Since we just checked for an empty stack above, this ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ``can't happen''. */ ~~~~~~~~~~~~~~~~~~~~~ assert (compile_stack.avail != 0); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ /* We don't just want to restore into `regnum', because ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ later groups should continue to be numbered higher, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ as in `(ab)c(de)' -- the second group is #2. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regnum_t this_group_regnum; ~~~~~~~~~~~~~~~~~~~~~~~~~~~ unsigned char *inner_group_loc; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ compile_stack.avail--; ~~~~~~~~~~~~~~~~~~~~~~ begalt = bufp->buffer + COMPILE_STACK_TOP.begalt_offset; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fixup_alt_jump ~~~~~~~~~~~~~~ = COMPILE_STACK_TOP.fixup_alt_jump ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ? bufp->buffer + COMPILE_STACK_TOP.fixup_alt_jump - 1 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ : 0; ~~~~ laststart = bufp->buffer + COMPILE_STACK_TOP.laststart_offset; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ this_group_regnum = COMPILE_STACK_TOP.regnum; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ pending_exact = 0; ~~~~~~~~~~~~~~~~~~ /* We're at the end of the group, so now we know how many ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ groups were inside this one. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ inner_group_loc ~~~~~~~~~~~~~~~ = bufp->buffer + COMPILE_STACK_TOP.inner_group_offset; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ STORE_NUMBER (inner_group_loc, regnum - this_group_regnum); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ GET_BUFFER_SPACE (5); ~~~~~~~~~~~~~~~~~~~~~ store_op2 (stop_memory, buf_end, this_group_regnum, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regnum - this_group_regnum); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ buf_end += 5; ~~~~~~~~~~~~~ } ~ break; ~~~~~~ case '|': /* `\|'. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~ if (syntax & RE_LIMITED_OPS || syntax & RE_NO_BK_VBAR) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto normal_backslash; ~~~~~~~~~~~~~~~~~~~~~~ handle_alt: ~~~~~~~~~~~ if (syntax & RE_LIMITED_OPS) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto normal_char; ~~~~~~~~~~~~~~~~~ /* Insert before the previous alternative a jump which ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ jumps to this alternative if the former fails. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ GET_BUFFER_SPACE (3); ~~~~~~~~~~~~~~~~~~~~~ INSERT_JUMP (on_failure_jump, begalt, buf_end + 6); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ pending_exact = 0; ~~~~~~~~~~~~~~~~~~ buf_end += 3; ~~~~~~~~~~~~~ /* The alternative before this one has a jump after it ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ which gets executed if it gets matched. Adjust that ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ jump so it will jump to this alternative's analogous ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ jump (put in below, which in turn will jump to the next ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (if any) alternative's such jump, etc.). The last such ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ jump jumps to the correct final destination. A picture: ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ _____ _____ ~~~~~~~~~~~ | | | | ~~~~~~~~~~~ | v | v ~~~~~~~~~~~ a | b | c ~~~~~~~~~~~ If we are at `b', then fixup_alt_jump right now points to a ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ three-byte space after `a'. We'll put in the jump, set ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fixup_alt_jump to right after `b', and leave behind three ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bytes which we'll fill in when we get to after `c'. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (fixup_alt_jump) ~~~~~~~~~~~~~~~~~~~ STORE_JUMP (jump_past_alt, fixup_alt_jump, buf_end); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Mark and leave space for a jump after this alternative, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ to be filled in later either by next alternative or ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ when know we're at the end of a series of alternatives. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fixup_alt_jump = buf_end; ~~~~~~~~~~~~~~~~~~~~~~~~~ GET_BUFFER_SPACE (3); ~~~~~~~~~~~~~~~~~~~~~ buf_end += 3; ~~~~~~~~~~~~~ laststart = 0; ~~~~~~~~~~~~~~ begalt = buf_end; ~~~~~~~~~~~~~~~~~ break; ~~~~~~ case '{': ~~~~~~~~~ /* If \{ is a literal. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!(syntax & RE_INTERVALS) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* If we're at `\{' and it's not the open-interval ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ operator. */ ~~~~~~~~~~~~~ || ((syntax & RE_INTERVALS) && (syntax & RE_NO_BK_BRACES)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || (p - 2 == pattern && p == pend)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto normal_backslash; ~~~~~~~~~~~~~~~~~~~~~~ #define BAD_INTERVAL(errnum) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ do \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (syntax & RE_NO_BK_BRACES) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto unfetch_interval; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ else \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ FREE_STACK_RETURN (errnum); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } while (0) ~~~~~~~~~~~ handle_interval: ~~~~~~~~~~~~~~~~ { ~ /* If got here, then the syntax allows intervals. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* At least (most) this many matches must be made. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int lower_bound = 0, upper_bound = -1; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ beg_interval = p - 1; ~~~~~~~~~~~~~~~~~~~~~ if (p == pend || itext_ichar_eql (p, '+')) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ BAD_INTERVAL (REG_EBRACE); ~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ GET_UNSIGNED_NUMBER (lower_bound); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (p == pend) BAD_INTERVAL (REG_EBRACE); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PATFETCH (c); ~~~~~~~~~~~~~ if (c == ',') ~~~~~~~~~~~~~ { ~ if (p == pend || itext_ichar_eql (p, '+')) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ BAD_INTERVAL (REG_EBRACE); ~~~~~~~~~~~~~~~~~~~~~~~~~~ GET_UNSIGNED_NUMBER (upper_bound); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (upper_bound < 0) upper_bound = RE_DUP_MAX; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (p == pend) BAD_INTERVAL (REG_EBRACE); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PATFETCH (c); ~~~~~~~~~~~~~ } ~ else ~~~~ { ~ /* Interval such as `{1}' => match exactly once. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ upper_bound = lower_bound; ~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ if (lower_bound > upper_bound) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ BAD_INTERVAL (REG_EBRACE); ~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ if (upper_bound > RE_DUP_MAX) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ BAD_INTERVAL (REG_ESIZEBR); ~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ if (!(syntax & RE_NO_BK_BRACES)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ if (c != '\\') ~~~~~~~~~~~~~~ FREE_STACK_RETURN (REG_BADBR); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (p == pend) ~~~~~~~~~~~~~~ FREE_STACK_RETURN (REG_EESCAPE); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PATFETCH (c); ~~~~~~~~~~~~~ } ~ if (c != '}') ~~~~~~~~~~~~~ { ~ BAD_INTERVAL (REG_EBRACE); ~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* We just parsed a valid interval. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* It's invalid to have no preceding RE. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!laststart) ~~~~~~~~~~~~~~~ { ~ if (syntax & RE_CONTEXT_INVALID_OPS) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ FREE_STACK_RETURN (REG_BADRPT); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ else if (syntax & RE_CONTEXT_INDEP_OPS) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ laststart = buf_end; ~~~~~~~~~~~~~~~~~~~~ else ~~~~ goto unfetch_interval; ~~~~~~~~~~~~~~~~~~~~~~ } ~ /* If the upper bound is zero, don't want to succeed at ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ all; jump from `laststart' to `b + 3', which will be ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the end of the buffer after we insert the jump. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (upper_bound == 0) ~~~~~~~~~~~~~~~~~~~~~ { ~ GET_BUFFER_SPACE (3); ~~~~~~~~~~~~~~~~~~~~~ INSERT_JUMP (jump, laststart, buf_end + 3); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ buf_end += 3; ~~~~~~~~~~~~~ } ~ /* Otherwise, we have a nontrivial interval. When ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ we're all done, the pattern will look like: ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ set_number_at ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ set_number_at ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ succeed_n ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ~~~~~~~~~~~~~~ jump_n ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (The upper bound and `jump_n' are omitted if ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `upper_bound' is 1, though.) */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ else ~~~~ { /* If the upper bound is > 1, we need to insert ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ more at the end of the loop. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int nbytes = 10 + (upper_bound > 1) * 10; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ GET_BUFFER_SPACE (nbytes); ~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Initialize lower bound of the `succeed_n', even ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ though it will be set during matching by its ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ attendant `set_number_at' (inserted next), ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ because `re_compile_fastmap' needs to know. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Jump to the `jump_n' we might insert below. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ INSERT_JUMP2 (succeed_n, laststart, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ buf_end + 5 + (upper_bound > 1) * 5, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ lower_bound); ~~~~~~~~~~~~~ buf_end += 5; ~~~~~~~~~~~~~ /* Code to initialize the lower bound. Insert ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ before the `succeed_n'. The `5' is the last two ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bytes of this `set_number_at', plus 3 bytes of ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the following `succeed_n'. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ insert_op2 (set_number_at, laststart, 5, lower_bound, buf_end); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ buf_end += 5; ~~~~~~~~~~~~~ if (upper_bound > 1) ~~~~~~~~~~~~~~~~~~~~ { /* More than one repetition is allowed, so ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ append a backward jump to the `succeed_n' ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ that starts this interval. ~~~~~~~~~~~~~~~~~~~~~~~~~~ When we've reached this during matching, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ we'll have matched the interval once, so ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ jump back only `upper_bound - 1' times. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ STORE_JUMP2 (jump_n, buf_end, laststart + 5, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ upper_bound - 1); ~~~~~~~~~~~~~~~~~ buf_end += 5; ~~~~~~~~~~~~~ /* The location we want to set is the second ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ parameter of the `jump_n'; that is `b-2' as ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ an absolute address. `laststart' will be ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the `set_number_at' we're about to insert; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `laststart+3' the number to set, the source ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for the relative address. But we are ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ inserting into the middle of the pattern -- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ so everything is getting moved up by 5. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Conclusion: (b - 2) - (laststart + 3) + 5, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ i.e., b - laststart. ~~~~~~~~~~~~~~~~~~~~ We insert this at the beginning of the loop ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ so that if we fail during matching, we'll ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ reinitialize the bounds. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ insert_op2 (set_number_at, laststart, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ buf_end - laststart, ~~~~~~~~~~~~~~~~~~~~ upper_bound - 1, buf_end); ~~~~~~~~~~~~~~~~~~~~~~~~~~ buf_end += 5; ~~~~~~~~~~~~~ } ~ } ~ pending_exact = 0; ~~~~~~~~~~~~~~~~~~ beg_interval = NULL; ~~~~~~~~~~~~~~~~~~~~ } ~ break; ~~~~~~ #undef BAD_INTERVAL ~~~~~~~~~~~~~~~~~~~ unfetch_interval: ~~~~~~~~~~~~~~~~~ /* If an invalid interval, match the characters as literals. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ assert (beg_interval); ~~~~~~~~~~~~~~~~~~~~~~ p = beg_interval; ~~~~~~~~~~~~~~~~~ beg_interval = NULL; ~~~~~~~~~~~~~~~~~~~~ /* normal_char and normal_backslash need `c'. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PATFETCH (c); ~~~~~~~~~~~~~ if (!(syntax & RE_NO_BK_BRACES)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ if (p > pattern && p[-1] == '\\') ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto normal_backslash; ~~~~~~~~~~~~~~~~~~~~~~ } ~ goto normal_char; ~~~~~~~~~~~~~~~~~ #ifdef emacs ~~~~~~~~~~~~ /* There is no way to specify the before_dot and after_dot ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ operators. rms says this is ok. --karl */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case '=': ~~~~~~~~~ BUF_PUSH (at_dot); ~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case 's': ~~~~~~~~~ laststart = buf_end; ~~~~~~~~~~~~~~~~~~~~ PATFETCH (c); ~~~~~~~~~~~~~ /* XEmacs addition */ ~~~~~~~~~~~~~~~~~~~~~ if (c >= 0x80 || syntax_spec_code[c] == 0377) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ FREE_STACK_RETURN (REG_ESYNTAX); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ BUF_PUSH_2 (syntaxspec, syntax_spec_code[c]); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case 'S': ~~~~~~~~~ laststart = buf_end; ~~~~~~~~~~~~~~~~~~~~ PATFETCH (c); ~~~~~~~~~~~~~ /* XEmacs addition */ ~~~~~~~~~~~~~~~~~~~~~ if (c >= 0x80 || syntax_spec_code[c] == 0377) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ FREE_STACK_RETURN (REG_ESYNTAX); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ BUF_PUSH_2 (notsyntaxspec, syntax_spec_code[c]); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ #ifdef MULE ~~~~~~~~~~~ /* 97.2.17 jhod merged in to XEmacs from mule-2.3 */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case 'c': ~~~~~~~~~ laststart = buf_end; ~~~~~~~~~~~~~~~~~~~~ PATFETCH_RAW (c); ~~~~~~~~~~~~~~~~~ if (c < 32 || c > 127) ~~~~~~~~~~~~~~~~~~~~~~ FREE_STACK_RETURN (REG_ECATEGORY); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ BUF_PUSH_2 (categoryspec, c); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case 'C': ~~~~~~~~~ laststart = buf_end; ~~~~~~~~~~~~~~~~~~~~ PATFETCH_RAW (c); ~~~~~~~~~~~~~~~~~ if (c < 32 || c > 127) ~~~~~~~~~~~~~~~~~~~~~~ FREE_STACK_RETURN (REG_ECATEGORY); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ BUF_PUSH_2 (notcategoryspec, c); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ /* end of category patch */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif /* MULE */ ~~~~~~~~~~~~~~~~~ #endif /* emacs */ ~~~~~~~~~~~~~~~~~~ case 'w': ~~~~~~~~~ laststart = buf_end; ~~~~~~~~~~~~~~~~~~~~ BUF_PUSH (wordchar); ~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case 'W': ~~~~~~~~~ laststart = buf_end; ~~~~~~~~~~~~~~~~~~~~ BUF_PUSH (notwordchar); ~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case '<': ~~~~~~~~~ BUF_PUSH (wordbeg); ~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case '>': ~~~~~~~~~ BUF_PUSH (wordend); ~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case 'b': ~~~~~~~~~ BUF_PUSH (wordbound); ~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case 'B': ~~~~~~~~~ BUF_PUSH (notwordbound); ~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case '`': ~~~~~~~~~ BUF_PUSH (begbuf); ~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case '\'': ~~~~~~~~~~ BUF_PUSH (endbuf); ~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case '1': case '2': case '3': case '4': case '5': ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case '6': case '7': case '8': case '9': ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ regnum_t reg = -1, regint; ~~~~~~~~~~~~~~~~~~~~~~~~~~ if (syntax & RE_NO_BK_REFS) ~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto normal_char; ~~~~~~~~~~~~~~~~~ PATUNFETCH; ~~~~~~~~~~~ GET_UNSIGNED_NUMBER (reg); ~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Progressively divide down the backreference until we find ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ one that corresponds to an existing register. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ while (reg > 10 && ~~~~~~~~~~~~~~~~~~ (syntax & RE_NO_MULTI_DIGIT_BK_REFS ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || reg > bufp->re_nsub ~~~~~~~~~~~~~~~~~~~~~~ || (bufp->external_to_internal_register[reg] ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ == (int) 0xDEADBEEF))) ~~~~~~~~~~~~~~~~~~~~~~ { ~ PATUNFETCH; ~~~~~~~~~~~ reg /= 10; ~~~~~~~~~~ } ~ if (reg > bufp->re_nsub ~~~~~~~~~~~~~~~~~~~~~~~ || (bufp->external_to_internal_register[reg] ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ == (int) 0xDEADBEEF)) ~~~~~~~~~~~~~~~~~~~~~ { ~ /* \N with one digit with a non-existing group has always ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ been a syntax error. ~~~~~~~~~~~~~~~~~~~~ GNU as of Fr 27 Mär 2020 16:24:07 GMT do not accept ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ multidigit backreferences; if they did there would be ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ an argument for this not being an error for those ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ backreferences that are less than some known named ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ backreference. As it is currently we should error, this ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ will give those writing code for XEmacs better ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ feedback. */ ~~~~~~~~~~~~ FREE_STACK_RETURN (REG_ESUBREG); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ regint = bufp->external_to_internal_register[reg]; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Can't back reference to a subexpression if inside of it. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (group_in_compile_stack (compile_stack, regint)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ /* Check REG, not REGINT. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ while (reg > 10) ~~~~~~~~~~~~~~~~ { ~ PATUNFETCH; ~~~~~~~~~~~ reg = reg / 10; ~~~~~~~~~~~~~~~ } ~ goto normal_char; ~~~~~~~~~~~~~~~~~ } ~ #ifdef emacs ~~~~~~~~~~~~ if (reg > 9 && ~~~~~~~~~~~~~~ bufp->warned_about_incompatible_back_references == 0) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ bufp->warned_about_incompatible_back_references = 1; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ warn_when_safe (intern ("regex"), Qinfo, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ "Back reference \\%d now has new " ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ "semantics in %s", reg, pattern); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ #endif ~~~~~~ laststart = buf_end; ~~~~~~~~~~~~~~~~~~~~ GET_BUFFER_SPACE (3); ~~~~~~~~~~~~~~~~~~~~~ store_op1 (duplicate, buf_end, regint); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ buf_end += 3; ~~~~~~~~~~~~~ } ~ break; ~~~~~~ case '+': ~~~~~~~~~ case '?': ~~~~~~~~~ if (syntax & RE_BK_PLUS_QM) ~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto handle_plus; ~~~~~~~~~~~~~~~~~ else ~~~~ goto normal_backslash; ~~~~~~~~~~~~~~~~~~~~~~ default: ~~~~~~~~ normal_backslash: ~~~~~~~~~~~~~~~~~ /* You might think it would be useful for \ to mean ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ not to translate; but if we don't translate it, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ it will never match anything. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ c = RE_TRANSLATE (c); ~~~~~~~~~~~~~~~~~~~~~ goto normal_char; ~~~~~~~~~~~~~~~~~ } ~ break; ~~~~~~ default: ~~~~~~~~ /* Expects the character in `c'. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* `p' points to the location after where `c' came from. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ normal_char: ~~~~~~~~~~~~ { ~ /* The following conditional synced to GNU Emacs 22.1. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* If no exactn currently being built. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!pending_exact ~~~~~~~~~~~~~~~~~~ /* If last exactn not at current position. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || pending_exact + *pending_exact + 1 != buf_end ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* We have only one byte following the exactn for the count. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || *pending_exact >= (1 << BYTEWIDTH) - MAX_ICHAR_LEN ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* If followed by a repetition operator. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ If the lookahead fails because of end of pattern, any ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ trailing backslash will get caught later. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || (p != pend && (*p == '*' || *p == '^')) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || ((syntax & RE_BK_PLUS_QM) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ? p + 1 < pend && *p == '\\' && (p[1] == '+' || p[1] == '?') ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ : p != pend && (*p == '+' || *p == '?')) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || ((syntax & RE_INTERVALS) ~~~~~~~~~~~~~~~~~~~~~~~~~~~ && ((syntax & RE_NO_BK_BRACES) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ? p != pend && *p == '{' ~~~~~~~~~~~~~~~~~~~~~~~~ : p + 1 < pend && (p[0] == '\\' && p[1] == '{')))) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ /* Start building a new exactn. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ laststart = buf_end; ~~~~~~~~~~~~~~~~~~~~ BUF_PUSH_2 (exactn, 0); ~~~~~~~~~~~~~~~~~~~~~~~ pending_exact = buf_end - 1; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ #ifndef MULE ~~~~~~~~~~~~ BUF_PUSH (c); ~~~~~~~~~~~~~ (*pending_exact)++; ~~~~~~~~~~~~~~~~~~~ #else ~~~~~ { ~ Bytecount bt_count; ~~~~~~~~~~~~~~~~~~~ Ibyte tmp_buf[MAX_ICHAR_LEN]; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int i; ~~~~~~ bt_count = set_itext_ichar (tmp_buf, c); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (i = 0; i < bt_count; i++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ BUF_PUSH (tmp_buf[i]); ~~~~~~~~~~~~~~~~~~~~~~ (*pending_exact)++; ~~~~~~~~~~~~~~~~~~~ } ~ } ~ #endif ~~~~~~ break; ~~~~~~ } ~ } /* switch (c) */ ~~~~~~~~~~~~~~~~~~ } /* while p != pend */ ~~~~~~~~~~~~~~~~~~~~~~~ /* Through the pattern now. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (fixup_alt_jump) ~~~~~~~~~~~~~~~~~~~ STORE_JUMP (jump_past_alt, fixup_alt_jump, buf_end); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!COMPILE_STACK_EMPTY) ~~~~~~~~~~~~~~~~~~~~~~~~~ FREE_STACK_RETURN (REG_EPAREN); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* If we don't want backtracking, force success ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the first time we reach the end of the compiled pattern. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (syntax & RE_NO_POSIX_BACKTRACKING) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ BUF_PUSH (succeed); ~~~~~~~~~~~~~~~~~~~ xfree (compile_stack.stack); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* We have succeeded; set the length of the buffer. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->used = buf_end - bufp->buffer; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef DEBUG ~~~~~~~~~~~~ if (debug_regexps & RE_DEBUG_COMPILATION) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ DEBUG_PRINT1 ("\nCompiled pattern: \n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ print_compiled_pattern (bufp); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ #endif /* DEBUG */ ~~~~~~~~~~~~~~~~~~ #ifndef MATCH_MAY_ALLOCATE ~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Initialize the failure stack to the largest possible stack. This ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ isn't necessary unless we're trying to avoid calling alloca in ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the search and match routines. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ int num_regs = bufp->re_ngroups + 1; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Since DOUBLE_FAIL_STACK refuses to double only if the current size ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ is strictly greater than re_max_failures, the largest possible stack ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ is 2 * re_max_failures failure points. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (fail_stack.size < (2 * re_max_failures * MAX_FAILURE_ITEMS)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ fail_stack.size = (2 * re_max_failures * MAX_FAILURE_ITEMS); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (! fail_stack.stack) ~~~~~~~~~~~~~~~~~~~~~~~ fail_stack.stack ~~~~~~~~~~~~~~~~ = (fail_stack_elt_t *) xmalloc (fail_stack.size ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ * sizeof (fail_stack_elt_t)); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ else ~~~~ fail_stack.stack ~~~~~~~~~~~~~~~~ = (fail_stack_elt_t *) xrealloc (fail_stack.stack, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (fail_stack.size ~~~~~~~~~~~~~~~~ * sizeof (fail_stack_elt_t))); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ regex_grow_registers (num_regs); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ #endif /* not MATCH_MAY_ALLOCATE */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return REG_NOERROR; ~~~~~~~~~~~~~~~~~~~ } /* regex_compile */ ~~~~~~~~~~~~~~~~~~~~~ ~ /* Subroutines for `regex_compile'. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Store OP at LOC followed by two-byte integer parameter ARG. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ static void ~~~~~~~~~~~ store_op1 (re_opcode_t op, unsigned char *loc, int arg) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ *loc = (unsigned char) op; ~~~~~~~~~~~~~~~~~~~~~~~~~~ STORE_NUMBER (loc + 1, arg); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* Like `store_op1', but for two two-byte parameters ARG1 and ARG2. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ static void ~~~~~~~~~~~ store_op2 (re_opcode_t op, unsigned char *loc, int arg1, int arg2) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ *loc = (unsigned char) op; ~~~~~~~~~~~~~~~~~~~~~~~~~~ STORE_NUMBER (loc + 1, arg1); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ STORE_NUMBER (loc + 3, arg2); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* Copy the bytes from LOC to END to open up three bytes of space at LOC ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for OP followed by two-byte integer parameter ARG. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ static void ~~~~~~~~~~~ insert_op1 (re_opcode_t op, unsigned char *loc, int arg, unsigned char *end) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ REGISTER unsigned char *pfrom = end; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ REGISTER unsigned char *pto = end + 3; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ while (pfrom != loc) ~~~~~~~~~~~~~~~~~~~~ *--pto = *--pfrom; ~~~~~~~~~~~~~~~~~~ store_op1 (op, loc, arg); ~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* Like `insert_op1', but for two two-byte parameters ARG1 and ARG2. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ static void ~~~~~~~~~~~ insert_op2 (re_opcode_t op, unsigned char *loc, int arg1, int arg2, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ unsigned char *end) ~~~~~~~~~~~~~~~~~~~ { ~ REGISTER unsigned char *pfrom = end; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ REGISTER unsigned char *pto = end + 5; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ while (pfrom != loc) ~~~~~~~~~~~~~~~~~~~~ *--pto = *--pfrom; ~~~~~~~~~~~~~~~~~~ store_op2 (op, loc, arg1, arg2); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* P points to just after a ^ in PATTERN. Return true if that ^ comes ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ after an alternative or a begin-subexpression. We assume there is at ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ least one character before the ^. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ static re_bool ~~~~~~~~~~~~~~ at_begline_loc_p (re_char *pattern, re_char *p, reg_syntax_t syntax) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ re_char *prev = p - 2; ~~~~~~~~~~~~~~~~~~~~~~ re_bool prev_prev_backslash = prev > pattern && prev[-1] == '\\'; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return ~~~~~~ /* After a subexpression? */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (*prev == '(' && (syntax & RE_NO_BK_PARENS || prev_prev_backslash)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* After an alternative? */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || (*prev == '|' && (syntax & RE_NO_BK_VBAR || prev_prev_backslash)); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* The dual of at_begline_loc_p. This one is for $. We assume there is ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ at least one character after the $, i.e., `P < PEND'. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ static re_bool ~~~~~~~~~~~~~~ at_endline_loc_p (re_char *p, re_char *pend, int syntax) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ re_char *next = p; ~~~~~~~~~~~~~~~~~~ re_bool next_backslash = *next == '\\'; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_char *next_next = p + 1 < pend ? p + 1 : 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return ~~~~~~ /* Before a subexpression? */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (syntax & RE_NO_BK_PARENS ? *next == ')' ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ : next_backslash && next_next && *next_next == ')') ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Before an alternative? */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || (syntax & RE_NO_BK_VBAR ? *next == '|' ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ : next_backslash && next_next && *next_next == '|'); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* Returns true if REGNUM is in one of COMPILE_STACK's elements and ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ false if it's not. */ ~~~~~~~~~~~~~~~~~~~~~~ static re_bool ~~~~~~~~~~~~~~ group_in_compile_stack (compile_stack_type compile_stack, regnum_t regnum) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ int this_element; ~~~~~~~~~~~~~~~~~ for (this_element = compile_stack.avail - 1; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ this_element >= 0; ~~~~~~~~~~~~~~~~~~ this_element--) ~~~~~~~~~~~~~~~ if (compile_stack.stack[this_element].regnum == regnum) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return true; ~~~~~~~~~~~~ return false; ~~~~~~~~~~~~~ } ~ /* Read the ending character of a range (in a bracket expression) from the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ uncompiled pattern *P_PTR (which ends at PEND). We assume the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ starting character is in `P[-2]'. (`P[-1]' is the character `-'.) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Then we set the translation of all bits between the starting and ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ending characters (inclusive) in the compiled pattern B. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Return an error code. ~~~~~~~~~~~~~~~~~~~~~ We use these short variable names so we can use the same macros as ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `regex_compile' itself. ~~~~~~~~~~~~~~~~~~~~~~~ Under Mule, this is only called when both chars of the range are ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ASCII. */ ~~~~~~~~~ static reg_errcode_t ~~~~~~~~~~~~~~~~~~~~ compile_range (re_char **p_ptr, re_char *pend, RE_TRANSLATE_TYPE translate, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ reg_syntax_t syntax, unsigned char *buf_end) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ Ichar this_char; ~~~~~~~~~~~~~~~~ re_char *p = *p_ptr; ~~~~~~~~~~~~~~~~~~~~ int range_start, range_end; ~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (p == pend) ~~~~~~~~~~~~~~ return REG_ERANGE; ~~~~~~~~~~~~~~~~~~ /* Even though the pattern is a signed `char *', we need to fetch ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ with unsigned char *'s; if the high bit of the pattern character ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ is set, the range endpoints will be negative if we fetch using a ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ signed char *. ~~~~~~~~~~~~~~ We also want to fetch the endpoints without translating them; the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ appropriate translation is done in the bit-setting loop below. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* The SVR4 compiler on the 3B2 had trouble with unsigned const char *. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ range_start = ((const unsigned char *) p)[-2]; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ range_end = ((const unsigned char *) p)[0]; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Have to increment the pointer into the pattern string, so the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ caller isn't still at the ending character. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (*p_ptr)++; ~~~~~~~~~~~ /* If the start is after the end, the range is empty. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (range_start > range_end) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return syntax & RE_NO_EMPTY_RANGES ? REG_ERANGE : REG_NOERROR; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Here we see why `this_char' has to be larger than an `unsigned ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ char' -- the range is inclusive, so if `range_end' == 0xff ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (assuming 8-bit characters), we would otherwise go into an infinite ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ loop, since all characters <= 0xff. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (this_char = range_start; this_char <= range_end; this_char++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ SET_LIST_BIT (RE_TRANSLATE (this_char)); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ return REG_NOERROR; ~~~~~~~~~~~~~~~~~~~ } ~ #ifdef MULE ~~~~~~~~~~~ static reg_errcode_t ~~~~~~~~~~~~~~~~~~~~ compile_extended_range (re_char **p_ptr, re_char *pend, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RE_TRANSLATE_TYPE translate, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ reg_syntax_t syntax, Lisp_Object rtab) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ Ichar this_char, range_start, range_end; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ const Ibyte *p; ~~~~~~~~~~~~~~~ if (*p_ptr == pend) ~~~~~~~~~~~~~~~~~~~ return REG_ERANGE; ~~~~~~~~~~~~~~~~~~ p = (const Ibyte *) *p_ptr; ~~~~~~~~~~~~~~~~~~~~~~~~~~~ range_end = itext_ichar (p); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ p--; /* back to '-' */ ~~~~~~~~~~~~~~~~~~~~~~ DEC_IBYTEPTR (p); /* back to start of range */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* We also want to fetch the endpoints without translating them; the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ appropriate translation is done in the bit-setting loop below. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ range_start = itext_ichar (p); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ INC_IBYTEPTR (*p_ptr); ~~~~~~~~~~~~~~~~~~~~~~ /* If the start is after the end, the range is empty. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (range_start > range_end) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return syntax & RE_NO_EMPTY_RANGES ? REG_ERANGE : REG_NOERROR; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifndef UNICODE_INTERNAL ~~~~~~~~~~~~~~~~~~~~~~~~ /* Can't have ranges spanning different charsets, except maybe for ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ranges entirely within the first 256 chars. (The intent of this is that ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the effect of such a range would be unpredictable, since there is no ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ well-defined ordering over charsets and the particular assignment of ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ charset ID's is arbitrary.) This does not apply to Unicode, with ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ well-defined character values. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if ((range_start >= 0x100 || range_end >= 0x100) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ && !EQ (old_mule_ichar_charset (range_start), ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ old_mule_ichar_charset (range_end))) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return REG_ERANGESPAN; ~~~~~~~~~~~~~~~~~~~~~~ #endif /* not UNICODE_INTERNAL */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (TRANSLATE_P (translate)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ /* #### This might be way inefficient if the range encompasses 10,000 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ chars or something. To be efficient, you'd have to do something like ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ this: ~~~~~ range_table a ~~~~~~~~~~~~~ range_table b; ~~~~~~~~~~~~~~ map_char_table (translation table, [range_start, range_end]) of ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ lambda (ch, translation): ~~~~~~~~~~~~~~~~~~~~~~~~~ put (ch, Qt) in a ~~~~~~~~~~~~~~~~~ put (translation, Qt) in b ~~~~~~~~~~~~~~~~~~~~~~~~~~ invert the range in a and truncate to [range_start, range_end] ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ put the union of a, b in rtab ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ This is to say, we want to map every character that has a translation ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ to its translation, and other characters to themselves. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ This assumes, as is reasonable in practice, that a translation ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ table maps individual characters to their translation, and does ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ not generally map multiple characters to the same translation. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ ~~ for (this_char = range_start; this_char <= range_end; this_char++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ SET_RANGETAB_BIT (RE_TRANSLATE (this_char)); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ } ~ else ~~~~ put_range_table (rtab, range_start, range_end, Qt); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return REG_NOERROR; ~~~~~~~~~~~~~~~~~~~ } ~ #endif /* MULE */ ~~~~~~~~~~~~~~~~~ #ifdef emacs ~~~~~~~~~~~~ reg_errcode_t ~~~~~~~~~~~~~ compile_char_class (re_wctype_t cc, Lisp_Object rtab, Bitbyte *flags_out) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ *flags_out |= re_wctype_to_bit (cc); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ switch (cc) ~~~~~~~~~~~ { ~ case RECC_ASCII: ~~~~~~~~~~~~~~~~ put_range_table (rtab, 0, 0x7f, Qt); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case RECC_XDIGIT: ~~~~~~~~~~~~~~~~~ put_range_table (rtab, 'a', 'f', Qt); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ put_range_table (rtab, 'A', 'f', Qt); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* fallthrough */ ~~~~~~~~~~~~~~~~~ case RECC_DIGIT: ~~~~~~~~~~~~~~~~ put_range_table (rtab, '0', '9', Qt); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case RECC_BLANK: ~~~~~~~~~~~~~~~~ put_range_table (rtab, ' ', ' ', Qt); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ put_range_table (rtab, '\t', '\t', Qt); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case RECC_PRINT: ~~~~~~~~~~~~~~~~ put_range_table (rtab, ' ', 0x7e, Qt); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ put_range_table (rtab, 0x80, CHAR_CODE_LIMIT, Qt); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case RECC_GRAPH: ~~~~~~~~~~~~~~~~ put_range_table (rtab, '!', 0x7e, Qt); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ put_range_table (rtab, 0x80, CHAR_CODE_LIMIT, Qt); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case RECC_NONASCII: ~~~~~~~~~~~~~~~~~~~ case RECC_MULTIBYTE: ~~~~~~~~~~~~~~~~~~~~ put_range_table (rtab, 0x80, CHAR_CODE_LIMIT, Qt); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case RECC_CNTRL: ~~~~~~~~~~~~~~~~ put_range_table (rtab, 0x00, 0x1f, Qt); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case RECC_UNIBYTE: ~~~~~~~~~~~~~~~~~~ /* Never true in XEmacs. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ /* The following all have their own bits in the class_bits argument to ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ charset_mule and charset_mule_not, they don't use the range table ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ information. */ ~~~~~~~~~~~~~~~ case RECC_ALPHA: ~~~~~~~~~~~~~~~~ case RECC_WORD: ~~~~~~~~~~~~~~~ case RECC_ALNUM: /* Equivalent to RECC_WORD */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_LOWER: ~~~~~~~~~~~~~~~~ case RECC_PUNCT: ~~~~~~~~~~~~~~~~ case RECC_SPACE: ~~~~~~~~~~~~~~~~ case RECC_UPPER: ~~~~~~~~~~~~~~~~ break; ~~~~~~ } ~ return REG_NOERROR; ~~~~~~~~~~~~~~~~~~~ } ~ #endif /* MULE */ ~~~~~~~~~~~~~~~~~ ~ /* re_compile_fastmap computes a ``fastmap'' for the compiled pattern in ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ BUFP. A fastmap records which of the (1 << BYTEWIDTH) possible ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ characters can start a string that matches the pattern. This fastmap ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ is used by re_search to skip quickly over impossible starting points. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ The caller must supply the address of a (1 << BYTEWIDTH)-byte data ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ area as BUFP->fastmap. ~~~~~~~~~~~~~~~~~~~~~~ We set the `fastmap', `fastmap_accurate', and `can_be_null' fields in ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the pattern buffer. ~~~~~~~~~~~~~~~~~~~ Returns 0 if we succeed, -2 if an internal error. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int ~~~ re_compile_fastmap (struct re_pattern_buffer *bufp ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RE_LISP_SHORT_CONTEXT_ARGS_DECL) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ int j, k; ~~~~~~~~~ #ifdef MATCH_MAY_ALLOCATE ~~~~~~~~~~~~~~~~~~~~~~~~~ fail_stack_type fail_stack; ~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ /* We don't push any register information onto the failure stack. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* &&#### this should be changed for 8-bit-fixed, for efficiency. see ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ comment marked with &&#### in re_search_2. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ REGISTER char *fastmap = bufp->fastmap; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ unsigned char *pattern = bufp->buffer; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ long size = bufp->used; ~~~~~~~~~~~~~~~~~~~~~~~ re_char *p = pattern; ~~~~~~~~~~~~~~~~~~~~~ REGISTER re_char *pend = pattern + size; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef REGEX_REL_ALLOC ~~~~~~~~~~~~~~~~~~~~~~ /* This holds the pointer to the failure stack, when ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ it is allocated relocatably. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fail_stack_elt_t *failure_stack_ptr; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ /* Assume that each path through the pattern can be null until ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ proven otherwise. We set this false at the bottom of switch ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ statement, to which we get only if a particular path doesn't ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ match the empty string. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_bool path_can_be_null = true; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* We aren't doing a `succeed_n' to begin with. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_bool succeed_n_p = false; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef ERROR_CHECK_MALLOC ~~~~~~~~~~~~~~~~~~~~~~~~~ /* The pattern comes from string data, not buffer data. We don't access ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ any buffer data, so we don't have to worry about malloc() (but the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ disallowed flag may have been set by a caller). */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int depth = bind_regex_malloc_disallowed (0); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ assert (fastmap != NULL && p != NULL); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ INIT_FAIL_STACK (); ~~~~~~~~~~~~~~~~~~~ memset (fastmap, 0, 1 << BYTEWIDTH); /* Assume nothing's valid. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->fastmap_accurate = 1; /* It will be when we're done. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->can_be_null = 0; ~~~~~~~~~~~~~~~~~~~~~~ while (1) ~~~~~~~~~ { ~ if (p == pend || *p == succeed) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ /* We have reached the (effective) end of pattern. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!FAIL_STACK_EMPTY ()) ~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ bufp->can_be_null |= path_can_be_null; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Reset for next path. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~ path_can_be_null = true; ~~~~~~~~~~~~~~~~~~~~~~~~ p = (unsigned char *) fail_stack.stack[--fail_stack.avail].pointer; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ continue; ~~~~~~~~~ } ~ else ~~~~ break; ~~~~~~ } ~ /* We should never be about to go beyond the end of the pattern. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ assert (p < pend); ~~~~~~~~~~~~~~~~~~ switch ((re_opcode_t) *p++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ /* I guess the idea here is to simply not bother with a fastmap ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if a backreference is used, since it's too hard to figure out ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the fastmap for the corresponding group. Setting ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `can_be_null' stops `re_search_2' from using the fastmap, so ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ that is all we do. */ ~~~~~~~~~~~~~~~~~~~~~~ case duplicate: ~~~~~~~~~~~~~~~ bufp->can_be_null = 1; ~~~~~~~~~~~~~~~~~~~~~~ goto done; ~~~~~~~~~~ /* Following are the cases which match a character. These end ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ with `break'. */ ~~~~~~~~~~~~~~~~~ case exactn: ~~~~~~~~~~~~ fastmap[p[1]] = 1; ~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case charset: ~~~~~~~~~~~~~ /* XEmacs: Under Mule, these bit vectors will ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ only contain values for characters below 0x80. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH))) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[j] = 1; ~~~~~~~~~~~~~~~ break; ~~~~~~ case charset_not: ~~~~~~~~~~~~~~~~~ /* Chars beyond end of map must be allowed. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef MULE ~~~~~~~~~~~ for (j = *p * BYTEWIDTH; j < 0x80; j++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[j] = 1; ~~~~~~~~~~~~~~~ /* And all extended characters must be allowed, too. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (j = 0x80; j < 0x100; j++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (ibyte_first_byte_p (j)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[j] = 1; ~~~~~~~~~~~~~~~ #else /* not MULE */ ~~~~~~~~~~~~~~~~~~~~ for (j = *p * BYTEWIDTH; j < (1 << BYTEWIDTH); j++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[j] = 1; ~~~~~~~~~~~~~~~ #endif /* MULE */ ~~~~~~~~~~~~~~~~~ for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!(p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH)))) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[j] = 1; ~~~~~~~~~~~~~~~ break; ~~~~~~ #ifdef MULE ~~~~~~~~~~~ case charset_mule: ~~~~~~~~~~~~~~~~~~ { ~ int nentries; ~~~~~~~~~~~~~ Bitbyte flags = *p++; ~~~~~~~~~~~~~~~~~~~~~ if (flags) ~~~~~~~~~~ { ~ /* We need to consult the syntax table, fastmap won't ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ work. */ ~~~~~~~~ bufp->can_be_null = 1; ~~~~~~~~~~~~~~~~~~~~~~ goto done; ~~~~~~~~~~ } ~ nentries = unified_range_table_nentries ((void *) p); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (j = 0; j < nentries; j++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ EMACS_INT first, last; ~~~~~~~~~~~~~~~~~~~~~~ Lisp_Object dummy_val; ~~~~~~~~~~~~~~~~~~~~~~ int jj; ~~~~~~~ Ibyte strr[MAX_ICHAR_LEN]; ~~~~~~~~~~~~~~~~~~~~~~~~~~ unified_range_table_get_range ((void *) p, j, &first, &last, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ &dummy_val); ~~~~~~~~~~~~ #ifndef UNICODE_INTERNAL ~~~~~~~~~~~~~~~~~~~~~~~~ for (jj = first; jj <= last && jj < 0x80; jj++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[jj] = 1; ~~~~~~~~~~~~~~~~ /* Ranges below 0x100 can span charsets, but there ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ are only two (Control-1 and Latin-1), and ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ either first or last has to be in them. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ set_itext_ichar (strr, first); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[*strr] = 1; ~~~~~~~~~~~~~~~~~~~ if (last < 0x100) ~~~~~~~~~~~~~~~~~ { ~ set_itext_ichar (strr, last); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[*strr] = 1; ~~~~~~~~~~~~~~~~~~~ } ~ else if (CHAR_CODE_LIMIT == last) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ /* This is RECC_MULTIBYTE or RECC_NONASCII; true for all ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ non-ASCII characters. */ ~~~~~~~~~~~~~~~~~~~~~~~~ jj = 0x80; ~~~~~~~~~~ while (jj < 0xA0) ~~~~~~~~~~~~~~~~~ { ~ fastmap[jj++] = 1; ~~~~~~~~~~~~~~~~~~ } ~ } ~ #else ~~~~~ /* Ranges can span charsets. We depend on the fact that ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ lead bytes are monotonically non-decreasing as ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ character values increase. @@#### This is a fairly ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ reasonable assumption in general (but DOES NOT WORK in ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ old Mule due to the ordering of private dimension-1 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ chars before official dimension-2 chars), and introduces ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ a dependency on the particular representation. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ Ibyte strrlast[MAX_ICHAR_LEN]; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ set_itext_ichar (strr, first); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ set_itext_ichar (strrlast, min (last, CHAR_CODE_LIMIT - 1)); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (jj = *strr; jj <= *strrlast; jj++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[jj] = 1; ~~~~~~~~~~~~~~~~ } ~ #endif /* not UNICODE_INTERNAL */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* If it's not a possible first byte, it can't be in the fastmap. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ In UTF-8, lead bytes are not contiguous with ASCII, so a ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ range spanning the ASCII/non-ASCII boundary will put ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ extraneous bytes in the range [0x80 - 0xBF] in the fastmap. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (j = 0x80; j < 0x100; j++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!ibyte_first_byte_p (j)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[j] = 0; ~~~~~~~~~~~~~~~ } ~ break; ~~~~~~ case charset_mule_not: ~~~~~~~~~~~~~~~~~~~~~~ { ~ int nentries; ~~~~~~~~~~~~~ int smallest_prev = 0; ~~~~~~~~~~~~~~~~~~~~~~ Bitbyte flags = *p++; ~~~~~~~~~~~~~~~~~~~~~ if (flags) ~~~~~~~~~~ { ~ /* We need to consult the syntax table, fastmap won't ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ work. */ ~~~~~~~~ bufp->can_be_null = 1; ~~~~~~~~~~~~~~~~~~~~~~ goto done; ~~~~~~~~~~ } ~ nentries = unified_range_table_nentries ((void *) p); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifndef UNICODE_INTERNAL ~~~~~~~~~~~~~~~~~~~~~~~~ for (j = 0; j < nentries; j++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ EMACS_INT first, last; ~~~~~~~~~~~~~~~~~~~~~~ Lisp_Object dummy_val; ~~~~~~~~~~~~~~~~~~~~~~ int jj; ~~~~~~~ unified_range_table_get_range ((void *) p, j, &first, &last, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ &dummy_val); ~~~~~~~~~~~~ for (jj = smallest_prev; jj < first && jj < 0x80; jj++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[jj] = 1; ~~~~~~~~~~~~~~~~ smallest_prev = last + 1; ~~~~~~~~~~~~~~~~~~~~~~~~~ if (smallest_prev >= 0x80) ~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ } ~ /* Also set lead bytes after the end */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (j = smallest_prev; j < 0x80; j++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[j] = 1; ~~~~~~~~~~~~~~~ /* Calculating which lead bytes are actually allowed ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ here is rather difficult, so we just punt and allow ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ all of them. ~~~~~~~~~~~~ */ ~~ for (j = 0x80; j < 0x100; j++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (ibyte_first_byte_p (j)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[j] = 1; ~~~~~~~~~~~~~~~ #else ~~~~~ for (j = 0; j < nentries; j++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ EMACS_INT first, last; ~~~~~~~~~~~~~~~~~~~~~~ /* This denotes a range of lead bytes that are not ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ in the fastmap. */ ~~~~~~~~~~~~~~~~~~ int firstlead, lastlead; ~~~~~~~~~~~~~~~~~~~~~~~~ Lisp_Object dummy_val; ~~~~~~~~~~~~~~~~~~~~~~ int jj; ~~~~~~~ unified_range_table_get_range ((void *) p, j, &first, &last, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ &dummy_val); ~~~~~~~~~~~~ /* With Unicode-internal, lead bytes that are entirely ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ within the range and not including the beginning or end ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ are definitely not in the fastmap. Leading bytes that ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ include the beginning or ending characters will be in ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the fastmap unless the beginning or ending characters ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ are the first or last character, respectively, that uses ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ this lead byte. ~~~~~~~~~~~~~~~ @@#### WARNING! In order to determine whether we are the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ first or last character using a lead byte we use and ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ embed in the code some knowledge of how UTF-8 works -- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ at least, the fact that the the first character using a ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ particular lead byte has the minimum-numbered trailing ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ byte in all its trailing bytes, and the last character ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ using a particular lead byte has the maximum-numbered ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ trailing byte in all its trailing bytes. We abstract ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ away the actual minimum/maximum trailing byte numbers, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ at least. We could perhaps do this more portably by ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ just looking at the representation of the character one ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ higher or lower and seeing if the lead byte changes, but ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ you'd run into the problem of invalid characters, e.g. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if you're at the edge of the range of surrogates or are ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the top-most allowed character. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ ~~ if (first < 0x80) ~~~~~~~~~~~~~~~~~ firstlead = first; ~~~~~~~~~~~~~~~~~~ else ~~~~ { ~ Ibyte strr[MAX_ICHAR_LEN]; ~~~~~~~~~~~~~~~~~~~~~~~~~~ Bytecount slen = set_itext_ichar (strr, first); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int kk; ~~~~~~~ /* Determine if we're the first character using our ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ leading byte. */ ~~~~~~~~~~~~~~~~ for (kk = 1; kk < slen; kk++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (strr[kk] != FIRST_TRAILING_BYTE) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ /* If not, this leading byte might occur, so ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ make sure it gets added to the fastmap. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ firstlead = *strr + 1; ~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ } ~ /* Otherwise, we're the first character using our ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ leading byte, and we don't need to add the leading ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ byte to the fastmap. (If our range doesn't ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ completely cover the leading byte, it will get added ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ anyway by the code handling the other end of the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ range.) */ ~~~~~~~~~~ firstlead = *strr; ~~~~~~~~~~~~~~~~~~ } ~ if (last < 0x80) ~~~~~~~~~~~~~~~~ lastlead = last; ~~~~~~~~~~~~~~~~ else ~~~~ { ~ Ibyte strr[MAX_ICHAR_LEN]; ~~~~~~~~~~~~~~~~~~~~~~~~~~ Bytecount slen ~~~~~~~~~~~~~~ = set_itext_ichar (strr, ~~~~~~~~~~~~~~~~~~~~~~~~ min (last, ~~~~~~~~~~ CHAR_CODE_LIMIT - 1)); ~~~~~~~~~~~~~~~~~~~~~~ int kk; ~~~~~~~ /* Same as above but for the last character using ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ our leading byte. */ ~~~~~~~~~~~~~~~~~~~~ for (kk = 1; kk < slen; kk++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (strr[kk] != LAST_TRAILING_BYTE) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ lastlead = *strr - 1; ~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ } ~ lastlead = *strr; ~~~~~~~~~~~~~~~~~ } ~ /* Now, FIRSTLEAD and LASTLEAD are set to the beginning and ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ end, inclusive, of a range of lead bytes that cannot be ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ in the fastmap. Essentially, we want to set all the other ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bytes to be in the fastmap. Here we handle those after ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the previous range and before this one. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (jj = smallest_prev; jj < firstlead; jj++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[jj] = 1; ~~~~~~~~~~~~~~~~ smallest_prev = lastlead + 1; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* Also set lead bytes after the end of the final range. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (j = smallest_prev; j < 0x100; j++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[j] = 1; ~~~~~~~~~~~~~~~ /* If it's not a possible first byte, it can't be in the fastmap. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ In UTF-8, lead bytes are not contiguous with ASCII, so a ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ range spanning the ASCII/non-ASCII boundary will put ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ extraneous bytes in the range [0x80 - 0xBF] in the fastmap. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (j = 0x80; j < 0x100; j++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!ibyte_first_byte_p (j)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[j] = 0; ~~~~~~~~~~~~~~~ #endif /* UNICODE_INTERNAL */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ break; ~~~~~~ #endif /* MULE */ ~~~~~~~~~~~~~~~~~ case anychar: ~~~~~~~~~~~~~ { ~ int fastmap_newline = fastmap['\n']; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* `.' matches anything ... */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef MULE ~~~~~~~~~~~ /* "anything" only includes bytes that can be the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ first byte of a character. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (j = 0; j < 0x100; j++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (ibyte_first_byte_p (j)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[j] = 1; ~~~~~~~~~~~~~~~ #else ~~~~~ for (j = 0; j < (1 << BYTEWIDTH); j++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[j] = 1; ~~~~~~~~~~~~~~~ #endif ~~~~~~ /* ... except perhaps newline. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!(bufp->syntax & RE_DOT_NEWLINE)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap['\n'] = fastmap_newline; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Return if we have already set `can_be_null'; if we have, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ then the fastmap is irrelevant. Something's wrong here. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ else if (bufp->can_be_null) ~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto done; ~~~~~~~~~~ /* Otherwise, have to check alternative paths. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ } ~ #ifndef emacs ~~~~~~~~~~~~~ case wordchar: ~~~~~~~~~~~~~~ for (j = 0; j < (1 << BYTEWIDTH); j++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (SYNTAX (ignored, j) == Sword) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[j] = 1; ~~~~~~~~~~~~~~~ break; ~~~~~~ case notwordchar: ~~~~~~~~~~~~~~~~~ for (j = 0; j < (1 << BYTEWIDTH); j++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (SYNTAX (ignored, j) != Sword) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[j] = 1; ~~~~~~~~~~~~~~~ break; ~~~~~~ #else /* emacs */ ~~~~~~~~~~~~~~~~~ case wordchar: ~~~~~~~~~~~~~~ case notwordchar: ~~~~~~~~~~~~~~~~~ case wordbound: ~~~~~~~~~~~~~~~ case notwordbound: ~~~~~~~~~~~~~~~~~~ case wordbeg: ~~~~~~~~~~~~~ case wordend: ~~~~~~~~~~~~~ case notsyntaxspec: ~~~~~~~~~~~~~~~~~~~ case syntaxspec: ~~~~~~~~~~~~~~~~ /* This match depends on text properties. These end with ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ aborting optimizations. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->can_be_null = 1; ~~~~~~~~~~~~~~~~~~~~~~ goto done; ~~~~~~~~~~ #if 0 /* all of the following code is unused now that the `syntax-table' ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ property exists -- it's trickier to do this than just look in ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the buffer. &&#### but we could just use the syntax-cache stuff ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ instead; why don't we? --ben */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case wordchar: ~~~~~~~~~~~~~~ k = (int) Sword; ~~~~~~~~~~~~~~~~ goto matchsyntax; ~~~~~~~~~~~~~~~~~ case notwordchar: ~~~~~~~~~~~~~~~~~ k = (int) Sword; ~~~~~~~~~~~~~~~~ goto matchnotsyntax; ~~~~~~~~~~~~~~~~~~~~ case syntaxspec: ~~~~~~~~~~~~~~~~ k = *p++; ~~~~~~~~~ matchsyntax: ~~~~~~~~~~~~ #ifdef MULE ~~~~~~~~~~~ for (j = 0; j < 0x80; j++) ~~~~~~~~~~~~~~~~~~~~~~~~~~ if (SYNTAX ~~~~~~~~~~ (XCHAR_TABLE (BUFFER_MIRROR_SYNTAX_TABLE (lispbuf)), j) == ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (enum syntaxcode) k) ~~~~~~~~~~~~~~~~~~~~ fastmap[j] = 1; ~~~~~~~~~~~~~~~ /* @@#### To be correct, we need to set the fastmap for any ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ lead byte any of whose characters can have this syntax code. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ This is hard to calculate so we just punt for now. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (j = 0x80; j < 0x100; j++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (ibyte_first_byte_p (j)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[j] = 1; ~~~~~~~~~~~~~~~ #else /* not MULE */ ~~~~~~~~~~~~~~~~~~~~ for (j = 0; j < (1 << BYTEWIDTH); j++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (SYNTAX ~~~~~~~~~~ (XCHAR_TABLE (BUFFER_MIRROR_SYNTAX_TABLE (lispbuf)), j) == ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (enum syntaxcode) k) ~~~~~~~~~~~~~~~~~~~~ fastmap[j] = 1; ~~~~~~~~~~~~~~~ #endif /* MULE */ ~~~~~~~~~~~~~~~~~ break; ~~~~~~ case notsyntaxspec: ~~~~~~~~~~~~~~~~~~~ k = *p++; ~~~~~~~~~ matchnotsyntax: ~~~~~~~~~~~~~~~ #ifdef MULE ~~~~~~~~~~~ for (j = 0; j < 0x80; j++) ~~~~~~~~~~~~~~~~~~~~~~~~~~ if (SYNTAX ~~~~~~~~~~ (XCHAR_TABLE ~~~~~~~~~~~~ (BUFFER_MIRROR_SYNTAX_TABLE (lispbuf)), j) != ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (enum syntaxcode) k) ~~~~~~~~~~~~~~~~~~~~ fastmap[j] = 1; ~~~~~~~~~~~~~~~ /* @@#### To be correct, we need to set the fastmap for any ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ lead byte all of whose characters do not have this syntax code. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ This is hard to calculate so we just punt for now. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (j = 0x80; j < 0x100; j++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (ibyte_first_byte_p (j)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[j] = 1; ~~~~~~~~~~~~~~~ #else /* not MULE */ ~~~~~~~~~~~~~~~~~~~~ for (j = 0; j < (1 << BYTEWIDTH); j++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (SYNTAX ~~~~~~~~~~ (XCHAR_TABLE ~~~~~~~~~~~~ (BUFFER_MIRROR_SYNTAX_TABLE (lispbuf)), j) != ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (enum syntaxcode) k) ~~~~~~~~~~~~~~~~~~~~ fastmap[j] = 1; ~~~~~~~~~~~~~~~ #endif /* MULE */ ~~~~~~~~~~~~~~~~~ break; ~~~~~~ #endif /* 0 */ ~~~~~~~~~~~~~~ #ifdef MULE ~~~~~~~~~~~ /* 97/2/17 jhod category patch */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case categoryspec: ~~~~~~~~~~~~~~~~~~ case notcategoryspec: ~~~~~~~~~~~~~~~~~~~~~ bufp->can_be_null = 1; ~~~~~~~~~~~~~~~~~~~~~~ UNBIND_REGEX_MALLOC_CHECK (); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return 0; ~~~~~~~~~ /* end if category patch */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif /* MULE */ ~~~~~~~~~~~~~~~~~ /* All cases after this match the empty string. These end with ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `continue'. */ ~~~~~~~~~~~~~~~ case before_dot: ~~~~~~~~~~~~~~~~ case at_dot: ~~~~~~~~~~~~ case after_dot: ~~~~~~~~~~~~~~~ continue; ~~~~~~~~~ #endif /* emacs */ ~~~~~~~~~~~~~~~~~~ case no_op: ~~~~~~~~~~~ case begline: ~~~~~~~~~~~~~ case endline: ~~~~~~~~~~~~~ case begbuf: ~~~~~~~~~~~~ case endbuf: ~~~~~~~~~~~~ #ifndef emacs ~~~~~~~~~~~~~ case wordbound: ~~~~~~~~~~~~~~~ case notwordbound: ~~~~~~~~~~~~~~~~~~ case wordbeg: ~~~~~~~~~~~~~ case wordend: ~~~~~~~~~~~~~ #endif ~~~~~~ case push_dummy_failure: ~~~~~~~~~~~~~~~~~~~~~~~~ continue; ~~~~~~~~~ case jump_n: ~~~~~~~~~~~~ case pop_failure_jump: ~~~~~~~~~~~~~~~~~~~~~~ case maybe_pop_jump: ~~~~~~~~~~~~~~~~~~~~ case jump: ~~~~~~~~~~ case jump_past_alt: ~~~~~~~~~~~~~~~~~~~ case dummy_failure_jump: ~~~~~~~~~~~~~~~~~~~~~~~~ EXTRACT_NUMBER_AND_INCR (j, p); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ p += j; ~~~~~~~ if (j > 0) ~~~~~~~~~~ continue; ~~~~~~~~~ /* Jump backward implies we just went through the body of a ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ loop and matched nothing. Opcode jumped to should be ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `on_failure_jump' or `succeed_n'. Just treat it like an ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ordinary jump. For a * loop, it has pushed its failure ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ point already; if so, discard that as redundant. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if ((re_opcode_t) *p != on_failure_jump ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ && (re_opcode_t) *p != succeed_n) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ continue; ~~~~~~~~~ p++; ~~~~ EXTRACT_NUMBER_AND_INCR (j, p); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ p += j; ~~~~~~~ /* If what's on the stack is where we are now, pop it. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!FAIL_STACK_EMPTY () ~~~~~~~~~~~~~~~~~~~~~~~~ && fail_stack.stack[fail_stack.avail - 1].pointer == p) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fail_stack.avail--; ~~~~~~~~~~~~~~~~~~~ continue; ~~~~~~~~~ case on_failure_jump: ~~~~~~~~~~~~~~~~~~~~~ case on_failure_keep_string_jump: ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ handle_on_failure_jump: ~~~~~~~~~~~~~~~~~~~~~~~ EXTRACT_NUMBER_AND_INCR (j, p); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* For some patterns, e.g., `(a?)?', `p+j' here points to the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ end of the pattern. We don't want to push such a point, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ since when we restore it above, entering the switch will ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ increment `p' past the end of the pattern. We don't need ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ to push such a point since we obviously won't find any more ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap entries beyond `pend'. Such a pattern can match ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the null string, though. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (p + j < pend) ~~~~~~~~~~~~~~~~~ { ~ if (!PUSH_PATTERN_OP (p + j, fail_stack)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ RESET_FAIL_STACK (); ~~~~~~~~~~~~~~~~~~~~ UNBIND_REGEX_MALLOC_CHECK (); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return -2; ~~~~~~~~~~ } ~ } ~ else ~~~~ bufp->can_be_null = 1; ~~~~~~~~~~~~~~~~~~~~~~ if (succeed_n_p) ~~~~~~~~~~~~~~~~ { ~ EXTRACT_NUMBER_AND_INCR (k, p); /* Skip the n. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ succeed_n_p = false; ~~~~~~~~~~~~~~~~~~~~ } ~ continue; ~~~~~~~~~ case succeed_n: ~~~~~~~~~~~~~~~ /* Get to the number of times to succeed. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ p += 2; ~~~~~~~ /* Increment p past the n for when k != 0. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ EXTRACT_NONNEGATIVE_AND_INCR (k, p); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (k == 0) ~~~~~~~~~~~ { ~ p -= 4; ~~~~~~~ succeed_n_p = true; /* Spaghetti code alert. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto handle_on_failure_jump; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ continue; ~~~~~~~~~ case set_number_at: ~~~~~~~~~~~~~~~~~~~ p += 4; ~~~~~~~ continue; ~~~~~~~~~ case start_memory: ~~~~~~~~~~~~~~~~~~ case stop_memory: ~~~~~~~~~~~~~~~~~ p += 4; ~~~~~~~ continue; ~~~~~~~~~ default: ~~~~~~~~ ABORT (); /* We have listed all the cases. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } /* switch *p++ */ ~~~~~~~~~~~~~~~~~~~ /* Getting here means we have found the possible starting ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ characters for one path of the pattern -- and that the empty ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ string does not match. We need not follow this path further. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Instead, look at the next alternative (remembered on the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ stack), or quit if no more. The test at the top of the loop ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ does these things. */ ~~~~~~~~~~~~~~~~~~~~~~ path_can_be_null = false; ~~~~~~~~~~~~~~~~~~~~~~~~~ p = pend; ~~~~~~~~~ } /* while p */ ~~~~~~~~~~~~~~~ /* Set `can_be_null' for the last path (also the first path, if the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ pattern is empty). */ ~~~~~~~~~~~~~~~~~~~~~~ bufp->can_be_null |= path_can_be_null; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ done: ~~~~~ RESET_FAIL_STACK (); ~~~~~~~~~~~~~~~~~~~~ UNBIND_REGEX_MALLOC_CHECK (); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return 0; ~~~~~~~~~ } /* re_compile_fastmap */ ~~~~~~~~~~~~~~~~~~~~~~~~~~ ~ /* Set REGS to hold NUM_REGS registers, storing them in STARTS and ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ENDS. Subsequent matches using PATTERN_BUFFER and REGS will use ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ this memory for recording register information. STARTS and ENDS ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ must be allocated using the malloc library routine, and must each ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ be at least NUM_REGS * sizeof (regoff_t) bytes long. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ If NUM_REGS == 0, then subsequent matches should allocate their own ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ register data. ~~~~~~~~~~~~~~ Unless this function is called, the first search or match using ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PATTERN_BUFFER will allocate its own register data, without ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ freeing the old data. */ ~~~~~~~~~~~~~~~~~~~~~~~~~ void ~~~~ re_set_registers (struct re_pattern_buffer *bufp, struct re_registers *regs, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int num_regs, regoff_t *starts, regoff_t *ends) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ if (num_regs) ~~~~~~~~~~~~~ { ~ bufp->regs_allocated = REGS_REALLOCATE; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regs->num_regs = num_regs; ~~~~~~~~~~~~~~~~~~~~~~~~~~ regs->start = starts; ~~~~~~~~~~~~~~~~~~~~~ regs->end = ends; ~~~~~~~~~~~~~~~~~ } ~ else ~~~~ { ~ bufp->regs_allocated = REGS_UNALLOCATED; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regs->num_regs = 0; ~~~~~~~~~~~~~~~~~~~ regs->start = regs->end = (regoff_t *) 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ } ~ ~ /* Searching routines. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Like re_search_2, below, but only one string is specified, and ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ doesn't let you say where to stop matching. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int ~~~ re_search (struct re_pattern_buffer *bufp, const char *string, int size, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int startpos, int range, struct re_registers *regs ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RE_LISP_CONTEXT_ARGS_DECL) ~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ return re_search_2 (bufp, NULL, 0, string, size, startpos, range, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regs, size RE_LISP_CONTEXT_ARGS); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* Using the compiled pattern in BUFP->buffer, first tries to match the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ virtual concatenation of STRING1 and STRING2, starting first at index ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ STARTPOS, then at STARTPOS + 1, and so on. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ STRING1 and STRING2 have length SIZE1 and SIZE2, respectively. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RANGE is how far to scan while trying to match. RANGE = 0 means try ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ only at STARTPOS; in general, the last start tried is STARTPOS + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RANGE. ~~~~~~ All sizes and positions refer to bytes (not chars); under Mule, the code ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ knows about the format of the text and will only check at positions ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ where a character starts. ~~~~~~~~~~~~~~~~~~~~~~~~~ With MULE, RANGE is a byte position, not a char position. The last ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ start tried is the character starting <= STARTPOS + RANGE. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ In REGS, return the indices of the virtual concatenation of STRING1 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ and STRING2 that matched the entire BUFP->buffer and its contained ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ subexpressions. ~~~~~~~~~~~~~~~ Do not consider matching one past the index STOP in the virtual ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ concatenation of STRING1 and STRING2. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ We return either the position in the strings at which the match was ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ found, -1 if no match, or -2 if error (such as failure ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ stack overflow). */ ~~~~~~~~~~~~~~~~~~~~ int ~~~ re_search_2 (struct re_pattern_buffer *bufp, const char *str1, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int size1, const char *str2, int size2, int startpos, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int range, struct re_registers *regs, int stop ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RE_LISP_CONTEXT_ARGS_DECL) ~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ int val; ~~~~~~~~ re_char *string1 = (re_char *) str1; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_char *string2 = (re_char *) str2; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ REGISTER char *fastmap = bufp->fastmap; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ REGISTER RE_TRANSLATE_TYPE translate = bufp->translate; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int total_size = size1 + size2; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int endpos = startpos + range; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef REGEX_BEGLINE_CHECK ~~~~~~~~~~~~~~~~~~~~~~~~~~ int anchored_at_begline = 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ re_char *d; ~~~~~~~~~~~ #ifdef emacs ~~~~~~~~~~~~ Internal_Format fmt = buffer_or_other_internal_format (lispobj); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef REL_ALLOC ~~~~~~~~~~~~~~~~ const Ibyte *orig_buftext = ~~~~~~~~~~~~~~~~~~~~~~~~~~~ BUFFERP (lispobj) ? ~~~~~~~~~~~~~~~~~~~ BYTE_BUF_BYTE_ADDRESS (XBUFFER (lispobj), ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ BYTE_BUF_BEG (XBUFFER (lispobj))) : ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 0; ~~ #endif ~~~~~~ #ifdef ERROR_CHECK_MALLOC ~~~~~~~~~~~~~~~~~~~~~~~~~ int depth; ~~~~~~~~~~ #endif ~~~~~~ #endif /* emacs */ ~~~~~~~~~~~~~~~~~~ int forward_search_p; ~~~~~~~~~~~~~~~~~~~~~ /* Check for out-of-range STARTPOS. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (startpos < 0 || startpos > total_size) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return -1; ~~~~~~~~~~ /* Fix up RANGE if it might eventually take us outside ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the virtual concatenation of STRING1 and STRING2. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (endpos < 0) ~~~~~~~~~~~~~~~ range = 0 - startpos; ~~~~~~~~~~~~~~~~~~~~~ else if (endpos > total_size) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ range = total_size - startpos; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ forward_search_p = range > 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (void) (forward_search_p); /* This is only used with assertions, silence the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ compiler warning when they're turned off. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* If the search isn't to be a backwards one, don't waste time in a ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ search for a pattern that must be anchored. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (bufp->used > 0 && (re_opcode_t) bufp->buffer[0] == begbuf && range > 0) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ if (startpos > 0) ~~~~~~~~~~~~~~~~~ return -1; ~~~~~~~~~~ else ~~~~ { ~ d = ((const unsigned char *) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (startpos >= size1 ? string2 - size1 : string1) + startpos); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ range = itext_ichar_len_fmt (d, fmt); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ } ~ #ifdef emacs ~~~~~~~~~~~~ /* In a forward search for something that starts with \=. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ don't keep searching past point. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (bufp->used > 0 && (re_opcode_t) bufp->buffer[0] == at_dot && range > 0) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ if (!BUFFERP (lispobj)) ~~~~~~~~~~~~~~~~~~~~~~~ return -1; ~~~~~~~~~~ range = (BYTE_BUF_PT (XBUFFER (lispobj)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - BYTE_BUF_BEGV (XBUFFER (lispobj)) - startpos); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (range < 0) ~~~~~~~~~~~~~~ return -1; ~~~~~~~~~~ } ~ #endif /* emacs */ ~~~~~~~~~~~~~~~~~~ #ifdef ERROR_CHECK_MALLOC ~~~~~~~~~~~~~~~~~~~~~~~~~ /* Do this after the above return()s. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ depth = bind_regex_malloc_disallowed (1); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ /* Update the fastmap now if not correct already. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ BEGIN_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~~~ if (fastmap && !bufp->fastmap_accurate) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (re_compile_fastmap (bufp RE_LISP_SHORT_CONTEXT_ARGS) == -2) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ END_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~ UNBIND_REGEX_MALLOC_CHECK (); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return -2; ~~~~~~~~~~ } ~ END_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~ RE_SEARCH_RELOCATE_MOVEABLE_DATA_POINTERS (); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef REGEX_BEGLINE_CHECK ~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ long i = 0; ~~~~~~~~~~~ while (i < bufp->used) ~~~~~~~~~~~~~~~~~~~~~~ { ~ if (bufp->buffer[i] == start_memory || ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->buffer[i] == stop_memory) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ i += 4; ~~~~~~~ else ~~~~ break; ~~~~~~ } ~ anchored_at_begline = i < bufp->used && bufp->buffer[i] == begline; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ #endif ~~~~~~ #ifdef emacs ~~~~~~~~~~~~ BEGIN_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~~~ /* Update the mirror syntax table if it's used and dirty. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ SYNTAX_CODE (BUFFER_MIRROR_SYNTAX_TABLE (lispbuf), 'a'); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ scache = setup_syntax_cache (scache, lispobj, lispbuf, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ offset_to_bytexpos (lispobj, startpos), ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 1); ~~~ END_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~ RE_SEARCH_RELOCATE_MOVEABLE_DATA_POINTERS (); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ /* Loop through the string, looking for a place to start matching. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (;;) ~~~~~~~~ { ~ #ifdef REGEX_BEGLINE_CHECK ~~~~~~~~~~~~~~~~~~~~~~~~~~ /* If the regex is anchored at the beginning of a line (i.e. with a ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ^), then we can speed things up by skipping to the next ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ beginning-of-line. However, to determine "beginning of line" we ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ need to look at the previous char, so can't do this check if at ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ beginning of either string. (Well, we could if at the beginning of ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the second string, but it would require additional code, and this ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ is just an optimization.) */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (anchored_at_begline && startpos > 0 && startpos != size1) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ if (range > 0) ~~~~~~~~~~~~~~ { ~ /* whose stupid idea was it anyway to make this ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ function take two strings to match?? */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int lim = 0; ~~~~~~~~~~~~ re_char *orig_d; ~~~~~~~~~~~~~~~~ re_char *stop_d; ~~~~~~~~~~~~~~~~ /* Compute limit as below in fastmap code, so we are guaranteed ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ to remain within a single string. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (startpos < size1 && startpos + range >= size1) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ lim = range - (size1 - startpos); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ d = ((const unsigned char *) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (startpos >= size1 ? string2 - size1 : string1) + startpos); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ orig_d = d; ~~~~~~~~~~~ stop_d = d + range - lim; ~~~~~~~~~~~~~~~~~~~~~~~~~ /* We want to find the next location (including the current ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ one) where the previous char is a newline, so back up one ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ and search forward for a newline. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEC_IBYTEPTR_FMT (d, fmt); /* Ok, since startpos != size1. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Written out as an if-else to avoid testing `translate' ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ inside the loop. */ ~~~~~~~~~~~~~~~~~~~~ if (TRANSLATE_P (translate)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ while (d < stop_d && ~~~~~~~~~~~~~~~~~~~~ RE_TRANSLATE_1 (itext_ichar_fmt (d, fmt, lispobj)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ != '\n') ~~~~~~~~ INC_IBYTEPTR_FMT (d, fmt); ~~~~~~~~~~~~~~~~~~~~~~~~~~ else ~~~~ while (d < stop_d && ~~~~~~~~~~~~~~~~~~~~ itext_ichar_ascii_fmt (d, fmt, lispobj) != '\n') ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ INC_IBYTEPTR_FMT (d, fmt); ~~~~~~~~~~~~~~~~~~~~~~~~~~ /* If we were stopped by a newline, skip forward over it. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Otherwise we will get in an infloop when our start position ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ was at begline. */ ~~~~~~~~~~~~~~~~~~ if (d < stop_d) ~~~~~~~~~~~~~~~ INC_IBYTEPTR_FMT (d, fmt); ~~~~~~~~~~~~~~~~~~~~~~~~~~ range -= d - orig_d; ~~~~~~~~~~~~~~~~~~~~ startpos += d - orig_d; ~~~~~~~~~~~~~~~~~~~~~~~ assert (!forward_search_p || range >= 0); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ else if (range < 0) ~~~~~~~~~~~~~~~~~~~ { ~ /* We're lazy, like in the fastmap code below */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Ichar c; ~~~~~~~~ d = ((const unsigned char *) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (startpos >= size1 ? string2 - size1 : string1) + startpos); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEC_IBYTEPTR_FMT (d, fmt); ~~~~~~~~~~~~~~~~~~~~~~~~~~ c = itext_ichar_fmt (d, fmt, lispobj); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ c = RE_TRANSLATE (c); ~~~~~~~~~~~~~~~~~~~~~ if (c != '\n') ~~~~~~~~~~~~~~ goto advance; ~~~~~~~~~~~~~ } ~ } ~ #endif /* REGEX_BEGLINE_CHECK */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* If a fastmap is supplied, skip quickly over characters that ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ cannot be the start of a match. If the pattern can match the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ null string, however, we don't need to skip characters; we want ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the first null string. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~ if (fastmap && startpos < total_size && !bufp->can_be_null) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ /* For the moment, fastmap always works as if buffer ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ is in default format, so convert chars in the search strings ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ into default format as we go along, if necessary. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ &&#### fastmap needs rethinking for 8-bit-fixed so ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ it's faster. We need it to reflect the raw ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 8-bit-fixed values. That isn't so hard if we assume ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ that the top 96 bytes represent a single 1-byte ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ charset. For 16-bit/32-bit stuff it's probably not ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ worth it to make the fastmap represent the raw, due to ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ its nature -- we'd have to use the LSB for the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap, and that causes lots of problems with Mule ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ chars, where it essentially wipes out the usefulness ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ of the fastmap entirely. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (range > 0) /* Searching forwards. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ int lim = 0; ~~~~~~~~~~~~ int irange = range; ~~~~~~~~~~~~~~~~~~~ if (startpos < size1 && startpos + range >= size1) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ lim = range - (size1 - startpos); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ d = ((const unsigned char *) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (startpos >= size1 ? string2 - size1 : string1) + startpos); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Written out as an if-else to avoid testing `translate' ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ inside the loop. */ ~~~~~~~~~~~~~~~~~~~~ if (TRANSLATE_P (translate)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ while (range > lim) ~~~~~~~~~~~~~~~~~~~ { ~ re_char *old_d = d; ~~~~~~~~~~~~~~~~~~~ #ifdef MULE ~~~~~~~~~~~ Ibyte tempch[MAX_ICHAR_LEN]; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Ichar buf_ch = ~~~~~~~~~~~~~~ RE_TRANSLATE_1 (itext_ichar_fmt (d, fmt, lispobj)); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ set_itext_ichar (tempch, buf_ch); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (fastmap[*tempch]) ~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ #else ~~~~~ if (fastmap[(unsigned char) RE_TRANSLATE_1 (*d)]) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ #endif /* MULE */ ~~~~~~~~~~~~~~~~~ INC_IBYTEPTR_FMT (d, fmt); ~~~~~~~~~~~~~~~~~~~~~~~~~~ range -= (d - old_d); ~~~~~~~~~~~~~~~~~~~~~ assert (!forward_search_p || range >= 0); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ } ~ #ifdef MULE ~~~~~~~~~~~ else if (fmt != FORMAT_DEFAULT) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ while (range > lim) ~~~~~~~~~~~~~~~~~~~ { ~ re_char *old_d = d; ~~~~~~~~~~~~~~~~~~~ Ibyte tempch[MAX_ICHAR_LEN]; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Ichar buf_ch = itext_ichar_fmt (d, fmt, lispobj); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ set_itext_ichar (tempch, buf_ch); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (fastmap[*tempch]) ~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ INC_IBYTEPTR_FMT (d, fmt); ~~~~~~~~~~~~~~~~~~~~~~~~~~ range -= (d - old_d); ~~~~~~~~~~~~~~~~~~~~~ assert (!forward_search_p || range >= 0); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ } ~ #endif /* MULE */ ~~~~~~~~~~~~~~~~~ else ~~~~ { ~ while (range > lim && !fastmap[*d]) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ re_char *old_d = d; ~~~~~~~~~~~~~~~~~~~ INC_IBYTEPTR (d); ~~~~~~~~~~~~~~~~~ range -= (d - old_d); ~~~~~~~~~~~~~~~~~~~~~ assert (!forward_search_p || range >= 0); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ } ~ startpos += irange - range; ~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ else /* Searching backwards. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ /* #### It's not clear why we don't just write a loop, like ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for the moving-forward case. Perhaps the writer got lazy, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ since backward searches aren't so common. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ d = ((const unsigned char *) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (startpos >= size1 ? string2 - size1 : string1) + startpos); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef MULE ~~~~~~~~~~~ { ~ Ibyte tempch[MAX_ICHAR_LEN]; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Ichar buf_ch = ~~~~~~~~~~~~~~ RE_TRANSLATE (itext_ichar_fmt (d, fmt, lispobj)); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ set_itext_ichar (tempch, buf_ch); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!fastmap[*tempch]) ~~~~~~~~~~~~~~~~~~~~~~ goto advance; ~~~~~~~~~~~~~ } ~ #else ~~~~~ if (!fastmap[(unsigned char) RE_TRANSLATE (*d)]) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto advance; ~~~~~~~~~~~~~ #endif /* MULE */ ~~~~~~~~~~~~~~~~~ } ~ } ~ /* If can't match the null string, and that's all we have left, fail. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (range >= 0 && startpos == total_size && fastmap ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ && !bufp->can_be_null) ~~~~~~~~~~~~~~~~~~~~~~ { ~ UNBIND_REGEX_MALLOC_CHECK (); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return -1; ~~~~~~~~~~ } ~ #ifdef emacs /* XEmacs added, w/removal of immediate_quit */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!no_quit_in_re_search) ~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ BEGIN_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~~~ QUIT; ~~~~~ END_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~ RE_SEARCH_RELOCATE_MOVEABLE_DATA_POINTERS (); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ #endif ~~~~~~ BEGIN_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~~~ val = re_match_2_internal (bufp, string1, size1, string2, size2, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ startpos, regs, stop ~~~~~~~~~~~~~~~~~~~~ RE_LISP_CONTEXT_ARGS); ~~~~~~~~~~~~~~~~~~~~~~ #ifndef REGEX_MALLOC ~~~~~~~~~~~~~~~~~~~~ ALLOCA_GARBAGE_COLLECT (); ~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ END_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~ RE_SEARCH_RELOCATE_MOVEABLE_DATA_POINTERS (); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (val >= 0) ~~~~~~~~~~~~~ { ~ UNBIND_REGEX_MALLOC_CHECK (); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return startpos; ~~~~~~~~~~~~~~~~ } ~ if (val == -2) ~~~~~~~~~~~~~~ { ~ UNBIND_REGEX_MALLOC_CHECK (); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return -2; ~~~~~~~~~~ } ~ RE_SEARCH_RELOCATE_MOVEABLE_DATA_POINTERS (); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ advance: ~~~~~~~~ if (!range) ~~~~~~~~~~~ break; ~~~~~~ else if (range > 0) ~~~~~~~~~~~~~~~~~~~ { ~ Bytecount d_size; ~~~~~~~~~~~~~~~~~ d = ((const unsigned char *) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (startpos >= size1 ? string2 - size1 : string1) + startpos); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ d_size = itext_ichar_len_fmt (d, fmt); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ range -= d_size; ~~~~~~~~~~~~~~~~ assert (!forward_search_p || range >= 0); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ startpos += d_size; ~~~~~~~~~~~~~~~~~~~ } ~ else ~~~~ { ~ Bytecount d_size; ~~~~~~~~~~~~~~~~~ /* Note startpos > size1 not >=. If we are on the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ string1/string2 boundary, we want to backup into string1. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ d = ((const unsigned char *) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (startpos > size1 ? string2 - size1 : string1) + startpos); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEC_IBYTEPTR_FMT (d, fmt); ~~~~~~~~~~~~~~~~~~~~~~~~~~ d_size = itext_ichar_len_fmt (d, fmt); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ range += d_size; ~~~~~~~~~~~~~~~~ assert (!forward_search_p || range >= 0); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ startpos -= d_size; ~~~~~~~~~~~~~~~~~~~ } ~ } ~ UNBIND_REGEX_MALLOC_CHECK (); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return -1; ~~~~~~~~~~ } /* re_search_2 */ ~~~~~~~~~~~~~~~~~~~ ~ /* Declarations and macros for re_match_2. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* This converts PTR, a pointer into one of the search strings `string1' ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ and `string2' into an offset from the beginning of that string. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define POINTER_TO_OFFSET(ptr) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (FIRST_STRING_P (ptr) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~ ? ((regoff_t) ((ptr) - string1)) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ : ((regoff_t) ((ptr) - string2 + size1))) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Macros for dealing with the split strings in re_match_2. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define MATCHING_IN_FIRST_STRING (dend == end_match_1) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Call before fetching a character with *d. This switches over to ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ string2 if necessary. */ ~~~~~~~~~~~~~~~~~~~~~~~~~ #define REGEX_PREFETCH() \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ while (d == dend) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { \ ~~~~~~~~~~~ /* End of string2 => fail. */ \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (dend == end_match_2) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto fail; \ ~~~~~~~~~~~~~~~~~~ /* End of string1 => advance to string2. */ \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ d = string2; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~ dend = end_match_2; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* Test if at very beginning or at very end of the virtual concatenation ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ of `string1' and `string2'. If only one string, it's `string2'. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define AT_STRINGS_BEG(d) ((d) == (size1 ? string1 : string2) || !size2) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define AT_STRINGS_END(d) ((d) == end2) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* XEmacs change: ~~~~~~~~~~~~~~~~~ If the given position straddles the string gap, return the equivalent ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ position that is before or after the gap, respectively; otherwise, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return the same position. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define POS_BEFORE_GAP_UNSAFE(d) ((d) == string2 ? end1 : (d)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define POS_AFTER_GAP_UNSAFE(d) ((d) == end1 ? string2 : (d)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Test if CH is a word-constituent character. (XEmacs change) */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define WORDCHAR_P(ch) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (SYNTAX (BUFFER_MIRROR_SYNTAX_TABLE (lispbuf), ch) == Sword) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Free everything we malloc. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef MATCH_MAY_ALLOCATE ~~~~~~~~~~~~~~~~~~~~~~~~~ #define FREE_VAR(var,type) if (var) REGEX_FREE (var, type); var = NULL ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define FREE_VARIABLES() \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ do { \ ~~~~~~~~~~~~~~ UNBIND_REGEX_MALLOC_CHECK (); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ REGEX_FREE_STACK (fail_stack.stack); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ FREE_VAR (regstart, re_char **); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ FREE_VAR (regend, re_char **); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ FREE_VAR (old_regstart, re_char **); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ FREE_VAR (old_regend, re_char **); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ FREE_VAR (best_regstart, re_char **); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ FREE_VAR (best_regend, re_char **); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ FREE_VAR (reg_info, register_info_type *); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ FREE_VAR (reg_dummy, re_char **); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ FREE_VAR (reg_info_dummy, register_info_type *); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } while (0) ~~~~~~~~~~~ #else /* not MATCH_MAY_ALLOCATE */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define FREE_VARIABLES() \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ do { \ ~~~~~~~~~~~ UNBIND_REGEX_MALLOC_CHECK (); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } while (0) ~~~~~~~~~~~ #endif /* MATCH_MAY_ALLOCATE */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* These values must meet several constraints. They must not be valid ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ register values, which means we can use numbers larger than MAX_REGNUM. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ They must differ by 1, because of NUM_FAILURE_ITEMS above. And the value ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for the lowest register must be larger than the value for the highest ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ register, so we do not try to actually save any registers when none are ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ active. */ ~~~~~~~~~~~ #define NO_HIGHEST_ACTIVE_REG (MAX_REGNUM + 1) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define NO_LOWEST_ACTIVE_REG (NO_HIGHEST_ACTIVE_REG + 1) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ~ /* Matching routines. */ ~~~~~~~~~~~~~~~~~~~~~~~~~ #ifndef emacs /* XEmacs never uses this. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* re_match is like re_match_2 except it takes only a single string. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int ~~~ re_match (struct re_pattern_buffer *bufp, const char *string, int size, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int pos, struct re_registers *regs ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RE_LISP_CONTEXT_ARGS_DECL) ~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ int result = re_match_2_internal (bufp, NULL, 0, (re_char *) string, size, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ pos, regs, size ~~~~~~~~~~~~~~~ RE_LISP_CONTEXT_ARGS); ~~~~~~~~~~~~~~~~~~~~~~ ALLOCA_GARBAGE_COLLECT (); ~~~~~~~~~~~~~~~~~~~~~~~~~~ return result; ~~~~~~~~~~~~~~ } ~ #endif /* not emacs */ ~~~~~~~~~~~~~~~~~~~~~~ /* re_match_2 matches the compiled pattern in BUFP against the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (virtual) concatenation of STRING1 and STRING2 (of length SIZE1 and ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ SIZE2, respectively). We start matching at POS, and stop matching ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ at STOP. ~~~~~~~~ If REGS is non-null and the `no_sub' field of BUFP is nonzero, we ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ store offsets for the substring each group matched in REGS. See the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ documentation for exactly how many groups we fill. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ We return -1 if no match, -2 if an internal error (such as the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ failure stack overflowing). Otherwise, we return the length of the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ matched substring. */ ~~~~~~~~~~~~~~~~~~~~~~ int ~~~ re_match_2 (struct re_pattern_buffer *bufp, const char *string1, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int size1, const char *string2, int size2, int pos, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ struct re_registers *regs, int stop ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RE_LISP_CONTEXT_ARGS_DECL) ~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ int result; ~~~~~~~~~~~ #ifdef emacs ~~~~~~~~~~~~ /* Update the mirror syntax table if it's dirty now, this would otherwise ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ cause a malloc() in charset_mule in re_match_2_internal() when checking ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ characters' syntax. */ ~~~~~~~~~~~~~~~~~~~~~~ SYNTAX_CODE (BUFFER_MIRROR_SYNTAX_TABLE (lispbuf), 'a'); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ scache = setup_syntax_cache (scache, lispobj, lispbuf, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ offset_to_bytexpos (lispobj, pos), ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 1); ~~~ #endif ~~~~~~ result = re_match_2_internal (bufp, (re_char *) string1, size1, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (re_char *) string2, size2, ~~~~~~~~~~~~~~~~~~~~~~~~~~~ pos, regs, stop ~~~~~~~~~~~~~~~ RE_LISP_CONTEXT_ARGS); ~~~~~~~~~~~~~~~~~~~~~~ ALLOCA_GARBAGE_COLLECT (); ~~~~~~~~~~~~~~~~~~~~~~~~~~ return result; ~~~~~~~~~~~~~~ } ~ /* This is a separate function so that we can force an alloca cleanup ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ afterwards. */ ~~~~~~~~~~~~~~~ static int ~~~~~~~~~~ re_match_2_internal (struct re_pattern_buffer *bufp, re_char *string1, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int size1, re_char *string2, int size2, int pos, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ struct re_registers *regs, int stop ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RE_LISP_CONTEXT_ARGS_MULE_DECL) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ /* General temporaries. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~ int mcnt; ~~~~~~~~~ re_char *p1; ~~~~~~~~~~~~ int should_succeed; /* XEmacs change */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Just past the end of the corresponding string. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_char *end1, *end2; ~~~~~~~~~~~~~~~~~~~~~ /* Pointers into string1 and string2, just past the last characters in ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ each to consider matching. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_char *end_match_1, *end_match_2; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Where we are in the data, and the end of the current string. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_char *d, *dend; ~~~~~~~~~~~~~~~~~~ /* Where we are in the pattern, and the end of the pattern. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ unsigned char *p; ~~~~~~~~~~~~~~~~~ re_char *pstart; ~~~~~~~~~~~~~~~~ REGISTER re_char *pend; ~~~~~~~~~~~~~~~~~~~~~~~ /* Mark the opcode just after a start_memory, so we can test for an ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ empty subpattern when we get to the stop_memory. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_char *just_past_start_mem = 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* We use this to map every character in the string. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RE_TRANSLATE_TYPE translate = bufp->translate; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Failure point stack. Each place that can handle a failure further ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ down the line pushes a failure point on this stack. It consists of ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ restart, regend, and reg_info for all registers corresponding to ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the subexpressions we're currently inside, plus the number of such ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ registers, and, finally, two char *'s. The first char * is where ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ to resume scanning the pattern; the second one is where to resume ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ scanning the strings. If the latter is zero, the failure point is ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ a ``dummy''; if a failure happens and the failure point is a dummy, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ it gets discarded and the next one is tried. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef MATCH_MAY_ALLOCATE /* otherwise, this is global. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fail_stack_type fail_stack; ~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ #ifdef DEBUG ~~~~~~~~~~~~ static int failure_id; ~~~~~~~~~~~~~~~~~~~~~~ int nfailure_points_pushed = 0, nfailure_points_popped = 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ #ifdef REGEX_REL_ALLOC ~~~~~~~~~~~~~~~~~~~~~~ /* This holds the pointer to the failure stack, when ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ it is allocated relocatably. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fail_stack_elt_t *failure_stack_ptr; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ /* We fill all the registers internally, independent of what we ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return, for use in backreferences. The number here includes ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ an element for register zero. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regnum_t num_regs = bufp->re_ngroups + 1; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* The currently active registers. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regnum_t lowest_active_reg = NO_LOWEST_ACTIVE_REG; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regnum_t highest_active_reg = NO_HIGHEST_ACTIVE_REG; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Information on the contents of registers. These are pointers into ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the input strings; they record just what was matched (on this ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ attempt) by a subexpression part of the pattern, that is, the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regnum-th regstart pointer points to where in the pattern we began ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ matching and the regnum-th regend points to right after where we ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ stopped matching the regnum-th subexpression. (The zeroth register ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ keeps track of what the whole pattern matches.) */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_char **regstart, **regend; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ /* If a group that's operated upon by a repetition operator fails to ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ match anything, then the register for its start will need to be ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ restored because it will have been set to wherever in the string we ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ are when we last see its open-group operator. Similarly for a ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ register's end. */ ~~~~~~~~~~~~~~~~~~~ #ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_char **old_regstart, **old_regend; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ /* The is_active field of reg_info helps us keep track of which (possibly ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ nested) subexpressions we are currently in. The matched_something ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ field of reg_info[reg_num] helps us tell whether or not we have ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ matched any of the pattern so far this time through the reg_num-th ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ subexpression. These two fields get reset each time through any ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ loop their register is in. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef MATCH_MAY_ALLOCATE /* otherwise, this is global. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ register_info_type *reg_info; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ /* The following record the register info as found in the above ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ variables when we find a match better than any we've seen before. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ This happens as we backtrack through the failure points, which in ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ turn happens only if we have not yet matched the entire string. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int best_regs_set = false; ~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_char **best_regstart, **best_regend; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ /* Logically, this is `best_regend[0]'. But we don't want to have to ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ allocate space for that if we're not allocating space for anything ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ else (see below). Also, we never need info about register 0 for ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ any of the other register vectors, and it seems rather a kludge to ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ treat `best_regend' differently than the rest. So we keep track of ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the end of the best match so far in a separate variable. We ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ initialize this to NULL so that when we backtrack the first time ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ and need to test it, it's not garbage. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_char *match_end = NULL; ~~~~~~~~~~~~~~~~~~~~~~~~~~ /* This helps SET_REGS_MATCHED avoid doing redundant work. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int set_regs_matched_done = 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Used when we pop values we don't care about. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_char **reg_dummy; ~~~~~~~~~~~~~~~~~~~~ register_info_type *reg_info_dummy; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ #ifdef DEBUG ~~~~~~~~~~~~ /* Counts the total number of registers pushed. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int num_regs_pushed = 0; ~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ /* 1 if this match ends in the same string (string1 or string2) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ as the best previous match. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_bool same_str_p; ~~~~~~~~~~~~~~~~~~~ /* 1 if this match is the best seen so far. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_bool best_match_p; ~~~~~~~~~~~~~~~~~~~~~ #ifdef emacs ~~~~~~~~~~~~ Internal_Format fmt = buffer_or_other_internal_format (lispobj); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef REL_ALLOC ~~~~~~~~~~~~~~~~ const Ibyte *orig_buftext = ~~~~~~~~~~~~~~~~~~~~~~~~~~~ BUFFERP (lispobj) ? ~~~~~~~~~~~~~~~~~~~ BYTE_BUF_BYTE_ADDRESS (XBUFFER (lispobj), ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ BYTE_BUF_BEG (XBUFFER (lispobj))) : ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 0; ~~ #endif ~~~~~~ #ifdef ERROR_CHECK_MALLOC ~~~~~~~~~~~~~~~~~~~~~~~~~ int depth = bind_regex_malloc_disallowed (1); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ #endif /* emacs */ ~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT1 ("\n\nEntering re_match_2.\n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ BEGIN_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~~~ INIT_FAIL_STACK (); ~~~~~~~~~~~~~~~~~~~ p = (unsigned char *) ALLOCA (bufp->used); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ END_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~ /* re_match_2_internal() modifies the compiled pattern (see the succeed_n, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ jump_n, set_number_at opcodes), make it re-entrant by working on a ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ copy. This should also give better locality of reference. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ memcpy (p, bufp->buffer, bufp->used); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ pstart = (re_char *) p; ~~~~~~~~~~~~~~~~~~~~~~~ pend = pstart + bufp->used; ~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef MATCH_MAY_ALLOCATE ~~~~~~~~~~~~~~~~~~~~~~~~~ /* Do not bother to initialize all the register variables if there are ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ no groups in the pattern, as it takes a fair amount of time. If ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ there are groups, we include space for register 0 (the whole ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ pattern), even though we never use it, since it simplifies the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ array indexing. We should fix this. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (bufp->re_ngroups) ~~~~~~~~~~~~~~~~~~~~~ { ~ BEGIN_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~~~ regstart = REGEX_TALLOC (num_regs, re_char *); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regend = REGEX_TALLOC (num_regs, re_char *); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ old_regstart = REGEX_TALLOC (num_regs, re_char *); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ old_regend = REGEX_TALLOC (num_regs, re_char *); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ best_regstart = REGEX_TALLOC (num_regs, re_char *); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ best_regend = REGEX_TALLOC (num_regs, re_char *); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ reg_info = REGEX_TALLOC (num_regs, register_info_type); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ reg_dummy = REGEX_TALLOC (num_regs, re_char *); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ reg_info_dummy = REGEX_TALLOC (num_regs, register_info_type); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ END_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~ if (!(regstart && regend && old_regstart && old_regend && reg_info ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ && best_regstart && best_regend && reg_dummy && reg_info_dummy)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ FREE_VARIABLES (); ~~~~~~~~~~~~~~~~~~ return -2; ~~~~~~~~~~ } ~ } ~ else ~~~~ { ~ /* We must initialize all our variables to NULL, so that ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `FREE_VARIABLES' doesn't try to free them. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regstart = regend = old_regstart = old_regend = best_regstart ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ = best_regend = reg_dummy = NULL; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ reg_info = reg_info_dummy = (register_info_type *) NULL; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ #endif /* MATCH_MAY_ALLOCATE */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #if defined (emacs) && defined (REL_ALLOC) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ /* If the allocations above (or the call to setup_syntax_cache() in ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_match_2) caused a rel-alloc relocation, then fix up the data ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ pointers */ ~~~~~~~~~~~ Bytecount offset = offset_post_relocation (lispobj, orig_buftext); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (offset) ~~~~~~~~~~~ { ~ string1 += offset; ~~~~~~~~~~~~~~~~~~ string2 += offset; ~~~~~~~~~~~~~~~~~~ } ~ } ~ #endif /* defined (emacs) && defined (REL_ALLOC) */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* The starting position is bogus. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (pos < 0 || pos > size1 + size2) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ FREE_VARIABLES (); ~~~~~~~~~~~~~~~~~~ return -1; ~~~~~~~~~~ } ~ /* Initialize subexpression text positions to our sentinel to mark ones that ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ no start_memory/stop_memory has been seen for. Also initialize the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ register information struct. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (mcnt = 1; mcnt < num_regs; mcnt++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ regstart[mcnt] = regend[mcnt] = old_regstart[mcnt] = old_regend[mcnt] ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ = best_regstart[mcnt] = best_regend[mcnt] = REG_UNSET_VALUE; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ REG_MATCH_NULL_STRING_P (reg_info[mcnt]) = MATCH_NULL_UNSET_VALUE; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ IS_ACTIVE (reg_info[mcnt]) = 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ MATCHED_SOMETHING (reg_info[mcnt]) = 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ EVER_MATCHED_SOMETHING (reg_info[mcnt]) = 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* We move `string1' into `string2' if the latter's empty -- but not if ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `string1' is null. */ ~~~~~~~~~~~~~~~~~~~~~~ if (size2 == 0 && string1 != NULL) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ string2 = string1; ~~~~~~~~~~~~~~~~~~ size2 = size1; ~~~~~~~~~~~~~~ string1 = 0; ~~~~~~~~~~~~ size1 = 0; ~~~~~~~~~~ } ~ end1 = string1 + size1; ~~~~~~~~~~~~~~~~~~~~~~~ end2 = string2 + size2; ~~~~~~~~~~~~~~~~~~~~~~~ /* Compute where to stop matching, within the two strings. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (stop <= size1) ~~~~~~~~~~~~~~~~~~ { ~ end_match_1 = string1 + stop; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ end_match_2 = string2; ~~~~~~~~~~~~~~~~~~~~~~ } ~ else ~~~~ { ~ end_match_1 = end1; ~~~~~~~~~~~~~~~~~~~ end_match_2 = string2 + stop - size1; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* `p' scans through the pattern as `d' scans through the data. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `dend' is the end of the input string that `d' points within. `d' ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ is advanced into the following input string whenever necessary, but ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ this happens before fetching; therefore, at the beginning of the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ loop, `d' can be pointing at the end of a string, but it cannot ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ equal `string2'. */ ~~~~~~~~~~~~~~~~~~~~ if (size1 > 0 && pos <= size1) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ d = string1 + pos; ~~~~~~~~~~~~~~~~~~ dend = end_match_1; ~~~~~~~~~~~~~~~~~~~ } ~ else ~~~~ { ~ d = string2 + pos - size1; ~~~~~~~~~~~~~~~~~~~~~~~~~~ dend = end_match_2; ~~~~~~~~~~~~~~~~~~~ } ~ DEBUG_MATCH_PRINT1 ("The compiled pattern is: \n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT_COMPILED_PATTERN (bufp, p, pend); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT1 ("The string to match is: `"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT_DOUBLE_STRING (d, string1, size1, string2, size2); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT1 ("'\n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* This loops over pattern commands. It exits by returning from the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ function if the match is complete, or it drops through if the match ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fails at this starting point in the input data. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (;;) ~~~~~~~~ { ~ DEBUG_MATCH_PRINT2 ("\n0x%zx: ", (Bytecount) p); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef emacs /* XEmacs added, w/removal of immediate_quit */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!no_quit_in_re_search) ~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ BEGIN_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~~~ QUIT; ~~~~~ END_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~ RE_MATCH_RELOCATE_MOVEABLE_DATA_POINTERS (); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ #endif ~~~~~~ if (p == pend) ~~~~~~~~~~~~~~ { /* End of pattern means we might have succeeded. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT1 ("end of pattern ... "); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* If we haven't matched the entire string, and we want the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ longest match, try backtracking. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (d != end_match_2) ~~~~~~~~~~~~~~~~~~~~~ { ~ same_str_p = (FIRST_STRING_P (match_end) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ == MATCHING_IN_FIRST_STRING); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* AIX compiler got confused when this was combined ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ with the previous declaration. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (same_str_p) ~~~~~~~~~~~~~~~ best_match_p = d > match_end; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ else ~~~~ best_match_p = !MATCHING_IN_FIRST_STRING; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT1 ("backtracking.\n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!FAIL_STACK_EMPTY ()) ~~~~~~~~~~~~~~~~~~~~~~~~~ { /* More failure points to try. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* If exceeds best match so far, save it. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!best_regs_set || best_match_p) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ best_regs_set = true; ~~~~~~~~~~~~~~~~~~~~~ match_end = d; ~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT1 ("\nSAVING match as best so far.\n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (mcnt = 1; mcnt < num_regs; mcnt++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ best_regstart[mcnt] = regstart[mcnt]; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ best_regend[mcnt] = regend[mcnt]; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ } ~ goto fail; ~~~~~~~~~~ } ~ /* If no failure points, don't restore garbage. And if ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ last match is real best match, don't restore second ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ best one. */ ~~~~~~~~~~~~ else if (best_regs_set && !best_match_p) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ restore_best_regs: ~~~~~~~~~~~~~~~~~~ /* Restore best match. It may happen that `dend == ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ end_match_1' while the restored d is in string2. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ For example, the pattern `x.*y.*z' against the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ strings `x-' and `y-z-', if the two strings are ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ not consecutive in memory. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT1 ("Restoring best registers.\n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ d = match_end; ~~~~~~~~~~~~~~ dend = ((d >= string1 && d <= end1) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ? end_match_1 : end_match_2); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (mcnt = 1; mcnt < num_regs; mcnt++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ regstart[mcnt] = best_regstart[mcnt]; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regend[mcnt] = best_regend[mcnt]; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ } ~ } /* d != end_match_2 */ ~~~~~~~~~~~~~~~~~~~~~~~~ succeed_label: ~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT1 ("Accepting match.\n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* If caller wants register contents data back, do it. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ int num_nonshy_regs = bufp->re_nsub + 1; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (regs && !bufp->no_sub) ~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ /* Have the register data arrays been allocated? */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (bufp->regs_allocated == REGS_UNALLOCATED) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { /* No. So allocate them with malloc. We need one ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ extra element beyond `num_regs' for the `-1' marker ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ GNU code uses. */ ~~~~~~~~~~~~~~~~~~ regs->num_regs = MAX (RE_NREGS, num_nonshy_regs + 1); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ BEGIN_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~~~ regs->start = TALLOC (regs->num_regs, regoff_t); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regs->end = TALLOC (regs->num_regs, regoff_t); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ END_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~ RE_MATCH_RELOCATE_MOVEABLE_DATA_POINTERS (); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (regs->start == NULL || regs->end == NULL) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ FREE_VARIABLES (); ~~~~~~~~~~~~~~~~~~ return -2; ~~~~~~~~~~ } ~ bufp->regs_allocated = REGS_REALLOCATE; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ else if (bufp->regs_allocated == REGS_REALLOCATE) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { /* Yes. If we need more elements than were already ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ allocated, reallocate them. If we need fewer, just ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ leave it alone. */ ~~~~~~~~~~~~~~~~~~~ if (regs->num_regs < num_nonshy_regs + 1) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ regs->num_regs = num_nonshy_regs + 1; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ BEGIN_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~~~ RETALLOC (regs->start, regs->num_regs, regoff_t); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RETALLOC (regs->end, regs->num_regs, regoff_t); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ END_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~ RE_MATCH_RELOCATE_MOVEABLE_DATA_POINTERS (); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (regs->start == NULL || regs->end == NULL) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ FREE_VARIABLES (); ~~~~~~~~~~~~~~~~~~ return -2; ~~~~~~~~~~ } ~ } ~ } ~ else ~~~~ { ~ /* The braces fend off a "empty body in an else-statement" ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ warning under GCC when assert expands to nothing. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ assert (bufp->regs_allocated == REGS_FIXED); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* Convert the pointer data in `regstart' and `regend' to ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ indices. Register zero has to be set differently, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ since we haven't kept track of any info for it. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (regs->num_regs > 0) ~~~~~~~~~~~~~~~~~~~~~~~ { ~ regs->start[0] = pos; ~~~~~~~~~~~~~~~~~~~~~ regs->end[0] = (MATCHING_IN_FIRST_STRING ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ? ((regoff_t) (d - string1)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ : ((regoff_t) (d - string2 + size1))); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* Map over the NUM_NONSHY_REGS non-shy internal registers. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Copy each into the corresponding external register. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ MCNT indexes external registers. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (mcnt = 1; mcnt < MIN (num_nonshy_regs, regs->num_regs); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ mcnt++) ~~~~~~~ { ~ int internal_reg = bufp->external_to_internal_register[mcnt]; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if ((int)0xDEADBEEF == internal_reg ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || REG_UNSET (regstart[internal_reg]) || ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ REG_UNSET (regend[internal_reg])) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regs->start[mcnt] = regs->end[mcnt] = -1; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ else ~~~~ { ~ regs->start[mcnt] = ~~~~~~~~~~~~~~~~~~~ (regoff_t) POINTER_TO_OFFSET (regstart[internal_reg]); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regs->end[mcnt] = ~~~~~~~~~~~~~~~~~ (regoff_t) POINTER_TO_OFFSET (regend[internal_reg]); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ } ~ } /* regs && !bufp->no_sub */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* If we have regs and the regs structure has more elements than ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ were in the pattern, set the extra elements starting with ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ NUM_NONSHY_REGS to -1. If we (re)allocated the registers, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ this is the case, because we always allocate enough to have ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ at least one -1 at the end. ~~~~~~~~~~~~~~~~~~~~~~~~~~~ We do this even when no_sub is set because some applications ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (XEmacs) reuse register structures which may contain stale ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ information, and permit attempts to access those registers. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ It would be possible to require the caller to do this, but we'd ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ have to change the API for this function to reflect that, and ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ audit all callers. Note: as of 2003-04-17 callers in XEmacs ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ do clear the registers, but it's safer to leave this code in ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ because of reallocation. ~~~~~~~~~~~~~~~~~~~~~~~~ */ ~~ if (regs && regs->num_regs > 0) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (mcnt = num_nonshy_regs; mcnt < regs->num_regs; mcnt++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regs->start[mcnt] = regs->end[mcnt] = -1; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ DEBUG_MATCH_PRINT4 ("%u failure points pushed, %u popped (%u remain).\n", ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ nfailure_points_pushed, nfailure_points_popped, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ nfailure_points_pushed - nfailure_points_popped); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT2 ("%u registers pushed.\n", num_regs_pushed); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ mcnt = d - pos - (MATCHING_IN_FIRST_STRING ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ? string1 ~~~~~~~~~ : string2 - size1); ~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT2 ("Returning %d from re_match_2.\n", mcnt); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ FREE_VARIABLES (); ~~~~~~~~~~~~~~~~~~ return mcnt; ~~~~~~~~~~~~ } ~ /* Otherwise match next pattern command. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ switch ((re_opcode_t) *p++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ /* Ignore these. Used to ignore the n of succeed_n's which ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ currently have n == 0. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~ case no_op: ~~~~~~~~~~~ DEBUG_MATCH_PRINT1 ("EXECUTING no_op.\n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case succeed: ~~~~~~~~~~~~~ DEBUG_MATCH_PRINT1 ("EXECUTING succeed.\n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto succeed_label; ~~~~~~~~~~~~~~~~~~~ /* Match exactly a string of length n in the pattern. The ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ following byte in the pattern defines n, and the n bytes after ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ that make up the string to match. (Under Mule, this will be in ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the default internal format.) */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case exactn: ~~~~~~~~~~~~ mcnt = *p++; ~~~~~~~~~~~~ DEBUG_MATCH_PRINT2 ("EXECUTING exactn %d.\n", mcnt); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* This is written out as an if-else so we don't waste time ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ testing `translate' inside the loop. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (TRANSLATE_P (translate)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ do ~~ { ~ #ifdef MULE ~~~~~~~~~~~ Bytecount pat_len; ~~~~~~~~~~~~~~~~~~ REGEX_PREFETCH (); ~~~~~~~~~~~~~~~~~~ if (RE_TRANSLATE_1 (itext_ichar_fmt (d, fmt, lispobj)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ != itext_ichar (p)) ~~~~~~~~~~~~~~~~~~~ goto fail; ~~~~~~~~~~ pat_len = itext_ichar_len (p); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ p += pat_len; ~~~~~~~~~~~~~ INC_IBYTEPTR_FMT (d, fmt); ~~~~~~~~~~~~~~~~~~~~~~~~~~ mcnt -= pat_len; ~~~~~~~~~~~~~~~~ #else /* not MULE */ ~~~~~~~~~~~~~~~~~~~~ REGEX_PREFETCH (); ~~~~~~~~~~~~~~~~~~ if ((unsigned char) RE_TRANSLATE_1 (*d++) != *p++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto fail; ~~~~~~~~~~ mcnt--; ~~~~~~~ #endif ~~~~~~ } ~ while (mcnt > 0); ~~~~~~~~~~~~~~~~~ } ~ else ~~~~ { ~ #ifdef MULE ~~~~~~~~~~~ /* If buffer format is default, then we can shortcut and just ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ compare the text directly, byte by byte. Otherwise, we ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ need to go character by character. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (fmt != FORMAT_DEFAULT) ~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ do ~~ { ~ Bytecount pat_len; ~~~~~~~~~~~~~~~~~~ REGEX_PREFETCH (); ~~~~~~~~~~~~~~~~~~ if (itext_ichar_fmt (d, fmt, lispobj) != ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ itext_ichar (p)) ~~~~~~~~~~~~~~~~ goto fail; ~~~~~~~~~~ pat_len = itext_ichar_len (p); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ p += pat_len; ~~~~~~~~~~~~~ INC_IBYTEPTR_FMT (d, fmt); ~~~~~~~~~~~~~~~~~~~~~~~~~~ mcnt -= pat_len; ~~~~~~~~~~~~~~~~ } ~ while (mcnt > 0); ~~~~~~~~~~~~~~~~~ } ~ else ~~~~ #endif ~~~~~~ { ~ do ~~ { ~ REGEX_PREFETCH (); ~~~~~~~~~~~~~~~~~~ if (*d++ != *p++) goto fail; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ mcnt--; ~~~~~~~ } ~ while (mcnt > 0); ~~~~~~~~~~~~~~~~~ } ~ } ~ SET_REGS_MATCHED (); ~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ /* Match any character except possibly a newline or a null. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case anychar: ~~~~~~~~~~~~~ DEBUG_MATCH_PRINT1 ("EXECUTING anychar.\n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ REGEX_PREFETCH (); ~~~~~~~~~~~~~~~~~~ if ((!(bufp->syntax & RE_DOT_NEWLINE) && ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RE_TRANSLATE (itext_ichar_fmt (d, fmt, lispobj)) == '\n') ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || (bufp->syntax & RE_DOT_NOT_NULL && ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RE_TRANSLATE (itext_ichar_fmt (d, fmt, lispobj)) == ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ '\000')) ~~~~~~~~ goto fail; ~~~~~~~~~~ SET_REGS_MATCHED (); ~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT2 (" Matched `%c'.\n", *d); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ INC_IBYTEPTR_FMT (d, fmt); /* XEmacs change */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case charset: ~~~~~~~~~~~~~ case charset_not: ~~~~~~~~~~~~~~~~~ { ~ REGISTER Ichar c; ~~~~~~~~~~~~~~~~~ re_bool not_p = (re_opcode_t) *(p - 1) == charset_not; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT2 ("EXECUTING charset%s.\n", not_p ? "_not" : ""); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ REGEX_PREFETCH (); ~~~~~~~~~~~~~~~~~~ c = itext_ichar_fmt (d, fmt, lispobj); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ c = RE_TRANSLATE (c); /* The character to match. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Cast to `unsigned int' instead of `unsigned char' in case the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bit list is a full 32 bytes long. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if ((unsigned int)c < (unsigned int) (*p * BYTEWIDTH) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ && p[1 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH))) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ not_p = !not_p; ~~~~~~~~~~~~~~~ p += 1 + *p; ~~~~~~~~~~~~ if (!not_p) goto fail; ~~~~~~~~~~~~~~~~~~~~~~ SET_REGS_MATCHED (); ~~~~~~~~~~~~~~~~~~~~ INC_IBYTEPTR_FMT (d, fmt); /* XEmacs change */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ } ~ #ifdef MULE ~~~~~~~~~~~ case charset_mule: ~~~~~~~~~~~~~~~~~~ case charset_mule_not: ~~~~~~~~~~~~~~~~~~~~~~ { ~ REGISTER Ichar c; ~~~~~~~~~~~~~~~~~ re_bool not_p = (re_opcode_t) *(p - 1) == charset_mule_not; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Bitbyte class_bits = *p++; ~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT2 ("EXECUTING charset_mule%s.\n", ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ not_p ? "_not" : ""); ~~~~~~~~~~~~~~~~~~~~~ REGEX_PREFETCH (); ~~~~~~~~~~~~~~~~~~ c = itext_ichar_fmt (d, fmt, lispobj); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ c = RE_TRANSLATE (c); /* The character to match. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if ((class_bits && ~~~~~~~~~~~~~~~~~~ ((class_bits & BIT_WORD && ISWORD (c)) /* = ALNUM */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || (class_bits & BIT_ALPHA && ISALPHA (c)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || (class_bits & BIT_SPACE && ISSPACE (c)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || (class_bits & BIT_PUNCT && ISPUNCT (c)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || (TRANSLATE_P (translate) ? ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (class_bits & (BIT_UPPER | BIT_LOWER) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ && !NOCASEP (lispbuf, c)) ~~~~~~~~~~~~~~~~~~~~~~~~~ : ((class_bits & BIT_UPPER && ISUPPER (c)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || (class_bits & BIT_LOWER && ISLOWER (c)))))) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || EQ (Qt, unified_range_table_lookup ((void *) p, c, Qnil))) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ not_p = !not_p; ~~~~~~~~~~~~~~~ } ~ p += unified_range_table_bytes_used ((void *) p); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!not_p) goto fail; ~~~~~~~~~~~~~~~~~~~~~~ SET_REGS_MATCHED (); ~~~~~~~~~~~~~~~~~~~~ INC_IBYTEPTR_FMT (d, fmt); ~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ } ~ #endif /* MULE */ ~~~~~~~~~~~~~~~~~ /* The beginning of a group is represented by start_memory. The ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ arguments are the register number in the next two bytes, and the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ number of groups inner to this one in the two bytes thereafter. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ The text matched within the group is recorded (in the internal ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ registers data structure) under the register number. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case start_memory: ~~~~~~~~~~~~~~~~~~ { ~ regnum_t regno; ~~~~~~~~~~~~~~~ /* Find out if this group can match the empty string. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ p1 = p; /* To send to group_match_null_string_p. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ EXTRACT_NONNEGATIVE_AND_INCR (regno, p); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT3 ("EXECUTING start_memory %d (%d):\n", ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regno, extract_number (p)); ~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (REG_MATCH_NULL_STRING_P (reg_info[regno]) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ == MATCH_NULL_UNSET_VALUE) ~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ REG_MATCH_NULL_STRING_P (reg_info[regno]) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ = group_match_null_string_p (&p1, pend, reg_info); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ DEBUG_MATCH_PRINT2 (" group CAN%s match null string\n", ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ REG_MATCH_NULL_STRING_P (reg_info[regno]) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ? "NOT" : ""); ~~~~~~~~~~~~~~ /* Save the position in the string where we were the last time ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ we were at this open-group operator in case the group is ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ operated upon by a repetition operator, e.g., with `(a*)*b' ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ against `ab'; then we want to ignore where we are now in the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ string in case this attempt to match fails. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ old_regstart[regno] = REG_MATCH_NULL_STRING_P (reg_info[regno]) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ? REG_UNSET (regstart[regno]) ? d : regstart[regno] ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ : regstart[regno]; ~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT2 (" old_regstart: %zd\n", ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ POINTER_TO_OFFSET (old_regstart[regno])); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regstart[regno] = d; ~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT2 (" regstart: %zd\n", ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ POINTER_TO_OFFSET (regstart[regno])); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ IS_ACTIVE (reg_info[regno]) = 1; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ MATCHED_SOMETHING (reg_info[regno]) = 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Clear this whenever we change the register activity status. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ set_regs_matched_done = 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~ /* This is the new highest active register. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ highest_active_reg = regno; ~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* If nothing was active before, this is the new lowest active ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ register. */ ~~~~~~~~~~~~~ if (lowest_active_reg == NO_LOWEST_ACTIVE_REG) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ lowest_active_reg = regno; ~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Move past the inner group count. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ p += 2; ~~~~~~~ just_past_start_mem = p; ~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ } ~ /* The stop_memory opcode represents the end of a group. Its ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ arguments are the same as start_memory's: the register ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ number, and the number of inner groups. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case stop_memory: ~~~~~~~~~~~~~~~~~ { ~ regnum_t regno, inner_groups; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ EXTRACT_NONNEGATIVE_AND_INCR (regno, p); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ EXTRACT_NONNEGATIVE_AND_INCR (inner_groups, p); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT3 ("EXECUTING stop_memory %d (%d):\n", ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regno, inner_groups); ~~~~~~~~~~~~~~~~~~~~~ /* We need to save the string position the last time we were at ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ this close-group operator in case the group is operated ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ upon by a repetition operator, e.g., with `((a*)*(b*)*)*' ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ against `aba'; then we want to ignore where we are now in ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the string in case this attempt to match fails. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ old_regend[regno] = REG_MATCH_NULL_STRING_P (reg_info[regno]) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ? REG_UNSET (regend[regno]) ? d : regend[regno] ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ : regend[regno]; ~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT2 (" old_regend: %zd\n", ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ POINTER_TO_OFFSET (old_regend[regno])); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regend[regno] = d; ~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT2 (" regend: %zd\n", ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ POINTER_TO_OFFSET (regend[regno])); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* This register isn't active anymore. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ IS_ACTIVE (reg_info[regno]) = 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Clear this whenever we change the register activity status. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ set_regs_matched_done = 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~ /* If this was the only register active, nothing is active ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ anymore. */ ~~~~~~~~~~~~ if (lowest_active_reg == highest_active_reg) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ lowest_active_reg = NO_LOWEST_ACTIVE_REG; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ highest_active_reg = NO_HIGHEST_ACTIVE_REG; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ else ~~~~ { ~ /* We must scan for the new highest active register, since it ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ isn't necessarily one less than now: consider ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (a(b)c(d(e)f)g). When group 3 ends, after the f), the new ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ highest active register is 1. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regnum_t r = regno - 1; ~~~~~~~~~~~~~~~~~~~~~~~ while (r > 0 && !IS_ACTIVE (reg_info[r])) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ r--; ~~~~ /* If we end up at register zero, that means that we saved ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the registers as the result of an `on_failure_jump', not ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ a `start_memory', and we jumped to past the innermost ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `stop_memory'. For example, in ((.)*) we save registers ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 1 and 2 as a result of the *, but when we pop back to the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ second ), we are at the stop_memory 1. Thus, nothing is ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ active. */ ~~~~~~~~~~~ if (r == 0) ~~~~~~~~~~~ { ~ lowest_active_reg = NO_LOWEST_ACTIVE_REG; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ highest_active_reg = NO_HIGHEST_ACTIVE_REG; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ else ~~~~ { ~ highest_active_reg = r; ~~~~~~~~~~~~~~~~~~~~~~~ /* 98/9/21 jhod: We've also gotta set lowest_active_reg, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ don't we? */ ~~~~~~~~~~~~ r = 1; ~~~~~~ while (r < highest_active_reg && !IS_ACTIVE(reg_info[r])) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ r++; ~~~~ lowest_active_reg = r; ~~~~~~~~~~~~~~~~~~~~~~ } ~ } ~ /* If just failed to match something this time around with a ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ group that's operated on by a repetition operator, try to ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ force exit from the ``loop'', and restore the register ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ information for this group that we had before trying this ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ last match. */ ~~~~~~~~~~~~~~~ if ((!MATCHED_SOMETHING (reg_info[regno]) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || just_past_start_mem == p - 4) && p < pend) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ re_bool is_a_jump_n = false; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ p1 = p; ~~~~~~~ mcnt = 0; ~~~~~~~~~ switch ((re_opcode_t) *p1++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ case jump_n: ~~~~~~~~~~~~ is_a_jump_n = true; ~~~~~~~~~~~~~~~~~~~ case pop_failure_jump: ~~~~~~~~~~~~~~~~~~~~~~ case maybe_pop_jump: ~~~~~~~~~~~~~~~~~~~~ case jump: ~~~~~~~~~~ case dummy_failure_jump: ~~~~~~~~~~~~~~~~~~~~~~~~ EXTRACT_NUMBER_AND_INCR (mcnt, p1); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (is_a_jump_n) ~~~~~~~~~~~~~~~~ p1 += 2; ~~~~~~~~ break; ~~~~~~ default: ~~~~~~~~ /* do nothing */ ; ~~~~~~~~~~~~~~~~~~ } ~ p1 += mcnt; ~~~~~~~~~~~ /* If the next operation is a jump backwards in the pattern ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ to an on_failure_jump right before the start_memory ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ corresponding to this stop_memory, exit from the loop ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ by forcing a failure after pushing on the stack the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ on_failure_jump's jump in the pattern, and d. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (mcnt < 0 && (re_opcode_t) *p1 == on_failure_jump ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ && (re_opcode_t) p1[3] == start_memory && ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regno == extract_nonnegative (p1 + 4)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ /* If this group ever matched anything, then restore ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ what its registers were before trying this last ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ failed match, e.g., with `(a*)*b' against `ab' for ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regstart[1], and, e.g., with `((a*)*(b*)*)*' against ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `aba' for regend[3]. ~~~~~~~~~~~~~~~~~~~~ Also restore the registers for inner groups for, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ e.g., `((a*)(b*))*' against `aba' (register 3 would ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ otherwise get trashed). */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (EVER_MATCHED_SOMETHING (reg_info[regno])) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ int r; ~~~~~~ EVER_MATCHED_SOMETHING (reg_info[regno]) = 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Restore this and inner groups' (if any) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ registers. */ ~~~~~~~~~~~~~~ for (r = regno; r < regno + inner_groups; r++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ regstart[r] = old_regstart[r]; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* xx why this test? */ ~~~~~~~~~~~~~~~~~~~~~~~~ if (old_regend[r] >= regstart[r]) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regend[r] = old_regend[r]; ~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ } ~ p1++; ~~~~~ EXTRACT_NUMBER_AND_INCR (mcnt, p1); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PUSH_FAILURE_POINT (p1 + mcnt, d, -2); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto fail; ~~~~~~~~~~ } ~ } ~ /* We used to move past the register number and inner group count ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ here, when registers were just one byte; that's no longer ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ necessary with EXTRACT_NUMBER_AND_INCR(), above. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ } ~ /* \ has been turned into a `duplicate' command which is ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ followed by the numeric value of as the register number. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (Already passed through external-to-internal-register mapping, so ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ it refers to the actual group number, not the non-shy-only ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ numbering used in the external world.) */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case duplicate: ~~~~~~~~~~~~~~~ { ~ REGISTER re_char *d2, *dend2; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Get which register to match against. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regnum_t regno; ~~~~~~~~~~~~~~~ EXTRACT_NONNEGATIVE_AND_INCR (regno, p); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT2 ("EXECUTING duplicate %d.\n", regno); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Can't back reference a group which we've never matched. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (REG_UNSET (regstart[regno]) || REG_UNSET (regend[regno])) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto fail; ~~~~~~~~~~ /* Where in input to try to start matching. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ d2 = regstart[regno]; ~~~~~~~~~~~~~~~~~~~~~ /* Where to stop matching; if both the place to start and ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the place to stop matching are in the same string, then ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ set to the place to stop, otherwise, for now have to use ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the end of the first string. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ dend2 = ((FIRST_STRING_P (regstart[regno]) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ == FIRST_STRING_P (regend[regno])) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ? regend[regno] : end_match_1); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (;;) ~~~~~~~~ { ~ /* If necessary, advance to next segment in register ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ contents. */ ~~~~~~~~~~~~~ while (d2 == dend2) ~~~~~~~~~~~~~~~~~~~ { ~ if (dend2 == end_match_2) break; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (dend2 == regend[regno]) break; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* End of string1 => advance to string2. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ d2 = string2; ~~~~~~~~~~~~~ dend2 = regend[regno]; ~~~~~~~~~~~~~~~~~~~~~~ } ~ /* At end of register contents => success */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (d2 == dend2) break; ~~~~~~~~~~~~~~~~~~~~~~~ /* If necessary, advance to next segment in data. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ REGEX_PREFETCH (); ~~~~~~~~~~~~~~~~~~ /* How many characters left in this segment to match. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ mcnt = dend - d; ~~~~~~~~~~~~~~~~ /* Want how many consecutive characters we can match in ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ one shot, so, if necessary, adjust the count. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (mcnt > dend2 - d2) ~~~~~~~~~~~~~~~~~~~~~~ mcnt = dend2 - d2; ~~~~~~~~~~~~~~~~~~ /* Compare that many; failure if mismatch, else move ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ past them. */ ~~~~~~~~~~~~~~ if (TRANSLATE_P (translate) ~~~~~~~~~~~~~~~~~~~~~~~~~~~ ? bcmp_translate (d, d2, mcnt, translate ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef emacs ~~~~~~~~~~~~ , fmt, lispobj ~~~~~~~~~~~~~~ #endif ~~~~~~ ) ~ : memcmp (d, d2, mcnt)) ~~~~~~~~~~~~~~~~~~~~~~~ goto fail; ~~~~~~~~~~ d += mcnt, d2 += mcnt; ~~~~~~~~~~~~~~~~~~~~~~ /* Do this because we've match some characters. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ SET_REGS_MATCHED (); ~~~~~~~~~~~~~~~~~~~~ } ~ } ~ break; ~~~~~~ /* begline matches the empty string at the beginning of the string ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (unless `not_bol' is set in `bufp'), and, if ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `newline_anchor' is set, after newlines. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case begline: ~~~~~~~~~~~~~ DEBUG_MATCH_PRINT1 ("EXECUTING begline.\n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (AT_STRINGS_BEG (d)) ~~~~~~~~~~~~~~~~~~~~~~~ { ~ if (!bufp->not_bol) break; ~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ else ~~~~ { ~ re_char *d2 = d; ~~~~~~~~~~~~~~~~ DEC_IBYTEPTR (d2); ~~~~~~~~~~~~~~~~~~ if (itext_ichar_ascii_fmt (d2, fmt, lispobj) == '\n' && ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->newline_anchor) ~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ } ~ /* In all other cases, we fail. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto fail; ~~~~~~~~~~ /* endline is the dual of begline. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case endline: ~~~~~~~~~~~~~ DEBUG_MATCH_PRINT1 ("EXECUTING endline.\n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (AT_STRINGS_END (d)) ~~~~~~~~~~~~~~~~~~~~~~~ { ~ if (!bufp->not_eol) break; ~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* We have to ``prefetch'' the next character. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ else if ((d == end1 ? ~~~~~~~~~~~~~~~~~~~~~ itext_ichar_ascii_fmt (string2, fmt, lispobj) : ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ itext_ichar_ascii_fmt (d, fmt, lispobj)) == '\n' ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ && bufp->newline_anchor) ~~~~~~~~~~~~~~~~~~~~~~~~ { ~ break; ~~~~~~ } ~ goto fail; ~~~~~~~~~~ /* Match at the very beginning of the data. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case begbuf: ~~~~~~~~~~~~ DEBUG_MATCH_PRINT1 ("EXECUTING begbuf.\n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (AT_STRINGS_BEG (d)) ~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ goto fail; ~~~~~~~~~~ /* Match at the very end of the data. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case endbuf: ~~~~~~~~~~~~ DEBUG_MATCH_PRINT1 ("EXECUTING endbuf.\n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (AT_STRINGS_END (d)) ~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ goto fail; ~~~~~~~~~~ /* on_failure_keep_string_jump is used to optimize `.*\n'. It ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ pushes NULL as the value for the string on the stack. Then ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `pop_failure_point' will keep the current value for the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ string, instead of restoring it. To see why, consider ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ matching `foo\nbar' against `.*\n'. The .* matches the foo; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ then the . fails against the \n. But the next thing we want ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ to do is match the \n against the \n; if we restored the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ string value, we would be back at the foo. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Because this is used only in specific cases, we don't need to ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ check all the things that `on_failure_jump' does, to make ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ sure the right things get saved on the stack. Hence we don't ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ share its code. The only reason to push anything on the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ stack at all is that otherwise we would have to change ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `anychar's code to do something besides goto fail in this ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case; that seems worse than this. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case on_failure_keep_string_jump: ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT1 ("EXECUTING on_failure_keep_string_jump"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ EXTRACT_NUMBER_AND_INCR (mcnt, p); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT3 (" %d (to 0x%zx):\n", mcnt, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (Bytecount) (p + mcnt)); ~~~~~~~~~~~~~~~~~~~~~~~~ PUSH_FAILURE_POINT (p + mcnt, (unsigned char *) 0, -2); ~~~~~~~~~~~~~~~~~~~~~~~~~ regex.c:774:55: note: in definition of macro 'DEBUG_FAIL_PRINT2' if (debug_regexps & RE_DEBUG_FAILURE_POINT) printf (x1, x2) ^~ regex.c:6537:11: note: in expansion of macro 'PUSH_FAILURE_POINT' PUSH_FAILURE_POINT (p + mcnt, (unsigned char *) 0, -2); ^~~~~~~~~~~~~~~~~~ regex.c:1817:26: warning: format '%zx' expects argument of type 'size_t', but argument 2 has type 'long int' [-Wformat=] DEBUG_FAIL_PRINT2 (" Pushing string 0x%zx: `", \ ^ regex.c:774:55: note: in definition of macro 'DEBUG_FAIL_PRINT2' if (debug_regexps & RE_DEBUG_FAILURE_POINT) printf (x1, x2) ^~ regex.c:6537:11: note: in expansion of macro 'PUSH_FAILURE_POINT' PUSH_FAILURE_POINT (p + mcnt, (unsigned char *) 0, -2); ^~~~~~~~~~~~~~~~~~ regex.c:6558:31: warning: format '%zx' expects argument of type 'size_t', but argument 3 has type 'long int' [-Wformat=] DEBUG_MATCH_PRINT3 (" %d (to 0x%zx)", mcnt, (Bytecount) (p + mcnt)); ^ ~~~~~~~~~~~~~~~~~~~~~~ regex.c:791:50: note: in definition of macro 'DEBUG_MATCH_PRINT3' if (debug_regexps & RE_DEBUG_MATCHING) printf (x1, x2, x3) ^~ regex.c:1731:26: warning: format '%zd' expects argument of type 'signed size_t', but argument 2 has type 'long int' [-Wformat=] DEBUG_FAIL_PRINT2 (" Before push, next avail: %zd\n", \ ^ (Bytecount) (fail_stack).avail); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~ regex.c:774:55: note: in definition of macro 'DEBUG_FAIL_PRINT2' if (debug_regexps & RE_DEBUG_FAILURE_POINT) printf (x1, x2) ^~ regex.c:6590:11: note: in expansion of macro 'PUSH_FAILURE_POINT' PUSH_FAILURE_POINT (p + mcnt, d, -2); ^~~~~~~~~~~~~~~~~~ regex.c:1733:26: warning: format '%zd' expects argument of type 'signed size_t', but argument 2 has type 'long int' [-Wformat=] DEBUG_FAIL_PRINT2 (" size: %zd\n", \ ^ (Bytecount) (fail_stack).size); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~ regex.c:774:55: note: in definition of macro 'DEBUG_FAIL_PRINT2' if (debug_regexps & RE_DEBUG_FAILURE_POINT) printf (x1, x2) ^~ regex.c:6590:11: note: in expansion of macro 'PUSH_FAILURE_POINT' PUSH_FAILURE_POINT (p + mcnt, d, -2); ^~~~~~~~~~~~~~~~~~ regex.c:1737:26: warning: format '%zd' expects argument of type 'signed size_t', but argument 2 has type 'long int' [-Wformat=] DEBUG_FAIL_PRINT2 (" available: %zd\n", \ ^ regex.c:774:55: note: in definition of macro 'DEBUG_FAIL_PRINT2' if (debug_regexps & RE_DEBUG_FAILURE_POINT) printf (x1, x2) ^~ regex.c:6590:11: note: in expansion of macro 'PUSH_FAILURE_POINT' PUSH_FAILURE_POINT (p + mcnt, d, -2); ^~~~~~~~~~~~~~~~~~ regex.c:1756:23: warning: format '%zd' expects argument of type 'signed size_t', but argument 2 has type 'long int' [-Wformat=] DEBUG_FAIL_PRINT2 ("\n Doubled stack; size now: %zd\n", \ ^ (Bytecount) (fail_stack).size); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~ regex.c:774:55: note: in definition of macro 'DEBUG_FAIL_PRINT2' if (debug_regexps & RE_DEBUG_FAILURE_POINT) printf (x1, x2) ^~ regex.c:6590:11: note: in expansion of macro 'PUSH_FAILURE_POINT' PUSH_FAILURE_POINT (p + mcnt, d, -2); ^~~~~~~~~~~~~~~~~~ regex.c:1758:23: warning: format '%zd' expects argument of type 'signed size_t', but argument 2 has type 'long int' [-Wformat=] DEBUG_FAIL_PRINT2 (" slots available: %zd\n", \ ^ regex.c:774:55: note: in definition of macro 'DEBUG_FAIL_PRINT2' if (debug_regexps & RE_DEBUG_FAILURE_POINT) printf (x1, x2) ^~ regex.c:6590:11: note: in expansion of macro 'PUSH_FAILURE_POINT' PUSH_FAILURE_POINT (p + mcnt, d, -2); ^~~~~~~~~~~~~~~~~~ regex.c:1777:23: warning: format '%zx' expects argument of type 'size_t', but argument 2 has type 'long int' [-Wformat=] DEBUG_FAIL_PRINT2 (" start: 0x%zx\n", \ ^ (Bytecount) regstart[this_reg]); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regex.c:774:55: note: in definition of macro 'DEBUG_FAIL_PRINT2' if (debug_regexps & RE_DEBUG_FAILURE_POINT) printf (x1, x2) ^~ regex.c:6590:11: note: in expansion of macro 'PUSH_FAILURE_POINT' PUSH_FAILURE_POINT (p + mcnt, d, -2); ^~~~~~~~~~~~~~~~~~ regex.c:1779:23: warning: format '%zx' expects argument of type 'size_t', but argument 2 has type 'long int' [-Wformat=] DEBUG_FAIL_PRINT2 (" end: 0x%zx\n", \ ^ (Bytecount) regend[this_reg]); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regex.c:774:55: note: in definition of macro 'DEBUG_FAIL_PRINT2' if (debug_regexps & RE_DEBUG_FAILURE_POINT) printf (x1, x2) ^~ regex.c:6590:11: note: in expansion of macro 'PUSH_FAILURE_POINT' PUSH_FAILURE_POINT (p + mcnt, d, -2); ^~~~~~~~~~~~~~~~~~ regex.c:1781:23: warning: format '%zx' expects argument of type 'size_t', but argument 2 has type 'long int' [-Wformat=] DEBUG_FAIL_PRINT2 (" info: 0x%zx\n ", \ ^ * (long *) (®_info[this_reg])); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regex.c:774:55: note: in definition of macro 'DEBUG_FAIL_PRINT2' if (debug_regexps & RE_DEBUG_FAILURE_POINT) printf (x1, x2) ^~ regex.c:6590:11: note: in expansion of macro 'PUSH_FAILURE_POINT' PUSH_FAILURE_POINT (p + mcnt, d, -2); ^~~~~~~~~~~~~~~~~~ regex.c:1814:26: warning: format '%zx' expects argument of type 'size_t', but argument 2 has type 'long int' [-Wformat=] DEBUG_FAIL_PRINT2 (" Pushing pattern 0x%zx: \n", \ ^ (Bytecount) pattern_place); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_FAIL_PRINT_COMPILED_PATTERN (bufp, pattern_place, pend); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_FAIL_PRINT2 (" Pushing string 0x%zx: `", \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (Bytecount) string_place); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_FAIL_PRINT_DOUBLE_STRING (string_place, string1, size1, \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ string2, size2); \ ~~~~~~~~~~~~~~~~~~~~ DEBUG_FAIL_PRINT1 ("'\n"); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_FAIL_PRINT2 (" Pushing failure id: %u\n", failure_id); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } \ ~~~~~~~~~~~ } while (0) ~~~~~~~~~~~ /* This is the number of items that are pushed and popped on the stack ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for each register. */ ~~~~~~~~~~~~~~~~~~~~~~ #define NUM_REG_ITEMS 3 ~~~~~~~~~~~~~~~~~~~~~~~~ /* Individual items aside from the registers. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef DEBUG ~~~~~~~~~~~~ #define NUM_NONREG_ITEMS 5 /* Includes failure point id. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #else ~~~~~ #define NUM_NONREG_ITEMS 4 ~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ /* We push at most this many items on the stack. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* We used to use (num_regs - 1), which is the number of registers ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ this regexp will save; but that was changed to 5 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ to avoid stack overflow for a regexp with lots of parens. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define MAX_FAILURE_ITEMS (5 * NUM_REG_ITEMS + NUM_NONREG_ITEMS) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* We actually push this many items. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define NUM_FAILURE_ITEMS \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ((highest_active_reg - lowest_active_reg + 1) * NUM_REG_ITEMS \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + NUM_NONREG_ITEMS) ~~~~~~~~~~~~~~~~~~~ /* How many items can still be added to the stack without overflowing it. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define REMAINING_AVAIL_SLOTS ((fail_stack).size - (fail_stack).avail) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Pops what PUSH_FAIL_STACK pushes. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ We restore into the following parameters, all of which should be lvalues: ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ STR -- the saved data position. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PAT -- the saved pattern position. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ LOW_REG, HIGH_REG -- the highest and lowest active registers. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ REGSTART, REGEND -- arrays of string positions. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ REG_INFO -- array of information about each subexpression. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Also assumes the variables `fail_stack' and (if debugging), `bufp', ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `pend', `string1', `size1', `string2', and `size2'. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define POP_FAILURE_POINT(str, pat, low_reg, high_reg, \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regstart, regend, reg_info) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ do { \ ~~~~~~~~~~~~~~ DEBUG_STATEMENT (int ffailure_id;) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int this_reg; \ ~~~~~~~~~~~~~~~~~~~~~~ const unsigned char *string_temp; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ \ ~ /* Remove failure points and point to how many regs pushed. */ \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ assert (fail_stack.avail >= NUM_NONREG_ITEMS); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ \ ~ if (DEBUG_RUNTIME_FLAGS & RE_DEBUG_FAILURE_POINT) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { \ ~~~~~~~~~~~ DEBUG_FAIL_PRINT1 ("POP_FAILURE_POINT:\n"); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_FAIL_PRINT2 (" Before pop, next avail: %zd\n", \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (Bytecount) fail_stack.avail); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_FAIL_PRINT2 (" size: %zd\n", \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (Bytecount) fail_stack.size); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } \ ~~~~~~~~~~~ \ ~ DEBUG_STATEMENT (ffailure_id = POP_FAILURE_INT()); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ \ ~ /* If the saved string location is NULL, it came from an \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ on_failure_keep_string_jump opcode, and we want to throw away the \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ saved NULL, thus retaining our current position in the string. */ \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ string_temp = POP_FAILURE_RELOCATABLE (); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (string_temp != NULL) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ str = string_temp; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~ \ ~ pat = (unsigned char *) POP_FAILURE_POINTER (); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ \ ~ /* Restore register info. */ \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ high_reg = POP_FAILURE_INT (); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ low_reg = POP_FAILURE_INT (); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ \ ~ if (DEBUG_RUNTIME_FLAGS & RE_DEBUG_FAILURE_POINT) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { \ ~~~~~~~~~~~ DEBUG_FAIL_PRINT2 (" Popping failure id: %d\n", ffailure_id); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_FAIL_PRINT2 (" Popping string 0x%zx: `", (Bytecount) str); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_FAIL_PRINT_DOUBLE_STRING (str, string1, size1, \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ string2, size2); \ ~~~~~~~~~~~~~~~~~~~~ DEBUG_FAIL_PRINT1 ("'\n"); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_FAIL_PRINT2 (" Popping pattern 0x%zx: ", (Bytecount) pat); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_FAIL_PRINT_COMPILED_PATTERN (bufp, pat, pend); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_FAIL_PRINT2 (" Popping high active reg: %d\n", high_reg); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_FAIL_PRINT2 (" Popping low active reg: %d\n", low_reg); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } \ ~~~~~~~~~~~ \ ~ for (this_reg = high_reg; this_reg >= low_reg; this_reg--) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { \ ~~~~~~~~~~~ reg_info[this_reg].word = POP_FAILURE_ELT (); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regend[this_reg] = POP_FAILURE_RELOCATABLE (); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regstart[this_reg] = POP_FAILURE_RELOCATABLE (); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ \ ~ if (DEBUG_RUNTIME_FLAGS & RE_DEBUG_FAILURE_POINT) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { \ ~~~~~~~~~~ DEBUG_FAIL_PRINT2 (" Popping reg: %d\n", this_reg); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_FAIL_PRINT2 (" info: 0x%zx\n", \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ * (Bytecount *) ®_info[this_reg]); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_FAIL_PRINT2 (" end: 0x%zx\n", \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (Bytecount) regend[this_reg]); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_FAIL_PRINT2 (" start: 0x%zx\n", \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (Bytecount) regstart[this_reg]); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } \ ~~~~~~~~~~ } \ ~~~~~~~~~~~ \ ~ set_regs_matched_done = 0; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_STATEMENT (nfailure_points_popped++); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } while (0) /* POP_FAILURE_POINT */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ~ /* Structure for per-register (a.k.a. per-group) information. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Other register information, such as the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ starting and ending positions (which are addresses), and the list of ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ inner groups (which is a bits list) are maintained in separate ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ variables. ~~~~~~~~~~ We are making a (strictly speaking) nonportable assumption here: that ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the compiler will pack our bit fields into something that fits into ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the type of `word', i.e., is something that fits into one item on the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ failure stack. */ ~~~~~~~~~~~~~~~~~~ typedef union ~~~~~~~~~~~~~ { ~ fail_stack_elt_t word; ~~~~~~~~~~~~~~~~~~~~~~ struct ~~~~~~ { ~ /* This field is one if this group can match the empty string, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ zero if not. If not yet determined, `MATCH_NULL_UNSET_VALUE'. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define MATCH_NULL_UNSET_VALUE 3 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ unsigned int match_null_string_p : 2; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ unsigned int is_active : 1; ~~~~~~~~~~~~~~~~~~~~~~~~~~~ unsigned int matched_something : 1; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ unsigned int ever_matched_something : 1; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } bits; ~~~~~~~ } register_info_type; ~~~~~~~~~~~~~~~~~~~~~ #define REG_MATCH_NULL_STRING_P(R) ((R).bits.match_null_string_p) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define IS_ACTIVE(R) ((R).bits.is_active) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define MATCHED_SOMETHING(R) ((R).bits.matched_something) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define EVER_MATCHED_SOMETHING(R) ((R).bits.ever_matched_something) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Call this when have matched a real character; it sets `matched' flags ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for the subexpressions which we are currently inside. Also records ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ that those subexprs have matched. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define SET_REGS_MATCHED() \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ do \ ~~~~~~~~~~~~ { \ ~~~~~~~~~~~ if (!set_regs_matched_done) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { \ ~~~~~~~~~~ int r; \ ~~~~~~~~~~~~~~ set_regs_matched_done = 1; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (r = lowest_active_reg; r <= highest_active_reg; r++) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { \ ~~~~~~~~~~ MATCHED_SOMETHING (reg_info[r]) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ = EVER_MATCHED_SOMETHING (reg_info[r]) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ = 1; \ ~~~~~~~~~~~~ } \ ~~~~~~~~~~ } \ ~~~~~~~~~~ } \ ~~~~~~~~~~~ while (0) ~~~~~~~~~ ~ /* Subroutine declarations and macros for regex_compile. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Fetch the next character in the uncompiled pattern---translating it ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if necessary. */ ~~~~~~~~~~~~~~~~~ #define PATFETCH(c) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~ do { \ ~~~~~~~~~~~~~~ PATFETCH_RAW (c); \ ~~~~~~~~~~~~~~~~~~~~~~~~~ c = RE_TRANSLATE (c); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } while (0) ~~~~~~~~~~~ /* Fetch the next character in the uncompiled pattern, with no ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ translation. */ ~~~~~~~~~~~~~~~~ #define PATFETCH_RAW(c) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ do {if (p == pend) return REG_EEND; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ assert (p < pend); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~ c = itext_ichar (p); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ INC_IBYTEPTR (p); \ ~~~~~~~~~~~~~~~~~~~~~~~~~ } while (0) ~~~~~~~~~~~ /* Go backwards one character in the pattern. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define PATUNFETCH DEC_IBYTEPTR (p) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* If `translate' is non-null, return translate[D], else just D. We ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ cast the subscript to translate because some data is declared as ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `char *', to avoid warnings when a string constant is passed. But ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ when we use a character as a subscript we must make it unsigned. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define RE_TRANSLATE(d) \ ~~~~~~~~~~~~~~~~~~~~~~~~~ (TRANSLATE_P (translate) ? RE_TRANSLATE_1 (d) : (d)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Macros for outputting the compiled pattern into `buffer'. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* If the buffer isn't allocated when it comes in, use this. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define INIT_BUF_SIZE 32 ~~~~~~~~~~~~~~~~~~~~~~~~~ /* Make sure we have at least N more bytes of space in buffer. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define GET_BUFFER_SPACE(n) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ while (buf_end - bufp->buffer + (n) > (ptrdiff_t) bufp->allocated) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ EXTEND_BUFFER () ~~~~~~~~~~~~~~~~ /* Make sure we have one more byte of buffer space and then add C to it. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define BUF_PUSH(c) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~ do { \ ~~~~~~~~~~~~~~ GET_BUFFER_SPACE (1); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ *buf_end++ = (unsigned char) (c); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } while (0) ~~~~~~~~~~~ /* Ensure we have two more bytes of buffer space and then append C1 and C2. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define BUF_PUSH_2(c1, c2) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ do { \ ~~~~~~~~~~~~~~ GET_BUFFER_SPACE (2); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ *buf_end++ = (unsigned char) (c1); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ *buf_end++ = (unsigned char) (c2); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } while (0) ~~~~~~~~~~~ /* As with BUF_PUSH_2, except for three bytes. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define BUF_PUSH_3(c1, c2, c3) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ do { \ ~~~~~~~~~~~~~~ GET_BUFFER_SPACE (3); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ *buf_end++ = (unsigned char) (c1); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ *buf_end++ = (unsigned char) (c2); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ *buf_end++ = (unsigned char) (c3); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } while (0) ~~~~~~~~~~~ /* Store a jump with opcode OP at LOC to location TO. We store a ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ relative address offset by the three bytes the jump itself occupies. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define STORE_JUMP(op, loc, to) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ store_op1 (op, loc, (to) - (loc) - 3) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Likewise, for a two-argument jump. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define STORE_JUMP2(op, loc, to, arg) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ store_op2 (op, loc, (to) - (loc) - 3, arg) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Like `STORE_JUMP', but for inserting. Assume `buf_end' is the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ buffer end. */ ~~~~~~~~~~~~~~~ #define INSERT_JUMP(op, loc, to) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ insert_op1 (op, loc, (to) - (loc) - 3, buf_end) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Like `STORE_JUMP2', but for inserting. Assume `buf_end' is the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ buffer end. */ ~~~~~~~~~~~~~~~ #define INSERT_JUMP2(op, loc, to, arg) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ insert_op2 (op, loc, (to) - (loc) - 3, arg, buf_end) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Extend the buffer by twice its current size via realloc and ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ reset the pointers that pointed into the old block to point to the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ correct places in the new one. If extending the buffer results in it ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ being larger than RE_MAX_BUF_SIZE, then flag memory exhausted. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define EXTEND_BUFFER() \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ do { \ ~~~~~~~~~~~~~~~ re_char *old_buffer = bufp->buffer; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (bufp->allocated == RE_MAX_BUF_SIZE) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return REG_ESIZE; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->allocated <<= 1; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (bufp->allocated > RE_MAX_BUF_SIZE) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->allocated = RE_MAX_BUF_SIZE; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->buffer = \ ~~~~~~~~~~~~~~~~~~~~~~~ (unsigned char *) xrealloc (bufp->buffer, bufp->allocated); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (bufp->buffer == NULL) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return REG_ESPACE; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~ /* If the buffer moved, move all the pointers into it. */ \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (old_buffer != bufp->buffer) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { \ ~~~~~~~~~~~~ buf_end = (buf_end - old_buffer) + bufp->buffer; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ begalt = (begalt - old_buffer) + bufp->buffer; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (fixup_alt_jump) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~ fixup_alt_jump = (fixup_alt_jump - old_buffer) + bufp->buffer; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (laststart) \ ~~~~~~~~~~~~~~~~~~~~~~~ laststart = (laststart - old_buffer) + bufp->buffer; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (pending_exact) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~ pending_exact = (pending_exact - old_buffer) + bufp->buffer; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } \ ~~~~~~~~~~~~ } while (0) ~~~~~~~~~~~ #define INIT_REG_TRANSLATE_SIZE 5 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Macros for the compile stack. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Since offsets can go either forwards or backwards, this type needs to be ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ able to hold values from -(RE_MAX_BUF_SIZE - 1) to RE_MAX_BUF_SIZE - 1. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ typedef int pattern_offset_t; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ typedef struct ~~~~~~~~~~~~~~ { ~ pattern_offset_t begalt_offset; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ pattern_offset_t fixup_alt_jump; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ pattern_offset_t inner_group_offset; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ pattern_offset_t laststart_offset; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regnum_t regnum; ~~~~~~~~~~~~~~~~ } compile_stack_elt_t; ~~~~~~~~~~~~~~~~~~~~~~ typedef struct ~~~~~~~~~~~~~~ { ~ compile_stack_elt_t *stack; ~~~~~~~~~~~~~~~~~~~~~~~~~~~ int size; ~~~~~~~~~ int avail; /* Offset of next open position. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } compile_stack_type; ~~~~~~~~~~~~~~~~~~~~~ #define INIT_COMPILE_STACK_SIZE 32 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define COMPILE_STACK_EMPTY (compile_stack.avail == 0) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define COMPILE_STACK_FULL (compile_stack.avail == compile_stack.size) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* The next available element. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define COMPILE_STACK_TOP (compile_stack.stack[compile_stack.avail]) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Set the bit for character C in a bit vector. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define SET_LIST_BIT(c) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (buf_end[((unsigned char) (c)) / BYTEWIDTH] \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ |= 1 << (((unsigned char) c) % BYTEWIDTH)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef MULE ~~~~~~~~~~~ /* Set the "bit" for character C in a range table. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define SET_RANGETAB_BIT(c) put_range_table (rtab, c, c, Qt) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ #ifdef emacs ~~~~~~~~~~~~ /* Parse the longest number we can, but don't produce a bignum, that can't ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ correspond to anything we're interested in and would needlessly complicate ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ code. Also avoid the silent overflow issues of the non-emacs code below. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ If the string at P is not exhausted, leave P pointing at the next ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (probable-)non-digit byte encountered. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define GET_UNSIGNED_NUMBER(num) do \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Ibyte *_gus_numend = NULL; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Lisp_Object _gus_numno; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* most-positive-fixnum on 32 bit XEmacs is 10 decimal digits, \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ nine will keep us in fixnum territory no matter our \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ architecture */ \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Bytecount limit = min (pend - p, 9); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ \ ~ /* Require that any digits are ASCII. We already require that \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the user type ASCII in order to type {,(,|, etc, and there is \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the potential for security holes in the future if we allow \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ non-ASCII digits to specify groups in regexps and other \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ code that parses regexps is not aware of this. */ \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ _gus_numno = parse_integer (p, &_gus_numend, limit, 10, 1, \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Vdigit_fixnum_ascii); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (FIXNUMP (_gus_numno) && XREALFIXNUM (_gus_numno) >= 0) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ num = XREALFIXNUM (_gus_numno); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ p = _gus_numend; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } while (0) ~~~~~~~~~~~ #else ~~~~~ /* Get the next unsigned number in the uncompiled pattern. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define GET_UNSIGNED_NUMBER(num) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { if (p != pend) \ ~~~~~~~~~~~~~~~~~~~~~~~~ { \ ~~~~~~~~~~~ int _gun_do_unfetch = 1; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PATFETCH (c); \ ~~~~~~~~~~~~~~~~~~~~~~ while (ISDIGIT (c)) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~ { \ ~~~~~~~~~~~ if (num < 0) \ ~~~~~~~~~~~~~~~~~~~~ num = 0; \ ~~~~~~~~~~~~~~~~ num = num * 10 + c - '0'; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (p == pend) \ ~~~~~~~~~~~~~~~~~~~~~~ { \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ _gun_do_unfetch = 0; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; \ ~~~~~~~~~~~~~~~ } \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PATFETCH (c); \ ~~~~~~~~~~~~~~~~~~~~ } \ ~~~~~~~~~~~ if (_gun_do_unfetch) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Make sure P points to the next non-digit character. */ \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PATUNFETCH; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ #endif ~~~~~~ /* Map a string to the char class it names (if any). BEG points to the string ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ to be parsed and LIMIT is the length, in bytes, of that string. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ XEmacs; this only handles the NAME part of the [:NAME:] specification of a ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ character class name. The GNU emacs version of this function attempts to ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ handle the string from [: onwards, and is called re_wctype_parse. Our ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ approach means the function doesn't need to be called with every character ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ class encountered. ~~~~~~~~~~~~~~~~~~ LENGTH would be a Bytecount if this function didn't need to be compiled ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ also for executables that don't include lisp.h ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Return RECC_ERROR if STRP doesn't match a known character class. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_wctype_t ~~~~~~~~~~~ re_wctype (const unsigned char *beg, int limit) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ /* Sort tests in the length=five case by frequency the classes to minimize ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ number of times we fail the comparison. The frequencies of character class ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ names used in Emacs sources as of 2016-07-27: ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ $ find \( -name \*.c -o -name \*.el \) -exec grep -h '\[:[a-z]*:]' {} + | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ sed 's/]/]\n/g' |grep -o '\[:[a-z]*:]' |sort |uniq -c |sort -nr ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 213 [:alnum:] ~~~~~~~~~~~~~ 104 [:alpha:] ~~~~~~~~~~~~~ 62 [:space:] ~~~~~~~~~~~~ 39 [:digit:] ~~~~~~~~~~~~ 36 [:blank:] ~~~~~~~~~~~~ 26 [:word:] ~~~~~~~~~~~ 26 [:upper:] ~~~~~~~~~~~~ 21 [:lower:] ~~~~~~~~~~~~ 10 [:xdigit:] ~~~~~~~~~~~~~ 10 [:punct:] ~~~~~~~~~~~~ 10 [:ascii:] ~~~~~~~~~~~~ 4 [:nonascii:] ~~~~~~~~~~~~~~ 4 [:graph:] ~~~~~~~~~~~ 2 [:print:] ~~~~~~~~~~~ 2 [:cntrl:] ~~~~~~~~~~~ 1 [:ff:] ~~~~~~~~ If you update this list, consider also updating chain of or'ed conditions ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ in execute_charset function. XEmacs; our equivalent is the condition ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ checking class_bits in the charset_mule and charset_mule_not opcodes. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ ~~ switch (limit) { ~~~~~~~~~~~~~~~~ case 4: ~~~~~~~ if (!memcmp (beg, "word", 4)) return RECC_WORD; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case 5: ~~~~~~~ if (!memcmp (beg, "alnum", 5)) return RECC_ALNUM; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!memcmp (beg, "alpha", 5)) return RECC_ALPHA; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!memcmp (beg, "space", 5)) return RECC_SPACE; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!memcmp (beg, "digit", 5)) return RECC_DIGIT; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!memcmp (beg, "blank", 5)) return RECC_BLANK; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!memcmp (beg, "upper", 5)) return RECC_UPPER; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!memcmp (beg, "lower", 5)) return RECC_LOWER; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!memcmp (beg, "punct", 5)) return RECC_PUNCT; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!memcmp (beg, "ascii", 5)) return RECC_ASCII; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!memcmp (beg, "graph", 5)) return RECC_GRAPH; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!memcmp (beg, "print", 5)) return RECC_PRINT; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!memcmp (beg, "cntrl", 5)) return RECC_CNTRL; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case 6: ~~~~~~~ if (!memcmp (beg, "xdigit", 6)) return RECC_XDIGIT; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case 7: ~~~~~~~ if (!memcmp (beg, "unibyte", 7)) return RECC_UNIBYTE; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case 8: ~~~~~~~ if (!memcmp (beg, "nonascii", 8)) return RECC_NONASCII; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case 9: ~~~~~~~ if (!memcmp (beg, "multibyte", 9)) return RECC_MULTIBYTE; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ } ~ return RECC_ERROR; ~~~~~~~~~~~~~~~~~~ } ~ /* True if CH is in the char class CC. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int ~~~ re_iswctype (int ch, re_wctype_t cc ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RE_ISWCTYPE_ARG_DECL) ~~~~~~~~~~~~~~~~~~~~~ { ~ switch (cc) ~~~~~~~~~~~ { ~ case RECC_ALNUM: return ISALNUM (ch) != 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_ALPHA: return ISALPHA (ch) != 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_BLANK: return ISBLANK (ch) != 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_CNTRL: return ISCNTRL (ch) != 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_DIGIT: return ISDIGIT (ch) != 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_GRAPH: return ISGRAPH (ch) != 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_PRINT: return ISPRINT (ch) != 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_PUNCT: return ISPUNCT (ch) != 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_SPACE: return ISSPACE (ch) != 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef emacs ~~~~~~~~~~~~ case RECC_UPPER: ~~~~~~~~~~~~~~~~ return NILP (lispbuf->case_fold_search) ? ISUPPER (ch) != 0 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ : !NOCASEP (lispbuf, ch); ~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_LOWER: ~~~~~~~~~~~~~~~~ return NILP (lispbuf->case_fold_search) ? ISLOWER (ch) != 0 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ : !NOCASEP (lispbuf, ch); ~~~~~~~~~~~~~~~~~~~~~~~~~ #else ~~~~~ case RECC_UPPER: return ISUPPER (ch) != 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_LOWER: return ISLOWER (ch) != 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ case RECC_XDIGIT: return ISXDIGIT (ch) != 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_ASCII: return ISASCII (ch) != 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_NONASCII: case RECC_MULTIBYTE: return !ISASCII (ch); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_UNIBYTE: return ISUNIBYTE (ch) != 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_WORD: return ISWORD (ch) != 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_ERROR: return false; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ assert (0); ~~~~~~~~~~~ return -1; ~~~~~~~~~~ } ~ #ifdef MULE ~~~~~~~~~~~ static re_bool ~~~~~~~~~~~~~~ re_wctype_can_match_non_ascii (re_wctype_t cc) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ switch (cc) ~~~~~~~~~~~ { ~ case RECC_ASCII: ~~~~~~~~~~~~~~~~ case RECC_UNIBYTE: ~~~~~~~~~~~~~~~~~~ case RECC_CNTRL: ~~~~~~~~~~~~~~~~ case RECC_DIGIT: ~~~~~~~~~~~~~~~~ case RECC_XDIGIT: ~~~~~~~~~~~~~~~~~ case RECC_BLANK: ~~~~~~~~~~~~~~~~ return false; ~~~~~~~~~~~~~ default: ~~~~~~~~ return true; ~~~~~~~~~~~~ } ~ } ~ #endif /* MULE */ ~~~~~~~~~~~~~~~~~ #ifdef emacs ~~~~~~~~~~~~ /* Return a bit-pattern to use in the range-table bits to match multibyte ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ chars of class CC. */ ~~~~~~~~~~~~~~~~~~~~~~ static unsigned char ~~~~~~~~~~~~~~~~~~~~ re_wctype_to_bit (re_wctype_t cc) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ switch (cc) ~~~~~~~~~~~ { ~ case RECC_PRINT: case RECC_GRAPH: ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_ALPHA: return BIT_ALPHA; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_ALNUM: case RECC_WORD: return BIT_WORD; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_LOWER: return BIT_LOWER; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_UPPER: return BIT_UPPER; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_PUNCT: return BIT_PUNCT; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_SPACE: return BIT_SPACE; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_MULTIBYTE: case RECC_NONASCII: ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_ASCII: case RECC_DIGIT: case RECC_XDIGIT: case RECC_CNTRL: ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_BLANK: case RECC_UNIBYTE: case RECC_ERROR: return 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ default: ~~~~~~~~ ABORT (); ~~~~~~~~~ return 0; ~~~~~~~~~ } ~ } ~ #endif /* emacs */ ~~~~~~~~~~~~~~~~~~ ~ static void store_op1 (re_opcode_t op, unsigned char *loc, int arg); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ static void store_op2 (re_opcode_t op, unsigned char *loc, int arg1, int arg2); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ static void insert_op1 (re_opcode_t op, unsigned char *loc, int arg, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ unsigned char *end); ~~~~~~~~~~~~~~~~~~~~ static void insert_op2 (re_opcode_t op, unsigned char *loc, int arg1, int arg2, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ unsigned char *end); ~~~~~~~~~~~~~~~~~~~~ static re_bool at_begline_loc_p (re_char *pattern, re_char *p, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ reg_syntax_t syntax); ~~~~~~~~~~~~~~~~~~~~~ static re_bool at_endline_loc_p (re_char *p, re_char *pend, int syntax); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ static re_bool group_in_compile_stack (compile_stack_type compile_stack, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regnum_t regnum); ~~~~~~~~~~~~~~~~~ static reg_errcode_t compile_range (re_char **p_ptr, re_char *pend, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RE_TRANSLATE_TYPE translate, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ reg_syntax_t syntax, ~~~~~~~~~~~~~~~~~~~~ unsigned char *b); ~~~~~~~~~~~~~~~~~~ #ifdef MULE ~~~~~~~~~~~ static reg_errcode_t compile_extended_range (re_char **p_ptr, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_char *pend, ~~~~~~~~~~~~~~ RE_TRANSLATE_TYPE translate, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ reg_syntax_t syntax, ~~~~~~~~~~~~~~~~~~~~ Lisp_Object rtab); ~~~~~~~~~~~~~~~~~~ #endif /* MULE */ ~~~~~~~~~~~~~~~~~ #ifdef emacs ~~~~~~~~~~~~ reg_errcode_t compile_char_class (re_wctype_t cc, Lisp_Object rtab, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Bitbyte *flags_out); ~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ static re_bool group_match_null_string_p (re_char **p, re_char *end, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ register_info_type *reg_info); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ static re_bool alt_match_null_string_p (re_char *p, re_char *end, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ register_info_type *reg_info); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ static re_bool common_op_match_null_string_p (re_char **p, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_char *end, ~~~~~~~~~~~~~ register_info_type *reg_info); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ static int bcmp_translate (re_char *s1, re_char *s2, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ REGISTER int len, RE_TRANSLATE_TYPE translate ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef emacs ~~~~~~~~~~~~ , Internal_Format fmt, Lisp_Object lispobj ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ ); ~~ static int re_match_2_internal (struct re_pattern_buffer *bufp, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_char *string1, int size1, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_char *string2, int size2, int pos, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ struct re_registers *regs, int stop ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RE_LISP_CONTEXT_ARGS_DECL); ~~~~~~~~~~~~~~~~~~~~~~~~~~~ ~ #ifndef MATCH_MAY_ALLOCATE ~~~~~~~~~~~~~~~~~~~~~~~~~~ /* If we cannot allocate large objects within re_match_2_internal, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ we make the fail stack and register vectors global. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ The fail stack, we grow to the maximum size when a regexp ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ is compiled. ~~~~~~~~~~~~ The register vectors, we adjust in size each time we ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ compile a regexp, according to the number of registers it needs. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ static fail_stack_type fail_stack; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Size with which the following vectors are currently allocated. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ That is so we can make them bigger as needed, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ but never make them smaller. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ static int regs_allocated_size; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ static re_char ** regstart, ** regend; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ static re_char ** old_regstart, ** old_regend; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ static re_char **best_regstart, **best_regend; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ static register_info_type *reg_info; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ static re_char **reg_dummy; ~~~~~~~~~~~~~~~~~~~~~~~~~~~ static register_info_type *reg_info_dummy; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Make the register vectors big enough for NUM_REGS registers, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ but don't make them smaller. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ static ~~~~~~ regex_grow_registers (int num_regs) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ if (num_regs > regs_allocated_size) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ RETALLOC (regstart, num_regs, re_char *); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RETALLOC (regend, num_regs, re_char *); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RETALLOC (old_regstart, num_regs, re_char *); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RETALLOC (old_regend, num_regs, re_char *); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RETALLOC (best_regstart, num_regs, re_char *); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RETALLOC (best_regend, num_regs, re_char *); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RETALLOC (reg_info, num_regs, register_info_type); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RETALLOC (reg_dummy, num_regs, re_char *); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RETALLOC (reg_info_dummy, num_regs, register_info_type); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regs_allocated_size = num_regs; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ } ~ #endif /* not MATCH_MAY_ALLOCATE */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ~ /* `regex_compile' compiles PATTERN (of length SIZE) according to SYNTAX. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Returns one of error codes defined in `regex.h', or zero for success. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Assumes the `allocated' (and perhaps `buffer') and `translate' ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fields are set in BUFP on entry. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ If it succeeds, results are put in BUFP (if it returns an error, the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ contents of BUFP are undefined): ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `buffer' is the compiled pattern; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `syntax' is set to SYNTAX; ~~~~~~~~~~~~~~~~~~~~~~~~~~ `used' is set to the length of the compiled pattern; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `fastmap_accurate' is zero; ~~~~~~~~~~~~~~~~~~~~~~~~~~~ `re_ngroups' is the number of groups/subexpressions (including shy ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ groups) in PATTERN; ~~~~~~~~~~~~~~~~~~~ `re_nsub' is the number of non-shy groups in PATTERN; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `not_bol' and `not_eol' are zero; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ The `fastmap' and `newline_anchor' fields are neither ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ examined nor set. */ ~~~~~~~~~~~~~~~~~~~~~ /* Return, freeing storage we allocated. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define FREE_STACK_RETURN(value) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ do \ ~~~~~~~~~~ { \ ~~~~~~~~~ xfree (compile_stack.stack); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return value; \ ~~~~~~~~~~~~~~~~~~~~ } while (0) ~~~~~~~~~~~ static reg_errcode_t ~~~~~~~~~~~~~~~~~~~~ regex_compile (re_char *pattern, int size, reg_syntax_t syntax, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ struct re_pattern_buffer *bufp) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ /* We fetch characters from PATTERN here. We declare these as int ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (or possibly long) so that chars above 127 can be used as ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ array indices. The macros that fetch a character from the pattern ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ make sure to coerce to unsigned char before assigning, so we won't ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ get bitten by negative numbers here. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* XEmacs change: used to be unsigned char. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ REGISTER EMACS_INT c, c1; ~~~~~~~~~~~~~~~~~~~~~~~~~ /* A random temporary spot in PATTERN. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_char *p1; ~~~~~~~~~~~~ /* Points to the end of the buffer, where we should append. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ REGISTER unsigned char *buf_end; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Keeps track of unclosed groups. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ compile_stack_type compile_stack; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Points to the current (ending) position in the pattern. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_char *p = pattern; ~~~~~~~~~~~~~~~~~~~~~ re_char *pend = pattern + size; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* How to translate the characters in the pattern. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RE_TRANSLATE_TYPE translate = bufp->translate; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Address of the count-byte of the most recently inserted `exactn' ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ command. This makes it possible to tell if a new exact-match ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ character can be added to that command or if the character requires ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ a new `exactn' command. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~ unsigned char *pending_exact = 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Address of start of the most recently finished expression. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ This tells, e.g., postfix * where to find the start of its ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ operand. Reset at the beginning of groups and alternatives. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ unsigned char *laststart = 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Address of beginning of regexp, or inside of last group. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ unsigned char *begalt; ~~~~~~~~~~~~~~~~~~~~~~ /* Place in the uncompiled pattern (i.e., the {) to ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ which to go back if the interval is invalid. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_char *beg_interval; ~~~~~~~~~~~~~~~~~~~~~~ /* Address of the place where a forward jump should go to the end of ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the containing expression. Each alternative of an `or' -- except the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ last -- ends with a forward jump of this sort. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ unsigned char *fixup_alt_jump = 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Counts open-groups as they are encountered. Remembered for the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ matching close-group on the compile stack, so the same register ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ number is put in the stop_memory as the start_memory. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regnum_t regnum = 0; ~~~~~~~~~~~~~~~~~~~~ #ifdef DEBUG ~~~~~~~~~~~~ if (debug_regexps & RE_DEBUG_COMPILATION) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ int debug_count; ~~~~~~~~~~~~~~~~ DEBUG_PRINT1 ("\nCompiling pattern: "); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (debug_count = 0; debug_count < size; debug_count++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ putchar (pattern[debug_count]); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ putchar ('\n'); ~~~~~~~~~~~~~~~ } ~ #endif /* DEBUG */ ~~~~~~~~~~~~~~~~~~ /* Initialize the compile stack. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ compile_stack.stack = TALLOC (INIT_COMPILE_STACK_SIZE, compile_stack_elt_t); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (compile_stack.stack == NULL) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return REG_ESPACE; ~~~~~~~~~~~~~~~~~~ compile_stack.size = INIT_COMPILE_STACK_SIZE; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ compile_stack.avail = 0; ~~~~~~~~~~~~~~~~~~~~~~~~ /* Initialize the pattern buffer. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->syntax = syntax; ~~~~~~~~~~~~~~~~~~~~~~ bufp->fastmap_accurate = 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->not_bol = bufp->not_eol = 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Set `used' to zero, so that if we return an error, the pattern ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ printer (for debugging) will think there's no pattern. We reset it ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ at the end. */ ~~~~~~~~~~~~~~~ bufp->used = 0; ~~~~~~~~~~~~~~~ /* Always count groups, whether or not bufp->no_sub is set. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->re_nsub = 0; ~~~~~~~~~~~~~~~~~~ bufp->re_ngroups = 0; ~~~~~~~~~~~~~~~~~~~~~ bufp->warned_about_incompatible_back_references = 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (bufp->external_to_internal_register == 0) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ bufp->external_to_internal_register_size = INIT_REG_TRANSLATE_SIZE; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RETALLOC (bufp->external_to_internal_register, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->external_to_internal_register_size, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int); ~~~~~ } ~ { ~ int i; ~~~~~~ bufp->external_to_internal_register[0] = 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (i = 1; i < bufp->external_to_internal_register_size; i++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->external_to_internal_register[i] = (int) 0xDEADBEEF; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ #if !defined (emacs) && !defined (SYNTAX_TABLE) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Initialize the syntax table. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ init_syntax_once (); ~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ if (bufp->allocated == 0) ~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ if (bufp->buffer) ~~~~~~~~~~~~~~~~~ { /* If zero allocated, but buffer is non-null, try to realloc ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ enough space. This loses if buffer's address is bogus, but ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ that is the user's responsibility. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RETALLOC (bufp->buffer, INIT_BUF_SIZE, unsigned char); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ else ~~~~ { /* Caller did not allocate a buffer. Do it for them. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->buffer = TALLOC (INIT_BUF_SIZE, unsigned char); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ if (!bufp->buffer) FREE_STACK_RETURN (REG_ESPACE); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->allocated = INIT_BUF_SIZE; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ begalt = buf_end = bufp->buffer; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Loop through the uncompiled pattern until we're at the end. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ while (p != pend) ~~~~~~~~~~~~~~~~~ { ~ PATFETCH (c); ~~~~~~~~~~~~~ switch (c) ~~~~~~~~~~ { ~ case '^': ~~~~~~~~~ { ~ if ( /* If at start of pattern, it's an operator. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ p == pattern + 1 ~~~~~~~~~~~~~~~~ /* If context independent, it's an operator. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || syntax & RE_CONTEXT_INDEP_ANCHORS ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Otherwise, depends on what's come before. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || at_begline_loc_p (pattern, p, syntax)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ BUF_PUSH (begline); ~~~~~~~~~~~~~~~~~~~ else ~~~~ goto normal_char; ~~~~~~~~~~~~~~~~~ } ~ break; ~~~~~~ case '$': ~~~~~~~~~ { ~ if ( /* If at end of pattern, it's an operator. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ p == pend ~~~~~~~~~ /* If context independent, it's an operator. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || syntax & RE_CONTEXT_INDEP_ANCHORS ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Otherwise, depends on what's next. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || at_endline_loc_p (p, pend, syntax)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ BUF_PUSH (endline); ~~~~~~~~~~~~~~~~~~~ else ~~~~ goto normal_char; ~~~~~~~~~~~~~~~~~ } ~ break; ~~~~~~ case '+': ~~~~~~~~~ case '?': ~~~~~~~~~ if ((syntax & RE_BK_PLUS_QM) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || (syntax & RE_LIMITED_OPS)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto normal_char; ~~~~~~~~~~~~~~~~~ handle_plus: ~~~~~~~~~~~~ case '*': ~~~~~~~~~ /* If there is no previous pattern... */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!laststart) ~~~~~~~~~~~~~~~ { ~ if (syntax & RE_CONTEXT_INVALID_OPS) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ FREE_STACK_RETURN (REG_BADRPT); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ else if (!(syntax & RE_CONTEXT_INDEP_OPS)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto normal_char; ~~~~~~~~~~~~~~~~~ } ~ { ~ /* true means zero/many matches are allowed. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_bool zero_times_ok = c != '+'; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_bool many_times_ok = c != '?'; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* true means match shortest string possible. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_bool minimal = false; ~~~~~~~~~~~~~~~~~~~~~~~~ /* If there is a sequence of repetition chars, collapse it ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ down to just one (the right one). We can't combine ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ interval operators with these because of, e.g., `a{2}*', ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ which should only match an even number of `a's. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ while (p != pend) ~~~~~~~~~~~~~~~~~ { ~ PATFETCH (c); ~~~~~~~~~~~~~ if (c == '*' || (!(syntax & RE_BK_PLUS_QM) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ && (c == '+' || c == '?'))) ~~~~~~~~~~~~~~~~~~~~~~~~~~~ ; ~ else if (syntax & RE_BK_PLUS_QM && c == '\\') ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ if (p == pend) FREE_STACK_RETURN (REG_EESCAPE); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PATFETCH (c1); ~~~~~~~~~~~~~~ if (!(c1 == '+' || c1 == '?')) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ PATUNFETCH; ~~~~~~~~~~~ PATUNFETCH; ~~~~~~~~~~~ break; ~~~~~~ } ~ c = c1; ~~~~~~~ } ~ else ~~~~ { ~ PATUNFETCH; ~~~~~~~~~~~ break; ~~~~~~ } ~ /* If we get here, we found another repeat character. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!(syntax & RE_NO_MINIMAL_MATCHING)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ /* "*?" and "+?" and "??" are okay (and mean match ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ minimally), but other sequences (such as "*??" and ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ "+++") are rejected (reserved for future use). */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (minimal || c != '?') ~~~~~~~~~~~~~~~~~~~~~~~~ FREE_STACK_RETURN (REG_BADRPT); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ minimal = true; ~~~~~~~~~~~~~~~ } ~ else ~~~~ { ~ zero_times_ok |= c != '+'; ~~~~~~~~~~~~~~~~~~~~~~~~~~ many_times_ok |= c != '?'; ~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ } ~ /* Star, etc. applied to an empty pattern is equivalent ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ to an empty pattern. */ ~~~~~~~~~~~~~~~~~~~~~~~~ if (!laststart) ~~~~~~~~~~~~~~~ break; ~~~~~~ /* Now we know whether zero matches is allowed ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ and whether two or more matches is allowed ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ and whether we want minimal or maximal matching. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (minimal) ~~~~~~~~~~~~ { ~ if (!many_times_ok) ~~~~~~~~~~~~~~~~~~~ { ~ /* "a??" becomes: ~~~~~~~~~~~~~~~~~ 0: /on_failure_jump to 6 ~~~~~~~~~~~~~~~~~~~~~~~~ 3: /jump to 9 ~~~~~~~~~~~~~ 6: /exactn/1/A ~~~~~~~~~~~~~~ 9: end of pattern. ~~~~~~~~~~~~~~~~~~ */ ~~ GET_BUFFER_SPACE (6); ~~~~~~~~~~~~~~~~~~~~~ INSERT_JUMP (jump, laststart, buf_end + 3); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ buf_end += 3; ~~~~~~~~~~~~~ INSERT_JUMP (on_failure_jump, laststart, laststart + 6); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ buf_end += 3; ~~~~~~~~~~~~~ } ~ else if (zero_times_ok) ~~~~~~~~~~~~~~~~~~~~~~~ { ~ /* "a*?" becomes: ~~~~~~~~~~~~~~~~~ 0: /jump to 6 ~~~~~~~~~~~~~ 3: /exactn/1/A ~~~~~~~~~~~~~~ 6: /on_failure_jump to 3 ~~~~~~~~~~~~~~~~~~~~~~~~ 9: end of pattern. ~~~~~~~~~~~~~~~~~~ */ ~~ GET_BUFFER_SPACE (6); ~~~~~~~~~~~~~~~~~~~~~ INSERT_JUMP (jump, laststart, buf_end + 3); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ buf_end += 3; ~~~~~~~~~~~~~ STORE_JUMP (on_failure_jump, buf_end, laststart + 3); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ buf_end += 3; ~~~~~~~~~~~~~ } ~ else ~~~~ { ~ /* "a+?" becomes: ~~~~~~~~~~~~~~~~~ 0: /exactn/1/A ~~~~~~~~~~~~~~ 3: /on_failure_jump to 0 ~~~~~~~~~~~~~~~~~~~~~~~~ 6: end of pattern. ~~~~~~~~~~~~~~~~~~ */ ~~ GET_BUFFER_SPACE (3); ~~~~~~~~~~~~~~~~~~~~~ STORE_JUMP (on_failure_jump, buf_end, laststart); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ buf_end += 3; ~~~~~~~~~~~~~ } ~ } ~ else ~~~~ { ~ /* Are we optimizing this jump? */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_bool keep_string_p = false; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (many_times_ok) ~~~~~~~~~~~~~~~~~~ { /* More than one repetition is allowed, so put in ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ at the end a backward relative jump from ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `buf_end' to before the next jump we're going ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ to put in below (which jumps from laststart to ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ after this jump). ~~~~~~~~~~~~~~~~~ But if we are at the `*' in the exact sequence `.*\n', ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ insert an unconditional jump backwards to the ., ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ instead of the beginning of the loop. This way we only ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ push a failure point once, instead of every time ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ through the loop. */ ~~~~~~~~~~~~~~~~~~~~~ assert (p - 1 > pattern); ~~~~~~~~~~~~~~~~~~~~~~~~~ /* Allocate the space for the jump. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ GET_BUFFER_SPACE (3); ~~~~~~~~~~~~~~~~~~~~~ /* We know we are not at the first character of the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ pattern, because laststart was nonzero. And we've ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ already incremented `p', by the way, to be the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ character after the `*'. Do we have to do something ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ analogous here for null bytes, because of ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RE_DOT_NOT_NULL? */ ~~~~~~~~~~~~~~~~~~~ if (*(p - 2) == '.' ~~~~~~~~~~~~~~~~~~~ && zero_times_ok ~~~~~~~~~~~~~~~~ && p < pend && *p == '\n' ~~~~~~~~~~~~~~~~~~~~~~~~~ && !(syntax & RE_DOT_NEWLINE)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { /* We have .*\n. */ ~~~~~~~~~~~~~~~~~~~~~~ STORE_JUMP (jump, buf_end, laststart); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ keep_string_p = true; ~~~~~~~~~~~~~~~~~~~~~ } ~ else ~~~~ /* Anything else. */ ~~~~~~~~~~~~~~~~~~~~~ STORE_JUMP (maybe_pop_jump, buf_end, laststart - 3); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* We've added more stuff to the buffer. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ buf_end += 3; ~~~~~~~~~~~~~ } ~ /* On failure, jump from laststart to buf_end + 3, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ which will be the end of the buffer after this jump ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ is inserted. */ ~~~~~~~~~~~~~~~~ GET_BUFFER_SPACE (3); ~~~~~~~~~~~~~~~~~~~~~ INSERT_JUMP (keep_string_p ? on_failure_keep_string_jump ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ : on_failure_jump, ~~~~~~~~~~~~~~~~~~ laststart, buf_end + 3); ~~~~~~~~~~~~~~~~~~~~~~~~ buf_end += 3; ~~~~~~~~~~~~~ if (!zero_times_ok) ~~~~~~~~~~~~~~~~~~~ { ~ /* At least one repetition is required, so insert a ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `dummy_failure_jump' before the initial ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `on_failure_jump' instruction of the loop. This ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ effects a skip over that instruction the first time ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ we hit that loop. */ ~~~~~~~~~~~~~~~~~~~~~ GET_BUFFER_SPACE (3); ~~~~~~~~~~~~~~~~~~~~~ INSERT_JUMP (dummy_failure_jump, laststart, laststart + 6); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ buf_end += 3; ~~~~~~~~~~~~~ } ~ } ~ pending_exact = 0; ~~~~~~~~~~~~~~~~~~ } ~ break; ~~~~~~ case '.': ~~~~~~~~~ laststart = buf_end; ~~~~~~~~~~~~~~~~~~~~ BUF_PUSH (anychar); ~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ #ifdef MULE ~~~~~~~~~~~ #define MAYBE_START_OVER_WITH_EXTENDED(ch) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (ch >= 0x80) do \ ~~~~~~~~~~~~~~~~~~~~~~ { \ ~~~~~~~ goto start_over_with_extended; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } while (0) ~~~~~~~~~~~ #else ~~~~~ #define MAYBE_START_OVER_WITH_EXTENDED(ch) (void)(ch) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ case '[': ~~~~~~~~~ { ~ /* XEmacs change: this whole section */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_bool had_char_class = false; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (p == pend) FREE_STACK_RETURN (REG_EBRACK); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Ensure that we have enough space to push a charset: the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ opcode, the length count, and the bitset; 34 bytes in all. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ GET_BUFFER_SPACE (34); ~~~~~~~~~~~~~~~~~~~~~~ laststart = buf_end; ~~~~~~~~~~~~~~~~~~~~ /* We test `*p == '^' twice, instead of using an if ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ statement, so we only need one BUF_PUSH. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ BUF_PUSH (*p == '^' ? charset_not : charset); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (*p == '^') ~~~~~~~~~~~~~~ p++; ~~~~ /* Remember the first position in the bracket expression. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ p1 = p; ~~~~~~~ /* Push the number of bytes in the bitmap. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ BUF_PUSH ((1 << BYTEWIDTH) / BYTEWIDTH); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Clear the whole map. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~ memset (buf_end, 0, (1 << BYTEWIDTH) / BYTEWIDTH); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* charset_not matches newline according to a syntax bit. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if ((re_opcode_t) buf_end[-2] == charset_not ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ && (syntax & RE_HAT_LISTS_NOT_NEWLINE)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ SET_LIST_BIT ('\n'); ~~~~~~~~~~~~~~~~~~~~ /* Read in characters and ranges, setting map bits. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (;;) ~~~~~~~~ { ~ if (p == pend) FREE_STACK_RETURN (REG_EBRACK); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PATFETCH (c); ~~~~~~~~~~~~~ /* Frumble-bumble, we may have found some extended chars. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Need to start over, process everything using the general ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ extended-char mechanism, and need to use charset_mule and ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ charset_mule_not instead of charset and charset_not. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ MAYBE_START_OVER_WITH_EXTENDED (c); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* \ might escape characters inside [...] and [^...]. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if ((syntax & RE_BACKSLASH_ESCAPE_IN_LISTS) && c == '\\') ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ if (p == pend) FREE_STACK_RETURN (REG_EESCAPE); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PATFETCH (c1); ~~~~~~~~~~~~~~ MAYBE_START_OVER_WITH_EXTENDED (c1); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ SET_LIST_BIT (c1); ~~~~~~~~~~~~~~~~~~ continue; ~~~~~~~~~ } ~ /* Could be the end of the bracket expression. If it's ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ not (i.e., when the bracket expression is `[]' so ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ far), the ']' character bit gets set way below. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (c == ']' && p != p1 + 1) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ /* Look ahead to see if it's a range when the last thing ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ was a character class. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~ if (had_char_class && c == '-' && *p != ']') ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ FREE_STACK_RETURN (REG_ERANGE); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Look ahead to see if it's a range when the last thing ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ was a character: if this is a hyphen not at the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ beginning or the end of a list, then it's the range ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ operator. */ ~~~~~~~~~~~~~ if (c == '-' ~~~~~~~~~~~~ && !(p - 2 >= pattern && p[-2] == '[') ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ && !(p - 3 >= pattern && p[-3] == '[' && p[-2] == '^') ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ && *p != ']') ~~~~~~~~~~~~~ { ~ reg_errcode_t ret; ~~~~~~~~~~~~~~~~~~ MAYBE_START_OVER_WITH_EXTENDED (*(unsigned char *)p); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ret = compile_range (&p, pend, translate, syntax, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ buf_end); ~~~~~~~~~ if (ret != REG_NOERROR) FREE_STACK_RETURN (ret); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ else if (p[0] == '-' && p[1] != ']') ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { /* This handles ranges made up of characters only. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ reg_errcode_t ret; ~~~~~~~~~~~~~~~~~~ /* Move past the `-'. */ ~~~~~~~~~~~~~~~~~~~~~~~~~ PATFETCH (c1); ~~~~~~~~~~~~~~ MAYBE_START_OVER_WITH_EXTENDED (*(unsigned char *)p); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ret = compile_range (&p, pend, translate, syntax, buf_end); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (ret != REG_NOERROR) FREE_STACK_RETURN (ret); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* See if we're at the beginning of a possible character ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ class. */ ~~~~~~~~~~ else if (syntax & RE_CHAR_CLASSES && c == '[' && *p == ':') ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ re_char *str = p + 1; ~~~~~~~~~~~~~~~~~~~~~ PATFETCH (c); ~~~~~~~~~~~~~ c1 = 0; ~~~~~~~ /* If pattern is `[[:'. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (p == pend) FREE_STACK_RETURN (REG_EBRACK); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (;;) ~~~~~~~~ { ~ PATFETCH (c); ~~~~~~~~~~~~~ if ((c == ':' && *p == ']') || p == pend) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ break; ~~~~~~ } ~ c1++; ~~~~~ } ~ /* If isn't a word bracketed by `[:' and `:]': ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ undo the ending character, the letters, and leave ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the leading `:' and `[' (but set bits for them). */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (c == ':' && *p == ']') ~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ re_wctype_t cc = re_wctype (str, c1); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int ch; ~~~~~~~ if (cc == RECC_ERROR) ~~~~~~~~~~~~~~~~~~~~~ FREE_STACK_RETURN (REG_ECTYPE); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Throw away the ] at the end of the character ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ class. */ ~~~~~~~~~~ PATFETCH (c); ~~~~~~~~~~~~~ if (p == pend) FREE_STACK_RETURN (REG_EBRACK); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef MULE ~~~~~~~~~~~ if (re_wctype_can_match_non_ascii (cc)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ goto start_over_with_extended; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ #endif /* MULE */ ~~~~~~~~~~~~~~~~~ for (ch = 0; ch < (1 << BYTEWIDTH); ++ch) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ if (re_iswctype (ch, cc ~~~~~~~~~~~~~~~~~~~~~~~ RE_ISWCTYPE_ARG (current_buffer))) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ SET_LIST_BIT (ch); ~~~~~~~~~~~~~~~~~~ } ~ } ~ had_char_class = true; ~~~~~~~~~~~~~~~~~~~~~~ } ~ else ~~~~ { ~ c1++; ~~~~~ while (c1--) ~~~~~~~~~~~~ PATUNFETCH; ~~~~~~~~~~~ SET_LIST_BIT ('['); ~~~~~~~~~~~~~~~~~~~ SET_LIST_BIT (':'); ~~~~~~~~~~~~~~~~~~~ had_char_class = false; ~~~~~~~~~~~~~~~~~~~~~~~ } ~ } ~ else ~~~~ { ~ had_char_class = false; ~~~~~~~~~~~~~~~~~~~~~~~ SET_LIST_BIT (c); ~~~~~~~~~~~~~~~~~ } ~ } ~ /* Discard any (non)matching list bytes that are all 0 at the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ end of the map. Decrease the map-length byte too. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ while ((int) buf_end[-1] > 0 && buf_end[buf_end[-1] - 1] == 0) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ buf_end[-1]--; ~~~~~~~~~~~~~~ buf_end += buf_end[-1]; ~~~~~~~~~~~~~~~~~~~~~~~ } ~ break; ~~~~~~ #ifdef MULE ~~~~~~~~~~~ start_over_with_extended: ~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ REGISTER Lisp_Object rtab = Qnil; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Bitbyte flags = 0; ~~~~~~~~~~~~~~~~~~ int bytes_needed = sizeof (flags); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_bool had_char_class = false; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* There are extended chars here, which means we need to use the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ unified range-table format. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (buf_end[-2] == charset) ~~~~~~~~~~~~~~~~~~~~~~~~~~~ buf_end[-2] = charset_mule; ~~~~~~~~~~~~~~~~~~~~~~~~~~~ else ~~~~ buf_end[-2] = charset_mule_not; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ buf_end--; ~~~~~~~~~~ p = p1; /* go back to the beginning of the charset, after ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ a possible ^. */ ~~~~~~~~~~~~~~~~ rtab = Vthe_lisp_rangetab; ~~~~~~~~~~~~~~~~~~~~~~~~~~ Fclear_range_table (rtab); ~~~~~~~~~~~~~~~~~~~~~~~~~~ /* charset_not matches newline according to a syntax bit. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if ((re_opcode_t) buf_end[-1] == charset_mule_not ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ && (syntax & RE_HAT_LISTS_NOT_NEWLINE)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ SET_RANGETAB_BIT ('\n'); ~~~~~~~~~~~~~~~~~~~~~~~~ /* Read in characters and ranges, setting map bits. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (;;) ~~~~~~~~ { ~ if (p == pend) FREE_STACK_RETURN (REG_EBRACK); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PATFETCH (c); ~~~~~~~~~~~~~ /* \ might escape characters inside [...] and [^...]. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if ((syntax & RE_BACKSLASH_ESCAPE_IN_LISTS) && c == '\\') ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ if (p == pend) FREE_STACK_RETURN (REG_EESCAPE); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PATFETCH (c1); ~~~~~~~~~~~~~~ SET_RANGETAB_BIT (c1); ~~~~~~~~~~~~~~~~~~~~~~ continue; ~~~~~~~~~ } ~ /* Could be the end of the bracket expression. If it's ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ not (i.e., when the bracket expression is `[]' so ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ far), the ']' character bit gets set way below. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (c == ']' && p != p1 + 1) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ /* Look ahead to see if it's a range when the last thing ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ was a character class. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~ if (had_char_class && c == '-' && *p != ']') ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ FREE_STACK_RETURN (REG_ERANGE); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Look ahead to see if it's a range when the last thing ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ was a character: if this is a hyphen not at the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ beginning or the end of a list, then it's the range ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ operator. */ ~~~~~~~~~~~~~ if (c == '-' ~~~~~~~~~~~~ && !(p - 2 >= pattern && p[-2] == '[') ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ && !(p - 3 >= pattern && p[-3] == '[' && p[-2] == '^') ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ && *p != ']') ~~~~~~~~~~~~~ { ~ reg_errcode_t ret; ~~~~~~~~~~~~~~~~~~ ret = compile_extended_range (&p, pend, translate, syntax, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ rtab); ~~~~~~ if (ret != REG_NOERROR) FREE_STACK_RETURN (ret); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ else if (p[0] == '-' && p[1] != ']') ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { /* This handles ranges made up of characters only. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ reg_errcode_t ret; ~~~~~~~~~~~~~~~~~~ /* Move past the `-'. */ ~~~~~~~~~~~~~~~~~~~~~~~~~ PATFETCH (c1); ~~~~~~~~~~~~~~ ret = compile_extended_range (&p, pend, translate, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ syntax, rtab); ~~~~~~~~~~~~~~ if (ret != REG_NOERROR) FREE_STACK_RETURN (ret); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* See if we're at the beginning of a possible character ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ class. */ ~~~~~~~~~~ else if (syntax & RE_CHAR_CLASSES && c == '[' && *p == ':') ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ re_char *str = p + 1; ~~~~~~~~~~~~~~~~~~~~~ PATFETCH (c); ~~~~~~~~~~~~~ c1 = 0; ~~~~~~~ /* If pattern is `[[:'. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (p == pend) FREE_STACK_RETURN (REG_EBRACK); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (;;) ~~~~~~~~ { ~ PATFETCH (c); ~~~~~~~~~~~~~ if ((c == ':' && *p == ']') || p == pend) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ break; ~~~~~~ } ~ c1++; ~~~~~ } ~ /* If isn't a word bracketed by `[:' and `:]': ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ undo the ending character, the letters, and leave ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the leading `:' and `[' (but set bits for them). */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (c == ':' && *p == ']') ~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ re_wctype_t cc = re_wctype (str, c1); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ reg_errcode_t ret = REG_NOERROR; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (cc == RECC_ERROR) ~~~~~~~~~~~~~~~~~~~~~ FREE_STACK_RETURN (REG_ECTYPE); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Throw away the ] at the end of the character ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ class. */ ~~~~~~~~~~ PATFETCH (c); ~~~~~~~~~~~~~ if (p == pend) FREE_STACK_RETURN (REG_EBRACK); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ret = compile_char_class (cc, rtab, &flags); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (ret != REG_NOERROR) FREE_STACK_RETURN (ret); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ had_char_class = true; ~~~~~~~~~~~~~~~~~~~~~~ } ~ else ~~~~ { ~ c1++; ~~~~~ while (c1--) ~~~~~~~~~~~~ PATUNFETCH; ~~~~~~~~~~~ SET_RANGETAB_BIT ('['); ~~~~~~~~~~~~~~~~~~~~~~~ SET_RANGETAB_BIT (':'); ~~~~~~~~~~~~~~~~~~~~~~~ had_char_class = false; ~~~~~~~~~~~~~~~~~~~~~~~ } ~ } ~ else ~~~~ { ~ had_char_class = false; ~~~~~~~~~~~~~~~~~~~~~~~ SET_RANGETAB_BIT (c); ~~~~~~~~~~~~~~~~~~~~~ } ~ } ~ bytes_needed += unified_range_table_bytes_needed (rtab); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ GET_BUFFER_SPACE (bytes_needed); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ *buf_end++ = flags; ~~~~~~~~~~~~~~~~~~~ unified_range_table_copy_data (rtab, buf_end); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ buf_end += unified_range_table_bytes_used (buf_end); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ } ~ #endif /* MULE */ ~~~~~~~~~~~~~~~~~ case '(': ~~~~~~~~~ if (syntax & RE_NO_BK_PARENS) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto handle_open; ~~~~~~~~~~~~~~~~~ else ~~~~ goto normal_char; ~~~~~~~~~~~~~~~~~ case ')': ~~~~~~~~~ if (syntax & RE_NO_BK_PARENS) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto handle_close; ~~~~~~~~~~~~~~~~~~ else ~~~~ goto normal_char; ~~~~~~~~~~~~~~~~~ case '\n': ~~~~~~~~~~ if (syntax & RE_NEWLINE_ALT) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto handle_alt; ~~~~~~~~~~~~~~~~ else ~~~~ goto normal_char; ~~~~~~~~~~~~~~~~~ case '|': ~~~~~~~~~ if (syntax & RE_NO_BK_VBAR) ~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto handle_alt; ~~~~~~~~~~~~~~~~ else ~~~~ goto normal_char; ~~~~~~~~~~~~~~~~~ case '{': ~~~~~~~~~ if (syntax & RE_INTERVALS && syntax & RE_NO_BK_BRACES) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto handle_interval; ~~~~~~~~~~~~~~~~~~~~~ else ~~~~ goto normal_char; ~~~~~~~~~~~~~~~~~ case '\\': ~~~~~~~~~~ if (p == pend) FREE_STACK_RETURN (REG_EESCAPE); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Do not translate the character after the \, so that we can ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ distinguish, e.g., \B from \b, even if we normally would ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ translate, e.g., B to b. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PATFETCH_RAW (c); ~~~~~~~~~~~~~~~~~ switch (c) ~~~~~~~~~~ { ~ case '(': ~~~~~~~~~ if (syntax & RE_NO_BK_PARENS) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto normal_backslash; ~~~~~~~~~~~~~~~~~~~~~~ handle_open: ~~~~~~~~~~~~ { ~ regnum_t r = 0; ~~~~~~~~~~~~~~~ re_bool shy = 0, named_nonshy = 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!(syntax & RE_NO_SHY_GROUPS) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ && p != pend && itext_ichar_eql (p, '?')) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ INC_IBYTEPTR (p); /* Gobble up the '?'. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PATFETCH (c); /* Fetch the next character, which may be a ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ digit. */ ~~~~~~~~~ switch (c) ~~~~~~~~~~ { ~ case ':': /* shy groups */ ~~~~~~~~~~~~~~~~~~~~~~~~~~ shy = 1; ~~~~~~~~ break; ~~~~~~ case '1': case '2': case '3': case '4': ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case '5': case '6': case '7': case '8': case '9': ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PATUNFETCH; ~~~~~~~~~~~ GET_UNSIGNED_NUMBER (r); ~~~~~~~~~~~~~~~~~~~~~~~~ if (itext_ichar_eql (p, ':')) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ named_nonshy = 1; ~~~~~~~~~~~~~~~~~ INC_IBYTEPTR (p); /* Gobble up the ':'. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ /* Otherwise, fall through and error. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* An explicitly specified regnum must start with ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ non-0. */ ~~~~~~~~~ case '0': ~~~~~~~~~ default: ~~~~~~~~ FREE_STACK_RETURN (REG_BADPAT); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ } ~ ++regnum; ~~~~~~~~~ bufp->re_ngroups++; ~~~~~~~~~~~~~~~~~~~ if (bufp->re_ngroups > MAX_REGNUM) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ FREE_STACK_RETURN (REG_ESUBREG); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ if (!shy) ~~~~~~~~~ { ~ if (named_nonshy) ~~~~~~~~~~~~~~~~~ { ~ if (r < bufp->external_to_internal_register_size) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ if (group_in_compile_stack ~~~~~~~~~~~~~~~~~~~~~~~~~~ (compile_stack, ~~~~~~~~~~~~~~~ bufp->external_to_internal_register[r])) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ /* GNU errors in this context, which is ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ inconsistent; it otherwise has no problem ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ with named non-shy groups overriding ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ previously-assigned group numbers. I choose ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ to error here for consistency with GNU for ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ those writing code that should target ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ both. */ ~~~~~~~~ FREE_STACK_RETURN (REG_ESUBREG); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ } ~ if (r > bufp->re_nsub) ~~~~~~~~~~~~~~~~~~~~~~ { ~ bufp->re_nsub = r; ~~~~~~~~~~~~~~~~~~ } ~ } ~ else ~~~~ { ~ r = ++(bufp->re_nsub); ~~~~~~~~~~~~~~~~~~~~~~ } ~ while (bufp->external_to_internal_register_size <= ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->re_nsub) ~~~~~~~~~~~~~~ { ~ int i; ~~~~~~ int old_size = ~~~~~~~~~~~~~~ bufp->external_to_internal_register_size; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->external_to_internal_register_size ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ += max (old_size + 5, bufp->re_nsub + 5); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RETALLOC (bufp->external_to_internal_register, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->external_to_internal_register_size, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int); ~~~~~ for (i = old_size; ~~~~~~~~~~~~~~~~~~ i < bufp->external_to_internal_register_size; i++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->external_to_internal_register[i] = ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (int) 0xDEADBEEF; ~~~~~~~~~~~~~~~~~ } ~ /* This is explicitly [r] rather than [bufp->re_nsub] for ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the case that the named nonshy group references an ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ unused register number less than bufp->re_nsub. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->external_to_internal_register[r] = ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->re_ngroups; ~~~~~~~~~~~~~~~~~ } ~ if (COMPILE_STACK_FULL) ~~~~~~~~~~~~~~~~~~~~~~~ { ~ RETALLOC (compile_stack.stack, compile_stack.size << 1, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ compile_stack_elt_t); ~~~~~~~~~~~~~~~~~~~~~ if (compile_stack.stack == NULL) return REG_ESPACE; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ compile_stack.size <<= 1; ~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* These are the values to restore when we hit end of this ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ group. They are all relative offsets, so that if the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ whole pattern moves because of realloc, they will still ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ be valid. */ ~~~~~~~~~~~~~ COMPILE_STACK_TOP.begalt_offset = begalt - bufp->buffer; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ COMPILE_STACK_TOP.fixup_alt_jump ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ = fixup_alt_jump ? fixup_alt_jump - bufp->buffer + 1 : 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ COMPILE_STACK_TOP.laststart_offset = buf_end - bufp->buffer; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ COMPILE_STACK_TOP.regnum = bufp->re_ngroups; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ COMPILE_STACK_TOP.inner_group_offset ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ = buf_end - bufp->buffer + 3; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* We will eventually replace the 0 with the number of ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ groups inner to this one, using inner_group_offset, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ above. */ ~~~~~~~~~ GET_BUFFER_SPACE (5); ~~~~~~~~~~~~~~~~~~~~~ store_op2 (start_memory, buf_end, bufp->re_ngroups, 0); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ buf_end += 5; ~~~~~~~~~~~~~ compile_stack.avail++; ~~~~~~~~~~~~~~~~~~~~~~ fixup_alt_jump = 0; ~~~~~~~~~~~~~~~~~~~ laststart = 0; ~~~~~~~~~~~~~~ begalt = buf_end; ~~~~~~~~~~~~~~~~~ pending_exact = 0; ~~~~~~~~~~~~~~~~~~ } ~ break; ~~~~~~ case ')': ~~~~~~~~~ if (syntax & RE_NO_BK_PARENS) goto normal_backslash; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (COMPILE_STACK_EMPTY) ~~~~~~~~~~~~~~~~~~~~~~~~ { ~ if (syntax & RE_UNMATCHED_RIGHT_PAREN_ORD) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto normal_backslash; ~~~~~~~~~~~~~~~~~~~~~~ else ~~~~ FREE_STACK_RETURN (REG_ERPAREN); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ handle_close: ~~~~~~~~~~~~~ if (fixup_alt_jump) ~~~~~~~~~~~~~~~~~~~ { /* Push a dummy failure point at the end of the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ alternative for a possible future ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `pop_failure_jump' to pop. See comments at ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `push_dummy_failure' in `re_match_2'. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ BUF_PUSH (push_dummy_failure); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* We allocated space for this jump when we assigned ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ to `fixup_alt_jump', in the `handle_alt' case below. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ STORE_JUMP (jump_past_alt, fixup_alt_jump, buf_end - 1); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* See similar code for backslashed left paren above. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (COMPILE_STACK_EMPTY) ~~~~~~~~~~~~~~~~~~~~~~~~ { ~ if (syntax & RE_UNMATCHED_RIGHT_PAREN_ORD) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto normal_char; ~~~~~~~~~~~~~~~~~ else ~~~~ FREE_STACK_RETURN (REG_ERPAREN); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* Since we just checked for an empty stack above, this ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ``can't happen''. */ ~~~~~~~~~~~~~~~~~~~~~ assert (compile_stack.avail != 0); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ /* We don't just want to restore into `regnum', because ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ later groups should continue to be numbered higher, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ as in `(ab)c(de)' -- the second group is #2. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regnum_t this_group_regnum; ~~~~~~~~~~~~~~~~~~~~~~~~~~~ unsigned char *inner_group_loc; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ compile_stack.avail--; ~~~~~~~~~~~~~~~~~~~~~~ begalt = bufp->buffer + COMPILE_STACK_TOP.begalt_offset; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fixup_alt_jump ~~~~~~~~~~~~~~ = COMPILE_STACK_TOP.fixup_alt_jump ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ? bufp->buffer + COMPILE_STACK_TOP.fixup_alt_jump - 1 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ : 0; ~~~~ laststart = bufp->buffer + COMPILE_STACK_TOP.laststart_offset; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ this_group_regnum = COMPILE_STACK_TOP.regnum; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ pending_exact = 0; ~~~~~~~~~~~~~~~~~~ /* We're at the end of the group, so now we know how many ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ groups were inside this one. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ inner_group_loc ~~~~~~~~~~~~~~~ = bufp->buffer + COMPILE_STACK_TOP.inner_group_offset; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ STORE_NUMBER (inner_group_loc, regnum - this_group_regnum); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ GET_BUFFER_SPACE (5); ~~~~~~~~~~~~~~~~~~~~~ store_op2 (stop_memory, buf_end, this_group_regnum, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regnum - this_group_regnum); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ buf_end += 5; ~~~~~~~~~~~~~ } ~ break; ~~~~~~ case '|': /* `\|'. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~ if (syntax & RE_LIMITED_OPS || syntax & RE_NO_BK_VBAR) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto normal_backslash; ~~~~~~~~~~~~~~~~~~~~~~ handle_alt: ~~~~~~~~~~~ if (syntax & RE_LIMITED_OPS) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto normal_char; ~~~~~~~~~~~~~~~~~ /* Insert before the previous alternative a jump which ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ jumps to this alternative if the former fails. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ GET_BUFFER_SPACE (3); ~~~~~~~~~~~~~~~~~~~~~ INSERT_JUMP (on_failure_jump, begalt, buf_end + 6); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ pending_exact = 0; ~~~~~~~~~~~~~~~~~~ buf_end += 3; ~~~~~~~~~~~~~ /* The alternative before this one has a jump after it ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ which gets executed if it gets matched. Adjust that ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ jump so it will jump to this alternative's analogous ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ jump (put in below, which in turn will jump to the next ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (if any) alternative's such jump, etc.). The last such ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ jump jumps to the correct final destination. A picture: ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ _____ _____ ~~~~~~~~~~~ | | | | ~~~~~~~~~~~ | v | v ~~~~~~~~~~~ a | b | c ~~~~~~~~~~~ If we are at `b', then fixup_alt_jump right now points to a ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ three-byte space after `a'. We'll put in the jump, set ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fixup_alt_jump to right after `b', and leave behind three ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bytes which we'll fill in when we get to after `c'. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (fixup_alt_jump) ~~~~~~~~~~~~~~~~~~~ STORE_JUMP (jump_past_alt, fixup_alt_jump, buf_end); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Mark and leave space for a jump after this alternative, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ to be filled in later either by next alternative or ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ when know we're at the end of a series of alternatives. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fixup_alt_jump = buf_end; ~~~~~~~~~~~~~~~~~~~~~~~~~ GET_BUFFER_SPACE (3); ~~~~~~~~~~~~~~~~~~~~~ buf_end += 3; ~~~~~~~~~~~~~ laststart = 0; ~~~~~~~~~~~~~~ begalt = buf_end; ~~~~~~~~~~~~~~~~~ break; ~~~~~~ case '{': ~~~~~~~~~ /* If \{ is a literal. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!(syntax & RE_INTERVALS) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* If we're at `\{' and it's not the open-interval ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ operator. */ ~~~~~~~~~~~~~ || ((syntax & RE_INTERVALS) && (syntax & RE_NO_BK_BRACES)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || (p - 2 == pattern && p == pend)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto normal_backslash; ~~~~~~~~~~~~~~~~~~~~~~ #define BAD_INTERVAL(errnum) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ do \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (syntax & RE_NO_BK_BRACES) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto unfetch_interval; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ else \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ FREE_STACK_RETURN (errnum); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } while (0) ~~~~~~~~~~~ handle_interval: ~~~~~~~~~~~~~~~~ { ~ /* If got here, then the syntax allows intervals. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* At least (most) this many matches must be made. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int lower_bound = 0, upper_bound = -1; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ beg_interval = p - 1; ~~~~~~~~~~~~~~~~~~~~~ if (p == pend || itext_ichar_eql (p, '+')) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ BAD_INTERVAL (REG_EBRACE); ~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ GET_UNSIGNED_NUMBER (lower_bound); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (p == pend) BAD_INTERVAL (REG_EBRACE); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PATFETCH (c); ~~~~~~~~~~~~~ if (c == ',') ~~~~~~~~~~~~~ { ~ if (p == pend || itext_ichar_eql (p, '+')) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ BAD_INTERVAL (REG_EBRACE); ~~~~~~~~~~~~~~~~~~~~~~~~~~ GET_UNSIGNED_NUMBER (upper_bound); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (upper_bound < 0) upper_bound = RE_DUP_MAX; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (p == pend) BAD_INTERVAL (REG_EBRACE); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PATFETCH (c); ~~~~~~~~~~~~~ } ~ else ~~~~ { ~ /* Interval such as `{1}' => match exactly once. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ upper_bound = lower_bound; ~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ if (lower_bound > upper_bound) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ BAD_INTERVAL (REG_EBRACE); ~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ if (upper_bound > RE_DUP_MAX) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ BAD_INTERVAL (REG_ESIZEBR); ~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ if (!(syntax & RE_NO_BK_BRACES)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ if (c != '\\') ~~~~~~~~~~~~~~ FREE_STACK_RETURN (REG_BADBR); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (p == pend) ~~~~~~~~~~~~~~ FREE_STACK_RETURN (REG_EESCAPE); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PATFETCH (c); ~~~~~~~~~~~~~ } ~ if (c != '}') ~~~~~~~~~~~~~ { ~ BAD_INTERVAL (REG_EBRACE); ~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* We just parsed a valid interval. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* It's invalid to have no preceding RE. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!laststart) ~~~~~~~~~~~~~~~ { ~ if (syntax & RE_CONTEXT_INVALID_OPS) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ FREE_STACK_RETURN (REG_BADRPT); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ else if (syntax & RE_CONTEXT_INDEP_OPS) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ laststart = buf_end; ~~~~~~~~~~~~~~~~~~~~ else ~~~~ goto unfetch_interval; ~~~~~~~~~~~~~~~~~~~~~~ } ~ /* If the upper bound is zero, don't want to succeed at ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ all; jump from `laststart' to `b + 3', which will be ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the end of the buffer after we insert the jump. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (upper_bound == 0) ~~~~~~~~~~~~~~~~~~~~~ { ~ GET_BUFFER_SPACE (3); ~~~~~~~~~~~~~~~~~~~~~ INSERT_JUMP (jump, laststart, buf_end + 3); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ buf_end += 3; ~~~~~~~~~~~~~ } ~ /* Otherwise, we have a nontrivial interval. When ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ we're all done, the pattern will look like: ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ set_number_at ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ set_number_at ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ succeed_n ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ~~~~~~~~~~~~~~ jump_n ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (The upper bound and `jump_n' are omitted if ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `upper_bound' is 1, though.) */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ else ~~~~ { /* If the upper bound is > 1, we need to insert ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ more at the end of the loop. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int nbytes = 10 + (upper_bound > 1) * 10; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ GET_BUFFER_SPACE (nbytes); ~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Initialize lower bound of the `succeed_n', even ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ though it will be set during matching by its ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ attendant `set_number_at' (inserted next), ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ because `re_compile_fastmap' needs to know. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Jump to the `jump_n' we might insert below. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ INSERT_JUMP2 (succeed_n, laststart, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ buf_end + 5 + (upper_bound > 1) * 5, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ lower_bound); ~~~~~~~~~~~~~ buf_end += 5; ~~~~~~~~~~~~~ /* Code to initialize the lower bound. Insert ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ before the `succeed_n'. The `5' is the last two ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bytes of this `set_number_at', plus 3 bytes of ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the following `succeed_n'. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ insert_op2 (set_number_at, laststart, 5, lower_bound, buf_end); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ buf_end += 5; ~~~~~~~~~~~~~ if (upper_bound > 1) ~~~~~~~~~~~~~~~~~~~~ { /* More than one repetition is allowed, so ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ append a backward jump to the `succeed_n' ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ that starts this interval. ~~~~~~~~~~~~~~~~~~~~~~~~~~ When we've reached this during matching, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ we'll have matched the interval once, so ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ jump back only `upper_bound - 1' times. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ STORE_JUMP2 (jump_n, buf_end, laststart + 5, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ upper_bound - 1); ~~~~~~~~~~~~~~~~~ buf_end += 5; ~~~~~~~~~~~~~ /* The location we want to set is the second ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ parameter of the `jump_n'; that is `b-2' as ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ an absolute address. `laststart' will be ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the `set_number_at' we're about to insert; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `laststart+3' the number to set, the source ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for the relative address. But we are ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ inserting into the middle of the pattern -- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ so everything is getting moved up by 5. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Conclusion: (b - 2) - (laststart + 3) + 5, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ i.e., b - laststart. ~~~~~~~~~~~~~~~~~~~~ We insert this at the beginning of the loop ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ so that if we fail during matching, we'll ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ reinitialize the bounds. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ insert_op2 (set_number_at, laststart, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ buf_end - laststart, ~~~~~~~~~~~~~~~~~~~~ upper_bound - 1, buf_end); ~~~~~~~~~~~~~~~~~~~~~~~~~~ buf_end += 5; ~~~~~~~~~~~~~ } ~ } ~ pending_exact = 0; ~~~~~~~~~~~~~~~~~~ beg_interval = NULL; ~~~~~~~~~~~~~~~~~~~~ } ~ break; ~~~~~~ #undef BAD_INTERVAL ~~~~~~~~~~~~~~~~~~~ unfetch_interval: ~~~~~~~~~~~~~~~~~ /* If an invalid interval, match the characters as literals. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ assert (beg_interval); ~~~~~~~~~~~~~~~~~~~~~~ p = beg_interval; ~~~~~~~~~~~~~~~~~ beg_interval = NULL; ~~~~~~~~~~~~~~~~~~~~ /* normal_char and normal_backslash need `c'. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PATFETCH (c); ~~~~~~~~~~~~~ if (!(syntax & RE_NO_BK_BRACES)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ if (p > pattern && p[-1] == '\\') ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto normal_backslash; ~~~~~~~~~~~~~~~~~~~~~~ } ~ goto normal_char; ~~~~~~~~~~~~~~~~~ #ifdef emacs ~~~~~~~~~~~~ /* There is no way to specify the before_dot and after_dot ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ operators. rms says this is ok. --karl */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case '=': ~~~~~~~~~ BUF_PUSH (at_dot); ~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case 's': ~~~~~~~~~ laststart = buf_end; ~~~~~~~~~~~~~~~~~~~~ PATFETCH (c); ~~~~~~~~~~~~~ /* XEmacs addition */ ~~~~~~~~~~~~~~~~~~~~~ if (c >= 0x80 || syntax_spec_code[c] == 0377) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ FREE_STACK_RETURN (REG_ESYNTAX); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ BUF_PUSH_2 (syntaxspec, syntax_spec_code[c]); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case 'S': ~~~~~~~~~ laststart = buf_end; ~~~~~~~~~~~~~~~~~~~~ PATFETCH (c); ~~~~~~~~~~~~~ /* XEmacs addition */ ~~~~~~~~~~~~~~~~~~~~~ if (c >= 0x80 || syntax_spec_code[c] == 0377) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ FREE_STACK_RETURN (REG_ESYNTAX); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ BUF_PUSH_2 (notsyntaxspec, syntax_spec_code[c]); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ #ifdef MULE ~~~~~~~~~~~ /* 97.2.17 jhod merged in to XEmacs from mule-2.3 */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case 'c': ~~~~~~~~~ laststart = buf_end; ~~~~~~~~~~~~~~~~~~~~ PATFETCH_RAW (c); ~~~~~~~~~~~~~~~~~ if (c < 32 || c > 127) ~~~~~~~~~~~~~~~~~~~~~~ FREE_STACK_RETURN (REG_ECATEGORY); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ BUF_PUSH_2 (categoryspec, c); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case 'C': ~~~~~~~~~ laststart = buf_end; ~~~~~~~~~~~~~~~~~~~~ PATFETCH_RAW (c); ~~~~~~~~~~~~~~~~~ if (c < 32 || c > 127) ~~~~~~~~~~~~~~~~~~~~~~ FREE_STACK_RETURN (REG_ECATEGORY); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ BUF_PUSH_2 (notcategoryspec, c); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ /* end of category patch */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif /* MULE */ ~~~~~~~~~~~~~~~~~ #endif /* emacs */ ~~~~~~~~~~~~~~~~~~ case 'w': ~~~~~~~~~ laststart = buf_end; ~~~~~~~~~~~~~~~~~~~~ BUF_PUSH (wordchar); ~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case 'W': ~~~~~~~~~ laststart = buf_end; ~~~~~~~~~~~~~~~~~~~~ BUF_PUSH (notwordchar); ~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case '<': ~~~~~~~~~ BUF_PUSH (wordbeg); ~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case '>': ~~~~~~~~~ BUF_PUSH (wordend); ~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case 'b': ~~~~~~~~~ BUF_PUSH (wordbound); ~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case 'B': ~~~~~~~~~ BUF_PUSH (notwordbound); ~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case '`': ~~~~~~~~~ BUF_PUSH (begbuf); ~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case '\'': ~~~~~~~~~~ BUF_PUSH (endbuf); ~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case '1': case '2': case '3': case '4': case '5': ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case '6': case '7': case '8': case '9': ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ regnum_t reg = -1, regint; ~~~~~~~~~~~~~~~~~~~~~~~~~~ if (syntax & RE_NO_BK_REFS) ~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto normal_char; ~~~~~~~~~~~~~~~~~ PATUNFETCH; ~~~~~~~~~~~ GET_UNSIGNED_NUMBER (reg); ~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Progressively divide down the backreference until we find ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ one that corresponds to an existing register. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ while (reg > 10 && ~~~~~~~~~~~~~~~~~~ (syntax & RE_NO_MULTI_DIGIT_BK_REFS ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || reg > bufp->re_nsub ~~~~~~~~~~~~~~~~~~~~~~ || (bufp->external_to_internal_register[reg] ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ == (int) 0xDEADBEEF))) ~~~~~~~~~~~~~~~~~~~~~~ { ~ PATUNFETCH; ~~~~~~~~~~~ reg /= 10; ~~~~~~~~~~ } ~ if (reg > bufp->re_nsub ~~~~~~~~~~~~~~~~~~~~~~~ || (bufp->external_to_internal_register[reg] ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ == (int) 0xDEADBEEF)) ~~~~~~~~~~~~~~~~~~~~~ { ~ /* \N with one digit with a non-existing group has always ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ been a syntax error. ~~~~~~~~~~~~~~~~~~~~ GNU as of Fr 27 Mär 2020 16:24:07 GMT do not accept ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ multidigit backreferences; if they did there would be ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ an argument for this not being an error for those ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ backreferences that are less than some known named ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ backreference. As it is currently we should error, this ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ will give those writing code for XEmacs better ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ feedback. */ ~~~~~~~~~~~~ FREE_STACK_RETURN (REG_ESUBREG); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ regint = bufp->external_to_internal_register[reg]; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Can't back reference to a subexpression if inside of it. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (group_in_compile_stack (compile_stack, regint)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ /* Check REG, not REGINT. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ while (reg > 10) ~~~~~~~~~~~~~~~~ { ~ PATUNFETCH; ~~~~~~~~~~~ reg = reg / 10; ~~~~~~~~~~~~~~~ } ~ goto normal_char; ~~~~~~~~~~~~~~~~~ } ~ #ifdef emacs ~~~~~~~~~~~~ if (reg > 9 && ~~~~~~~~~~~~~~ bufp->warned_about_incompatible_back_references == 0) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ bufp->warned_about_incompatible_back_references = 1; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ warn_when_safe (intern ("regex"), Qinfo, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ "Back reference \\%d now has new " ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ "semantics in %s", reg, pattern); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ #endif ~~~~~~ laststart = buf_end; ~~~~~~~~~~~~~~~~~~~~ GET_BUFFER_SPACE (3); ~~~~~~~~~~~~~~~~~~~~~ store_op1 (duplicate, buf_end, regint); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ buf_end += 3; ~~~~~~~~~~~~~ } ~ break; ~~~~~~ case '+': ~~~~~~~~~ case '?': ~~~~~~~~~ if (syntax & RE_BK_PLUS_QM) ~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto handle_plus; ~~~~~~~~~~~~~~~~~ else ~~~~ goto normal_backslash; ~~~~~~~~~~~~~~~~~~~~~~ default: ~~~~~~~~ normal_backslash: ~~~~~~~~~~~~~~~~~ /* You might think it would be useful for \ to mean ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ not to translate; but if we don't translate it, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ it will never match anything. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ c = RE_TRANSLATE (c); ~~~~~~~~~~~~~~~~~~~~~ goto normal_char; ~~~~~~~~~~~~~~~~~ } ~ break; ~~~~~~ default: ~~~~~~~~ /* Expects the character in `c'. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* `p' points to the location after where `c' came from. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ normal_char: ~~~~~~~~~~~~ { ~ /* The following conditional synced to GNU Emacs 22.1. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* If no exactn currently being built. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!pending_exact ~~~~~~~~~~~~~~~~~~ /* If last exactn not at current position. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || pending_exact + *pending_exact + 1 != buf_end ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* We have only one byte following the exactn for the count. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || *pending_exact >= (1 << BYTEWIDTH) - MAX_ICHAR_LEN ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* If followed by a repetition operator. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ If the lookahead fails because of end of pattern, any ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ trailing backslash will get caught later. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || (p != pend && (*p == '*' || *p == '^')) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || ((syntax & RE_BK_PLUS_QM) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ? p + 1 < pend && *p == '\\' && (p[1] == '+' || p[1] == '?') ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ : p != pend && (*p == '+' || *p == '?')) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || ((syntax & RE_INTERVALS) ~~~~~~~~~~~~~~~~~~~~~~~~~~~ && ((syntax & RE_NO_BK_BRACES) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ? p != pend && *p == '{' ~~~~~~~~~~~~~~~~~~~~~~~~ : p + 1 < pend && (p[0] == '\\' && p[1] == '{')))) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ /* Start building a new exactn. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ laststart = buf_end; ~~~~~~~~~~~~~~~~~~~~ BUF_PUSH_2 (exactn, 0); ~~~~~~~~~~~~~~~~~~~~~~~ pending_exact = buf_end - 1; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ #ifndef MULE ~~~~~~~~~~~~ BUF_PUSH (c); ~~~~~~~~~~~~~ (*pending_exact)++; ~~~~~~~~~~~~~~~~~~~ #else ~~~~~ { ~ Bytecount bt_count; ~~~~~~~~~~~~~~~~~~~ Ibyte tmp_buf[MAX_ICHAR_LEN]; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int i; ~~~~~~ bt_count = set_itext_ichar (tmp_buf, c); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (i = 0; i < bt_count; i++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ BUF_PUSH (tmp_buf[i]); ~~~~~~~~~~~~~~~~~~~~~~ (*pending_exact)++; ~~~~~~~~~~~~~~~~~~~ } ~ } ~ #endif ~~~~~~ break; ~~~~~~ } ~ } /* switch (c) */ ~~~~~~~~~~~~~~~~~~ } /* while p != pend */ ~~~~~~~~~~~~~~~~~~~~~~~ /* Through the pattern now. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (fixup_alt_jump) ~~~~~~~~~~~~~~~~~~~ STORE_JUMP (jump_past_alt, fixup_alt_jump, buf_end); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!COMPILE_STACK_EMPTY) ~~~~~~~~~~~~~~~~~~~~~~~~~ FREE_STACK_RETURN (REG_EPAREN); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* If we don't want backtracking, force success ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the first time we reach the end of the compiled pattern. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (syntax & RE_NO_POSIX_BACKTRACKING) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ BUF_PUSH (succeed); ~~~~~~~~~~~~~~~~~~~ xfree (compile_stack.stack); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* We have succeeded; set the length of the buffer. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->used = buf_end - bufp->buffer; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef DEBUG ~~~~~~~~~~~~ if (debug_regexps & RE_DEBUG_COMPILATION) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ DEBUG_PRINT1 ("\nCompiled pattern: \n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ print_compiled_pattern (bufp); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ #endif /* DEBUG */ ~~~~~~~~~~~~~~~~~~ #ifndef MATCH_MAY_ALLOCATE ~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Initialize the failure stack to the largest possible stack. This ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ isn't necessary unless we're trying to avoid calling alloca in ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the search and match routines. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ int num_regs = bufp->re_ngroups + 1; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Since DOUBLE_FAIL_STACK refuses to double only if the current size ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ is strictly greater than re_max_failures, the largest possible stack ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ is 2 * re_max_failures failure points. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (fail_stack.size < (2 * re_max_failures * MAX_FAILURE_ITEMS)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ fail_stack.size = (2 * re_max_failures * MAX_FAILURE_ITEMS); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (! fail_stack.stack) ~~~~~~~~~~~~~~~~~~~~~~~ fail_stack.stack ~~~~~~~~~~~~~~~~ = (fail_stack_elt_t *) xmalloc (fail_stack.size ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ * sizeof (fail_stack_elt_t)); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ else ~~~~ fail_stack.stack ~~~~~~~~~~~~~~~~ = (fail_stack_elt_t *) xrealloc (fail_stack.stack, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (fail_stack.size ~~~~~~~~~~~~~~~~ * sizeof (fail_stack_elt_t))); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ regex_grow_registers (num_regs); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ #endif /* not MATCH_MAY_ALLOCATE */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return REG_NOERROR; ~~~~~~~~~~~~~~~~~~~ } /* regex_compile */ ~~~~~~~~~~~~~~~~~~~~~ ~ /* Subroutines for `regex_compile'. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Store OP at LOC followed by two-byte integer parameter ARG. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ static void ~~~~~~~~~~~ store_op1 (re_opcode_t op, unsigned char *loc, int arg) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ *loc = (unsigned char) op; ~~~~~~~~~~~~~~~~~~~~~~~~~~ STORE_NUMBER (loc + 1, arg); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* Like `store_op1', but for two two-byte parameters ARG1 and ARG2. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ static void ~~~~~~~~~~~ store_op2 (re_opcode_t op, unsigned char *loc, int arg1, int arg2) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ *loc = (unsigned char) op; ~~~~~~~~~~~~~~~~~~~~~~~~~~ STORE_NUMBER (loc + 1, arg1); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ STORE_NUMBER (loc + 3, arg2); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* Copy the bytes from LOC to END to open up three bytes of space at LOC ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for OP followed by two-byte integer parameter ARG. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ static void ~~~~~~~~~~~ insert_op1 (re_opcode_t op, unsigned char *loc, int arg, unsigned char *end) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ REGISTER unsigned char *pfrom = end; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ REGISTER unsigned char *pto = end + 3; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ while (pfrom != loc) ~~~~~~~~~~~~~~~~~~~~ *--pto = *--pfrom; ~~~~~~~~~~~~~~~~~~ store_op1 (op, loc, arg); ~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* Like `insert_op1', but for two two-byte parameters ARG1 and ARG2. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ static void ~~~~~~~~~~~ insert_op2 (re_opcode_t op, unsigned char *loc, int arg1, int arg2, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ unsigned char *end) ~~~~~~~~~~~~~~~~~~~ { ~ REGISTER unsigned char *pfrom = end; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ REGISTER unsigned char *pto = end + 5; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ while (pfrom != loc) ~~~~~~~~~~~~~~~~~~~~ *--pto = *--pfrom; ~~~~~~~~~~~~~~~~~~ store_op2 (op, loc, arg1, arg2); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* P points to just after a ^ in PATTERN. Return true if that ^ comes ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ after an alternative or a begin-subexpression. We assume there is at ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ least one character before the ^. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ static re_bool ~~~~~~~~~~~~~~ at_begline_loc_p (re_char *pattern, re_char *p, reg_syntax_t syntax) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ re_char *prev = p - 2; ~~~~~~~~~~~~~~~~~~~~~~ re_bool prev_prev_backslash = prev > pattern && prev[-1] == '\\'; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return ~~~~~~ /* After a subexpression? */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (*prev == '(' && (syntax & RE_NO_BK_PARENS || prev_prev_backslash)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* After an alternative? */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || (*prev == '|' && (syntax & RE_NO_BK_VBAR || prev_prev_backslash)); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* The dual of at_begline_loc_p. This one is for $. We assume there is ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ at least one character after the $, i.e., `P < PEND'. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ static re_bool ~~~~~~~~~~~~~~ at_endline_loc_p (re_char *p, re_char *pend, int syntax) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ re_char *next = p; ~~~~~~~~~~~~~~~~~~ re_bool next_backslash = *next == '\\'; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_char *next_next = p + 1 < pend ? p + 1 : 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return ~~~~~~ /* Before a subexpression? */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (syntax & RE_NO_BK_PARENS ? *next == ')' ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ : next_backslash && next_next && *next_next == ')') ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Before an alternative? */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || (syntax & RE_NO_BK_VBAR ? *next == '|' ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ : next_backslash && next_next && *next_next == '|'); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* Returns true if REGNUM is in one of COMPILE_STACK's elements and ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ false if it's not. */ ~~~~~~~~~~~~~~~~~~~~~~ static re_bool ~~~~~~~~~~~~~~ group_in_compile_stack (compile_stack_type compile_stack, regnum_t regnum) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ int this_element; ~~~~~~~~~~~~~~~~~ for (this_element = compile_stack.avail - 1; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ this_element >= 0; ~~~~~~~~~~~~~~~~~~ this_element--) ~~~~~~~~~~~~~~~ if (compile_stack.stack[this_element].regnum == regnum) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return true; ~~~~~~~~~~~~ return false; ~~~~~~~~~~~~~ } ~ /* Read the ending character of a range (in a bracket expression) from the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ uncompiled pattern *P_PTR (which ends at PEND). We assume the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ starting character is in `P[-2]'. (`P[-1]' is the character `-'.) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Then we set the translation of all bits between the starting and ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ending characters (inclusive) in the compiled pattern B. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Return an error code. ~~~~~~~~~~~~~~~~~~~~~ We use these short variable names so we can use the same macros as ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `regex_compile' itself. ~~~~~~~~~~~~~~~~~~~~~~~ Under Mule, this is only called when both chars of the range are ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ASCII. */ ~~~~~~~~~ static reg_errcode_t ~~~~~~~~~~~~~~~~~~~~ compile_range (re_char **p_ptr, re_char *pend, RE_TRANSLATE_TYPE translate, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ reg_syntax_t syntax, unsigned char *buf_end) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ Ichar this_char; ~~~~~~~~~~~~~~~~ re_char *p = *p_ptr; ~~~~~~~~~~~~~~~~~~~~ int range_start, range_end; ~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (p == pend) ~~~~~~~~~~~~~~ return REG_ERANGE; ~~~~~~~~~~~~~~~~~~ /* Even though the pattern is a signed `char *', we need to fetch ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ with unsigned char *'s; if the high bit of the pattern character ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ is set, the range endpoints will be negative if we fetch using a ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ signed char *. ~~~~~~~~~~~~~~ We also want to fetch the endpoints without translating them; the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ appropriate translation is done in the bit-setting loop below. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* The SVR4 compiler on the 3B2 had trouble with unsigned const char *. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ range_start = ((const unsigned char *) p)[-2]; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ range_end = ((const unsigned char *) p)[0]; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Have to increment the pointer into the pattern string, so the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ caller isn't still at the ending character. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (*p_ptr)++; ~~~~~~~~~~~ /* If the start is after the end, the range is empty. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (range_start > range_end) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return syntax & RE_NO_EMPTY_RANGES ? REG_ERANGE : REG_NOERROR; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Here we see why `this_char' has to be larger than an `unsigned ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ char' -- the range is inclusive, so if `range_end' == 0xff ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (assuming 8-bit characters), we would otherwise go into an infinite ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ loop, since all characters <= 0xff. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (this_char = range_start; this_char <= range_end; this_char++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ SET_LIST_BIT (RE_TRANSLATE (this_char)); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ return REG_NOERROR; ~~~~~~~~~~~~~~~~~~~ } ~ #ifdef MULE ~~~~~~~~~~~ static reg_errcode_t ~~~~~~~~~~~~~~~~~~~~ compile_extended_range (re_char **p_ptr, re_char *pend, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RE_TRANSLATE_TYPE translate, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ reg_syntax_t syntax, Lisp_Object rtab) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ Ichar this_char, range_start, range_end; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ const Ibyte *p; ~~~~~~~~~~~~~~~ if (*p_ptr == pend) ~~~~~~~~~~~~~~~~~~~ return REG_ERANGE; ~~~~~~~~~~~~~~~~~~ p = (const Ibyte *) *p_ptr; ~~~~~~~~~~~~~~~~~~~~~~~~~~~ range_end = itext_ichar (p); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ p--; /* back to '-' */ ~~~~~~~~~~~~~~~~~~~~~~ DEC_IBYTEPTR (p); /* back to start of range */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* We also want to fetch the endpoints without translating them; the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ appropriate translation is done in the bit-setting loop below. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ range_start = itext_ichar (p); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ INC_IBYTEPTR (*p_ptr); ~~~~~~~~~~~~~~~~~~~~~~ /* If the start is after the end, the range is empty. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (range_start > range_end) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return syntax & RE_NO_EMPTY_RANGES ? REG_ERANGE : REG_NOERROR; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifndef UNICODE_INTERNAL ~~~~~~~~~~~~~~~~~~~~~~~~ /* Can't have ranges spanning different charsets, except maybe for ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ranges entirely within the first 256 chars. (The intent of this is that ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the effect of such a range would be unpredictable, since there is no ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ well-defined ordering over charsets and the particular assignment of ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ charset ID's is arbitrary.) This does not apply to Unicode, with ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ well-defined character values. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if ((range_start >= 0x100 || range_end >= 0x100) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ && !EQ (old_mule_ichar_charset (range_start), ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ old_mule_ichar_charset (range_end))) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return REG_ERANGESPAN; ~~~~~~~~~~~~~~~~~~~~~~ #endif /* not UNICODE_INTERNAL */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (TRANSLATE_P (translate)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ /* #### This might be way inefficient if the range encompasses 10,000 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ chars or something. To be efficient, you'd have to do something like ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ this: ~~~~~ range_table a ~~~~~~~~~~~~~ range_table b; ~~~~~~~~~~~~~~ map_char_table (translation table, [range_start, range_end]) of ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ lambda (ch, translation): ~~~~~~~~~~~~~~~~~~~~~~~~~ put (ch, Qt) in a ~~~~~~~~~~~~~~~~~ put (translation, Qt) in b ~~~~~~~~~~~~~~~~~~~~~~~~~~ invert the range in a and truncate to [range_start, range_end] ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ put the union of a, b in rtab ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ This is to say, we want to map every character that has a translation ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ to its translation, and other characters to themselves. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ This assumes, as is reasonable in practice, that a translation ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ table maps individual characters to their translation, and does ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ not generally map multiple characters to the same translation. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ ~~ for (this_char = range_start; this_char <= range_end; this_char++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ SET_RANGETAB_BIT (RE_TRANSLATE (this_char)); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ } ~ else ~~~~ put_range_table (rtab, range_start, range_end, Qt); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return REG_NOERROR; ~~~~~~~~~~~~~~~~~~~ } ~ #endif /* MULE */ ~~~~~~~~~~~~~~~~~ #ifdef emacs ~~~~~~~~~~~~ reg_errcode_t ~~~~~~~~~~~~~ compile_char_class (re_wctype_t cc, Lisp_Object rtab, Bitbyte *flags_out) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ *flags_out |= re_wctype_to_bit (cc); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ switch (cc) ~~~~~~~~~~~ { ~ case RECC_ASCII: ~~~~~~~~~~~~~~~~ put_range_table (rtab, 0, 0x7f, Qt); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case RECC_XDIGIT: ~~~~~~~~~~~~~~~~~ put_range_table (rtab, 'a', 'f', Qt); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ put_range_table (rtab, 'A', 'f', Qt); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* fallthrough */ ~~~~~~~~~~~~~~~~~ case RECC_DIGIT: ~~~~~~~~~~~~~~~~ put_range_table (rtab, '0', '9', Qt); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case RECC_BLANK: ~~~~~~~~~~~~~~~~ put_range_table (rtab, ' ', ' ', Qt); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ put_range_table (rtab, '\t', '\t', Qt); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case RECC_PRINT: ~~~~~~~~~~~~~~~~ put_range_table (rtab, ' ', 0x7e, Qt); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ put_range_table (rtab, 0x80, CHAR_CODE_LIMIT, Qt); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case RECC_GRAPH: ~~~~~~~~~~~~~~~~ put_range_table (rtab, '!', 0x7e, Qt); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ put_range_table (rtab, 0x80, CHAR_CODE_LIMIT, Qt); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case RECC_NONASCII: ~~~~~~~~~~~~~~~~~~~ case RECC_MULTIBYTE: ~~~~~~~~~~~~~~~~~~~~ put_range_table (rtab, 0x80, CHAR_CODE_LIMIT, Qt); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case RECC_CNTRL: ~~~~~~~~~~~~~~~~ put_range_table (rtab, 0x00, 0x1f, Qt); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case RECC_UNIBYTE: ~~~~~~~~~~~~~~~~~~ /* Never true in XEmacs. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ /* The following all have their own bits in the class_bits argument to ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ charset_mule and charset_mule_not, they don't use the range table ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ information. */ ~~~~~~~~~~~~~~~ case RECC_ALPHA: ~~~~~~~~~~~~~~~~ case RECC_WORD: ~~~~~~~~~~~~~~~ case RECC_ALNUM: /* Equivalent to RECC_WORD */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_LOWER: ~~~~~~~~~~~~~~~~ case RECC_PUNCT: ~~~~~~~~~~~~~~~~ case RECC_SPACE: ~~~~~~~~~~~~~~~~ case RECC_UPPER: ~~~~~~~~~~~~~~~~ break; ~~~~~~ } ~ return REG_NOERROR; ~~~~~~~~~~~~~~~~~~~ } ~ #endif /* MULE */ ~~~~~~~~~~~~~~~~~ ~ /* re_compile_fastmap computes a ``fastmap'' for the compiled pattern in ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ BUFP. A fastmap records which of the (1 << BYTEWIDTH) possible ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ characters can start a string that matches the pattern. This fastmap ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ is used by re_search to skip quickly over impossible starting points. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ The caller must supply the address of a (1 << BYTEWIDTH)-byte data ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ area as BUFP->fastmap. ~~~~~~~~~~~~~~~~~~~~~~ We set the `fastmap', `fastmap_accurate', and `can_be_null' fields in ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the pattern buffer. ~~~~~~~~~~~~~~~~~~~ Returns 0 if we succeed, -2 if an internal error. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int ~~~ re_compile_fastmap (struct re_pattern_buffer *bufp ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RE_LISP_SHORT_CONTEXT_ARGS_DECL) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ int j, k; ~~~~~~~~~ #ifdef MATCH_MAY_ALLOCATE ~~~~~~~~~~~~~~~~~~~~~~~~~ fail_stack_type fail_stack; ~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ /* We don't push any register information onto the failure stack. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* &&#### this should be changed for 8-bit-fixed, for efficiency. see ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ comment marked with &&#### in re_search_2. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ REGISTER char *fastmap = bufp->fastmap; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ unsigned char *pattern = bufp->buffer; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ long size = bufp->used; ~~~~~~~~~~~~~~~~~~~~~~~ re_char *p = pattern; ~~~~~~~~~~~~~~~~~~~~~ REGISTER re_char *pend = pattern + size; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef REGEX_REL_ALLOC ~~~~~~~~~~~~~~~~~~~~~~ /* This holds the pointer to the failure stack, when ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ it is allocated relocatably. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fail_stack_elt_t *failure_stack_ptr; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ /* Assume that each path through the pattern can be null until ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ proven otherwise. We set this false at the bottom of switch ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ statement, to which we get only if a particular path doesn't ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ match the empty string. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_bool path_can_be_null = true; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* We aren't doing a `succeed_n' to begin with. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_bool succeed_n_p = false; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef ERROR_CHECK_MALLOC ~~~~~~~~~~~~~~~~~~~~~~~~~ /* The pattern comes from string data, not buffer data. We don't access ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ any buffer data, so we don't have to worry about malloc() (but the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ disallowed flag may have been set by a caller). */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int depth = bind_regex_malloc_disallowed (0); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ assert (fastmap != NULL && p != NULL); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ INIT_FAIL_STACK (); ~~~~~~~~~~~~~~~~~~~ memset (fastmap, 0, 1 << BYTEWIDTH); /* Assume nothing's valid. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->fastmap_accurate = 1; /* It will be when we're done. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->can_be_null = 0; ~~~~~~~~~~~~~~~~~~~~~~ while (1) ~~~~~~~~~ { ~ if (p == pend || *p == succeed) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ /* We have reached the (effective) end of pattern. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!FAIL_STACK_EMPTY ()) ~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ bufp->can_be_null |= path_can_be_null; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Reset for next path. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~ path_can_be_null = true; ~~~~~~~~~~~~~~~~~~~~~~~~ p = (unsigned char *) fail_stack.stack[--fail_stack.avail].pointer; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ continue; ~~~~~~~~~ } ~ else ~~~~ break; ~~~~~~ } ~ /* We should never be about to go beyond the end of the pattern. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ assert (p < pend); ~~~~~~~~~~~~~~~~~~ switch ((re_opcode_t) *p++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ /* I guess the idea here is to simply not bother with a fastmap ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if a backreference is used, since it's too hard to figure out ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the fastmap for the corresponding group. Setting ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `can_be_null' stops `re_search_2' from using the fastmap, so ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ that is all we do. */ ~~~~~~~~~~~~~~~~~~~~~~ case duplicate: ~~~~~~~~~~~~~~~ bufp->can_be_null = 1; ~~~~~~~~~~~~~~~~~~~~~~ goto done; ~~~~~~~~~~ /* Following are the cases which match a character. These end ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ with `break'. */ ~~~~~~~~~~~~~~~~~ case exactn: ~~~~~~~~~~~~ fastmap[p[1]] = 1; ~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case charset: ~~~~~~~~~~~~~ /* XEmacs: Under Mule, these bit vectors will ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ only contain values for characters below 0x80. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH))) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[j] = 1; ~~~~~~~~~~~~~~~ break; ~~~~~~ case charset_not: ~~~~~~~~~~~~~~~~~ /* Chars beyond end of map must be allowed. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef MULE ~~~~~~~~~~~ for (j = *p * BYTEWIDTH; j < 0x80; j++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[j] = 1; ~~~~~~~~~~~~~~~ /* And all extended characters must be allowed, too. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (j = 0x80; j < 0x100; j++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (ibyte_first_byte_p (j)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[j] = 1; ~~~~~~~~~~~~~~~ #else /* not MULE */ ~~~~~~~~~~~~~~~~~~~~ for (j = *p * BYTEWIDTH; j < (1 << BYTEWIDTH); j++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[j] = 1; ~~~~~~~~~~~~~~~ #endif /* MULE */ ~~~~~~~~~~~~~~~~~ for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!(p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH)))) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[j] = 1; ~~~~~~~~~~~~~~~ break; ~~~~~~ #ifdef MULE ~~~~~~~~~~~ case charset_mule: ~~~~~~~~~~~~~~~~~~ { ~ int nentries; ~~~~~~~~~~~~~ Bitbyte flags = *p++; ~~~~~~~~~~~~~~~~~~~~~ if (flags) ~~~~~~~~~~ { ~ /* We need to consult the syntax table, fastmap won't ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ work. */ ~~~~~~~~ bufp->can_be_null = 1; ~~~~~~~~~~~~~~~~~~~~~~ goto done; ~~~~~~~~~~ } ~ nentries = unified_range_table_nentries ((void *) p); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (j = 0; j < nentries; j++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ EMACS_INT first, last; ~~~~~~~~~~~~~~~~~~~~~~ Lisp_Object dummy_val; ~~~~~~~~~~~~~~~~~~~~~~ int jj; ~~~~~~~ Ibyte strr[MAX_ICHAR_LEN]; ~~~~~~~~~~~~~~~~~~~~~~~~~~ unified_range_table_get_range ((void *) p, j, &first, &last, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ &dummy_val); ~~~~~~~~~~~~ #ifndef UNICODE_INTERNAL ~~~~~~~~~~~~~~~~~~~~~~~~ for (jj = first; jj <= last && jj < 0x80; jj++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[jj] = 1; ~~~~~~~~~~~~~~~~ /* Ranges below 0x100 can span charsets, but there ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ are only two (Control-1 and Latin-1), and ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ either first or last has to be in them. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ set_itext_ichar (strr, first); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[*strr] = 1; ~~~~~~~~~~~~~~~~~~~ if (last < 0x100) ~~~~~~~~~~~~~~~~~ { ~ set_itext_ichar (strr, last); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[*strr] = 1; ~~~~~~~~~~~~~~~~~~~ } ~ else if (CHAR_CODE_LIMIT == last) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ /* This is RECC_MULTIBYTE or RECC_NONASCII; true for all ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ non-ASCII characters. */ ~~~~~~~~~~~~~~~~~~~~~~~~ jj = 0x80; ~~~~~~~~~~ while (jj < 0xA0) ~~~~~~~~~~~~~~~~~ { ~ fastmap[jj++] = 1; ~~~~~~~~~~~~~~~~~~ } ~ } ~ #else ~~~~~ /* Ranges can span charsets. We depend on the fact that ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ lead bytes are monotonically non-decreasing as ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ character values increase. @@#### This is a fairly ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ reasonable assumption in general (but DOES NOT WORK in ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ old Mule due to the ordering of private dimension-1 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ chars before official dimension-2 chars), and introduces ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ a dependency on the particular representation. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ Ibyte strrlast[MAX_ICHAR_LEN]; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ set_itext_ichar (strr, first); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ set_itext_ichar (strrlast, min (last, CHAR_CODE_LIMIT - 1)); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (jj = *strr; jj <= *strrlast; jj++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[jj] = 1; ~~~~~~~~~~~~~~~~ } ~ #endif /* not UNICODE_INTERNAL */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* If it's not a possible first byte, it can't be in the fastmap. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ In UTF-8, lead bytes are not contiguous with ASCII, so a ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ range spanning the ASCII/non-ASCII boundary will put ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ extraneous bytes in the range [0x80 - 0xBF] in the fastmap. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (j = 0x80; j < 0x100; j++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!ibyte_first_byte_p (j)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[j] = 0; ~~~~~~~~~~~~~~~ } ~ break; ~~~~~~ case charset_mule_not: ~~~~~~~~~~~~~~~~~~~~~~ { ~ int nentries; ~~~~~~~~~~~~~ int smallest_prev = 0; ~~~~~~~~~~~~~~~~~~~~~~ Bitbyte flags = *p++; ~~~~~~~~~~~~~~~~~~~~~ if (flags) ~~~~~~~~~~ { ~ /* We need to consult the syntax table, fastmap won't ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ work. */ ~~~~~~~~ bufp->can_be_null = 1; ~~~~~~~~~~~~~~~~~~~~~~ goto done; ~~~~~~~~~~ } ~ nentries = unified_range_table_nentries ((void *) p); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifndef UNICODE_INTERNAL ~~~~~~~~~~~~~~~~~~~~~~~~ for (j = 0; j < nentries; j++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ EMACS_INT first, last; ~~~~~~~~~~~~~~~~~~~~~~ Lisp_Object dummy_val; ~~~~~~~~~~~~~~~~~~~~~~ int jj; ~~~~~~~ unified_range_table_get_range ((void *) p, j, &first, &last, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ &dummy_val); ~~~~~~~~~~~~ for (jj = smallest_prev; jj < first && jj < 0x80; jj++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[jj] = 1; ~~~~~~~~~~~~~~~~ smallest_prev = last + 1; ~~~~~~~~~~~~~~~~~~~~~~~~~ if (smallest_prev >= 0x80) ~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ } ~ /* Also set lead bytes after the end */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (j = smallest_prev; j < 0x80; j++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[j] = 1; ~~~~~~~~~~~~~~~ /* Calculating which lead bytes are actually allowed ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ here is rather difficult, so we just punt and allow ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ all of them. ~~~~~~~~~~~~ */ ~~ for (j = 0x80; j < 0x100; j++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (ibyte_first_byte_p (j)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[j] = 1; ~~~~~~~~~~~~~~~ #else ~~~~~ for (j = 0; j < nentries; j++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ EMACS_INT first, last; ~~~~~~~~~~~~~~~~~~~~~~ /* This denotes a range of lead bytes that are not ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ in the fastmap. */ ~~~~~~~~~~~~~~~~~~ int firstlead, lastlead; ~~~~~~~~~~~~~~~~~~~~~~~~ Lisp_Object dummy_val; ~~~~~~~~~~~~~~~~~~~~~~ int jj; ~~~~~~~ unified_range_table_get_range ((void *) p, j, &first, &last, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ &dummy_val); ~~~~~~~~~~~~ /* With Unicode-internal, lead bytes that are entirely ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ within the range and not including the beginning or end ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ are definitely not in the fastmap. Leading bytes that ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ include the beginning or ending characters will be in ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the fastmap unless the beginning or ending characters ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ are the first or last character, respectively, that uses ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ this lead byte. ~~~~~~~~~~~~~~~ @@#### WARNING! In order to determine whether we are the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ first or last character using a lead byte we use and ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ embed in the code some knowledge of how UTF-8 works -- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ at least, the fact that the the first character using a ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ particular lead byte has the minimum-numbered trailing ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ byte in all its trailing bytes, and the last character ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ using a particular lead byte has the maximum-numbered ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ trailing byte in all its trailing bytes. We abstract ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ away the actual minimum/maximum trailing byte numbers, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ at least. We could perhaps do this more portably by ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ just looking at the representation of the character one ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ higher or lower and seeing if the lead byte changes, but ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ you'd run into the problem of invalid characters, e.g. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if you're at the edge of the range of surrogates or are ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the top-most allowed character. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ ~~ if (first < 0x80) ~~~~~~~~~~~~~~~~~ firstlead = first; ~~~~~~~~~~~~~~~~~~ else ~~~~ { ~ Ibyte strr[MAX_ICHAR_LEN]; ~~~~~~~~~~~~~~~~~~~~~~~~~~ Bytecount slen = set_itext_ichar (strr, first); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int kk; ~~~~~~~ /* Determine if we're the first character using our ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ leading byte. */ ~~~~~~~~~~~~~~~~ for (kk = 1; kk < slen; kk++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (strr[kk] != FIRST_TRAILING_BYTE) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ /* If not, this leading byte might occur, so ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ make sure it gets added to the fastmap. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ firstlead = *strr + 1; ~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ } ~ /* Otherwise, we're the first character using our ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ leading byte, and we don't need to add the leading ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ byte to the fastmap. (If our range doesn't ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ completely cover the leading byte, it will get added ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ anyway by the code handling the other end of the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ range.) */ ~~~~~~~~~~ firstlead = *strr; ~~~~~~~~~~~~~~~~~~ } ~ if (last < 0x80) ~~~~~~~~~~~~~~~~ lastlead = last; ~~~~~~~~~~~~~~~~ else ~~~~ { ~ Ibyte strr[MAX_ICHAR_LEN]; ~~~~~~~~~~~~~~~~~~~~~~~~~~ Bytecount slen ~~~~~~~~~~~~~~ = set_itext_ichar (strr, ~~~~~~~~~~~~~~~~~~~~~~~~ min (last, ~~~~~~~~~~ CHAR_CODE_LIMIT - 1)); ~~~~~~~~~~~~~~~~~~~~~~ int kk; ~~~~~~~ /* Same as above but for the last character using ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ our leading byte. */ ~~~~~~~~~~~~~~~~~~~~ for (kk = 1; kk < slen; kk++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (strr[kk] != LAST_TRAILING_BYTE) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ lastlead = *strr - 1; ~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ } ~ lastlead = *strr; ~~~~~~~~~~~~~~~~~ } ~ /* Now, FIRSTLEAD and LASTLEAD are set to the beginning and ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ end, inclusive, of a range of lead bytes that cannot be ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ in the fastmap. Essentially, we want to set all the other ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bytes to be in the fastmap. Here we handle those after ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the previous range and before this one. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (jj = smallest_prev; jj < firstlead; jj++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[jj] = 1; ~~~~~~~~~~~~~~~~ smallest_prev = lastlead + 1; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* Also set lead bytes after the end of the final range. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (j = smallest_prev; j < 0x100; j++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[j] = 1; ~~~~~~~~~~~~~~~ /* If it's not a possible first byte, it can't be in the fastmap. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ In UTF-8, lead bytes are not contiguous with ASCII, so a ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ range spanning the ASCII/non-ASCII boundary will put ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ extraneous bytes in the range [0x80 - 0xBF] in the fastmap. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (j = 0x80; j < 0x100; j++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!ibyte_first_byte_p (j)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[j] = 0; ~~~~~~~~~~~~~~~ #endif /* UNICODE_INTERNAL */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ break; ~~~~~~ #endif /* MULE */ ~~~~~~~~~~~~~~~~~ case anychar: ~~~~~~~~~~~~~ { ~ int fastmap_newline = fastmap['\n']; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* `.' matches anything ... */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef MULE ~~~~~~~~~~~ /* "anything" only includes bytes that can be the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ first byte of a character. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (j = 0; j < 0x100; j++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (ibyte_first_byte_p (j)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[j] = 1; ~~~~~~~~~~~~~~~ #else ~~~~~ for (j = 0; j < (1 << BYTEWIDTH); j++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[j] = 1; ~~~~~~~~~~~~~~~ #endif ~~~~~~ /* ... except perhaps newline. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!(bufp->syntax & RE_DOT_NEWLINE)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap['\n'] = fastmap_newline; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Return if we have already set `can_be_null'; if we have, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ then the fastmap is irrelevant. Something's wrong here. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ else if (bufp->can_be_null) ~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto done; ~~~~~~~~~~ /* Otherwise, have to check alternative paths. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ } ~ #ifndef emacs ~~~~~~~~~~~~~ case wordchar: ~~~~~~~~~~~~~~ for (j = 0; j < (1 << BYTEWIDTH); j++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (SYNTAX (ignored, j) == Sword) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[j] = 1; ~~~~~~~~~~~~~~~ break; ~~~~~~ case notwordchar: ~~~~~~~~~~~~~~~~~ for (j = 0; j < (1 << BYTEWIDTH); j++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (SYNTAX (ignored, j) != Sword) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[j] = 1; ~~~~~~~~~~~~~~~ break; ~~~~~~ #else /* emacs */ ~~~~~~~~~~~~~~~~~ case wordchar: ~~~~~~~~~~~~~~ case notwordchar: ~~~~~~~~~~~~~~~~~ case wordbound: ~~~~~~~~~~~~~~~ case notwordbound: ~~~~~~~~~~~~~~~~~~ case wordbeg: ~~~~~~~~~~~~~ case wordend: ~~~~~~~~~~~~~ case notsyntaxspec: ~~~~~~~~~~~~~~~~~~~ case syntaxspec: ~~~~~~~~~~~~~~~~ /* This match depends on text properties. These end with ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ aborting optimizations. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->can_be_null = 1; ~~~~~~~~~~~~~~~~~~~~~~ goto done; ~~~~~~~~~~ #if 0 /* all of the following code is unused now that the `syntax-table' ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ property exists -- it's trickier to do this than just look in ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the buffer. &&#### but we could just use the syntax-cache stuff ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ instead; why don't we? --ben */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case wordchar: ~~~~~~~~~~~~~~ k = (int) Sword; ~~~~~~~~~~~~~~~~ goto matchsyntax; ~~~~~~~~~~~~~~~~~ case notwordchar: ~~~~~~~~~~~~~~~~~ k = (int) Sword; ~~~~~~~~~~~~~~~~ goto matchnotsyntax; ~~~~~~~~~~~~~~~~~~~~ case syntaxspec: ~~~~~~~~~~~~~~~~ k = *p++; ~~~~~~~~~ matchsyntax: ~~~~~~~~~~~~ #ifdef MULE ~~~~~~~~~~~ for (j = 0; j < 0x80; j++) ~~~~~~~~~~~~~~~~~~~~~~~~~~ if (SYNTAX ~~~~~~~~~~ (XCHAR_TABLE (BUFFER_MIRROR_SYNTAX_TABLE (lispbuf)), j) == ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (enum syntaxcode) k) ~~~~~~~~~~~~~~~~~~~~ fastmap[j] = 1; ~~~~~~~~~~~~~~~ /* @@#### To be correct, we need to set the fastmap for any ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ lead byte any of whose characters can have this syntax code. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ This is hard to calculate so we just punt for now. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (j = 0x80; j < 0x100; j++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (ibyte_first_byte_p (j)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[j] = 1; ~~~~~~~~~~~~~~~ #else /* not MULE */ ~~~~~~~~~~~~~~~~~~~~ for (j = 0; j < (1 << BYTEWIDTH); j++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (SYNTAX ~~~~~~~~~~ (XCHAR_TABLE (BUFFER_MIRROR_SYNTAX_TABLE (lispbuf)), j) == ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (enum syntaxcode) k) ~~~~~~~~~~~~~~~~~~~~ fastmap[j] = 1; ~~~~~~~~~~~~~~~ #endif /* MULE */ ~~~~~~~~~~~~~~~~~ break; ~~~~~~ case notsyntaxspec: ~~~~~~~~~~~~~~~~~~~ k = *p++; ~~~~~~~~~ matchnotsyntax: ~~~~~~~~~~~~~~~ #ifdef MULE ~~~~~~~~~~~ for (j = 0; j < 0x80; j++) ~~~~~~~~~~~~~~~~~~~~~~~~~~ if (SYNTAX ~~~~~~~~~~ (XCHAR_TABLE ~~~~~~~~~~~~ (BUFFER_MIRROR_SYNTAX_TABLE (lispbuf)), j) != ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (enum syntaxcode) k) ~~~~~~~~~~~~~~~~~~~~ fastmap[j] = 1; ~~~~~~~~~~~~~~~ /* @@#### To be correct, we need to set the fastmap for any ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ lead byte all of whose characters do not have this syntax code. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ This is hard to calculate so we just punt for now. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (j = 0x80; j < 0x100; j++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (ibyte_first_byte_p (j)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[j] = 1; ~~~~~~~~~~~~~~~ #else /* not MULE */ ~~~~~~~~~~~~~~~~~~~~ for (j = 0; j < (1 << BYTEWIDTH); j++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (SYNTAX ~~~~~~~~~~ (XCHAR_TABLE ~~~~~~~~~~~~ (BUFFER_MIRROR_SYNTAX_TABLE (lispbuf)), j) != ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (enum syntaxcode) k) ~~~~~~~~~~~~~~~~~~~~ fastmap[j] = 1; ~~~~~~~~~~~~~~~ #endif /* MULE */ ~~~~~~~~~~~~~~~~~ break; ~~~~~~ #endif /* 0 */ ~~~~~~~~~~~~~~ #ifdef MULE ~~~~~~~~~~~ /* 97/2/17 jhod category patch */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case categoryspec: ~~~~~~~~~~~~~~~~~~ case notcategoryspec: ~~~~~~~~~~~~~~~~~~~~~ bufp->can_be_null = 1; ~~~~~~~~~~~~~~~~~~~~~~ UNBIND_REGEX_MALLOC_CHECK (); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return 0; ~~~~~~~~~ /* end if category patch */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif /* MULE */ ~~~~~~~~~~~~~~~~~ /* All cases after this match the empty string. These end with ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `continue'. */ ~~~~~~~~~~~~~~~ case before_dot: ~~~~~~~~~~~~~~~~ case at_dot: ~~~~~~~~~~~~ case after_dot: ~~~~~~~~~~~~~~~ continue; ~~~~~~~~~ #endif /* emacs */ ~~~~~~~~~~~~~~~~~~ case no_op: ~~~~~~~~~~~ case begline: ~~~~~~~~~~~~~ case endline: ~~~~~~~~~~~~~ case begbuf: ~~~~~~~~~~~~ case endbuf: ~~~~~~~~~~~~ #ifndef emacs ~~~~~~~~~~~~~ case wordbound: ~~~~~~~~~~~~~~~ case notwordbound: ~~~~~~~~~~~~~~~~~~ case wordbeg: ~~~~~~~~~~~~~ case wordend: ~~~~~~~~~~~~~ #endif ~~~~~~ case push_dummy_failure: ~~~~~~~~~~~~~~~~~~~~~~~~ continue; ~~~~~~~~~ case jump_n: ~~~~~~~~~~~~ case pop_failure_jump: ~~~~~~~~~~~~~~~~~~~~~~ case maybe_pop_jump: ~~~~~~~~~~~~~~~~~~~~ case jump: ~~~~~~~~~~ case jump_past_alt: ~~~~~~~~~~~~~~~~~~~ case dummy_failure_jump: ~~~~~~~~~~~~~~~~~~~~~~~~ EXTRACT_NUMBER_AND_INCR (j, p); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ p += j; ~~~~~~~ if (j > 0) ~~~~~~~~~~ continue; ~~~~~~~~~ /* Jump backward implies we just went through the body of a ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ loop and matched nothing. Opcode jumped to should be ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `on_failure_jump' or `succeed_n'. Just treat it like an ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ordinary jump. For a * loop, it has pushed its failure ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ point already; if so, discard that as redundant. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if ((re_opcode_t) *p != on_failure_jump ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ && (re_opcode_t) *p != succeed_n) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ continue; ~~~~~~~~~ p++; ~~~~ EXTRACT_NUMBER_AND_INCR (j, p); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ p += j; ~~~~~~~ /* If what's on the stack is where we are now, pop it. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!FAIL_STACK_EMPTY () ~~~~~~~~~~~~~~~~~~~~~~~~ && fail_stack.stack[fail_stack.avail - 1].pointer == p) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fail_stack.avail--; ~~~~~~~~~~~~~~~~~~~ continue; ~~~~~~~~~ case on_failure_jump: ~~~~~~~~~~~~~~~~~~~~~ case on_failure_keep_string_jump: ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ handle_on_failure_jump: ~~~~~~~~~~~~~~~~~~~~~~~ EXTRACT_NUMBER_AND_INCR (j, p); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* For some patterns, e.g., `(a?)?', `p+j' here points to the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ end of the pattern. We don't want to push such a point, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ since when we restore it above, entering the switch will ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ increment `p' past the end of the pattern. We don't need ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ to push such a point since we obviously won't find any more ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap entries beyond `pend'. Such a pattern can match ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the null string, though. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (p + j < pend) ~~~~~~~~~~~~~~~~~ { ~ if (!PUSH_PATTERN_OP (p + j, fail_stack)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ RESET_FAIL_STACK (); ~~~~~~~~~~~~~~~~~~~~ UNBIND_REGEX_MALLOC_CHECK (); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return -2; ~~~~~~~~~~ } ~ } ~ else ~~~~ bufp->can_be_null = 1; ~~~~~~~~~~~~~~~~~~~~~~ if (succeed_n_p) ~~~~~~~~~~~~~~~~ { ~ EXTRACT_NUMBER_AND_INCR (k, p); /* Skip the n. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ succeed_n_p = false; ~~~~~~~~~~~~~~~~~~~~ } ~ continue; ~~~~~~~~~ case succeed_n: ~~~~~~~~~~~~~~~ /* Get to the number of times to succeed. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ p += 2; ~~~~~~~ /* Increment p past the n for when k != 0. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ EXTRACT_NONNEGATIVE_AND_INCR (k, p); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (k == 0) ~~~~~~~~~~~ { ~ p -= 4; ~~~~~~~ succeed_n_p = true; /* Spaghetti code alert. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto handle_on_failure_jump; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ continue; ~~~~~~~~~ case set_number_at: ~~~~~~~~~~~~~~~~~~~ p += 4; ~~~~~~~ continue; ~~~~~~~~~ case start_memory: ~~~~~~~~~~~~~~~~~~ case stop_memory: ~~~~~~~~~~~~~~~~~ p += 4; ~~~~~~~ continue; ~~~~~~~~~ default: ~~~~~~~~ ABORT (); /* We have listed all the cases. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } /* switch *p++ */ ~~~~~~~~~~~~~~~~~~~ /* Getting here means we have found the possible starting ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ characters for one path of the pattern -- and that the empty ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ string does not match. We need not follow this path further. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Instead, look at the next alternative (remembered on the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ stack), or quit if no more. The test at the top of the loop ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ does these things. */ ~~~~~~~~~~~~~~~~~~~~~~ path_can_be_null = false; ~~~~~~~~~~~~~~~~~~~~~~~~~ p = pend; ~~~~~~~~~ } /* while p */ ~~~~~~~~~~~~~~~ /* Set `can_be_null' for the last path (also the first path, if the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ pattern is empty). */ ~~~~~~~~~~~~~~~~~~~~~~ bufp->can_be_null |= path_can_be_null; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ done: ~~~~~ RESET_FAIL_STACK (); ~~~~~~~~~~~~~~~~~~~~ UNBIND_REGEX_MALLOC_CHECK (); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return 0; ~~~~~~~~~ } /* re_compile_fastmap */ ~~~~~~~~~~~~~~~~~~~~~~~~~~ ~ /* Set REGS to hold NUM_REGS registers, storing them in STARTS and ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ENDS. Subsequent matches using PATTERN_BUFFER and REGS will use ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ this memory for recording register information. STARTS and ENDS ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ must be allocated using the malloc library routine, and must each ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ be at least NUM_REGS * sizeof (regoff_t) bytes long. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ If NUM_REGS == 0, then subsequent matches should allocate their own ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ register data. ~~~~~~~~~~~~~~ Unless this function is called, the first search or match using ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PATTERN_BUFFER will allocate its own register data, without ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ freeing the old data. */ ~~~~~~~~~~~~~~~~~~~~~~~~~ void ~~~~ re_set_registers (struct re_pattern_buffer *bufp, struct re_registers *regs, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int num_regs, regoff_t *starts, regoff_t *ends) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ if (num_regs) ~~~~~~~~~~~~~ { ~ bufp->regs_allocated = REGS_REALLOCATE; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regs->num_regs = num_regs; ~~~~~~~~~~~~~~~~~~~~~~~~~~ regs->start = starts; ~~~~~~~~~~~~~~~~~~~~~ regs->end = ends; ~~~~~~~~~~~~~~~~~ } ~ else ~~~~ { ~ bufp->regs_allocated = REGS_UNALLOCATED; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regs->num_regs = 0; ~~~~~~~~~~~~~~~~~~~ regs->start = regs->end = (regoff_t *) 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ } ~ ~ /* Searching routines. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Like re_search_2, below, but only one string is specified, and ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ doesn't let you say where to stop matching. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int ~~~ re_search (struct re_pattern_buffer *bufp, const char *string, int size, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int startpos, int range, struct re_registers *regs ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RE_LISP_CONTEXT_ARGS_DECL) ~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ return re_search_2 (bufp, NULL, 0, string, size, startpos, range, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regs, size RE_LISP_CONTEXT_ARGS); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* Using the compiled pattern in BUFP->buffer, first tries to match the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ virtual concatenation of STRING1 and STRING2, starting first at index ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ STARTPOS, then at STARTPOS + 1, and so on. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ STRING1 and STRING2 have length SIZE1 and SIZE2, respectively. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RANGE is how far to scan while trying to match. RANGE = 0 means try ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ only at STARTPOS; in general, the last start tried is STARTPOS + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RANGE. ~~~~~~ All sizes and positions refer to bytes (not chars); under Mule, the code ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ knows about the format of the text and will only check at positions ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ where a character starts. ~~~~~~~~~~~~~~~~~~~~~~~~~ With MULE, RANGE is a byte position, not a char position. The last ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ start tried is the character starting <= STARTPOS + RANGE. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ In REGS, return the indices of the virtual concatenation of STRING1 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ and STRING2 that matched the entire BUFP->buffer and its contained ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ subexpressions. ~~~~~~~~~~~~~~~ Do not consider matching one past the index STOP in the virtual ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ concatenation of STRING1 and STRING2. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ We return either the position in the strings at which the match was ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ found, -1 if no match, or -2 if error (such as failure ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ stack overflow). */ ~~~~~~~~~~~~~~~~~~~~ int ~~~ re_search_2 (struct re_pattern_buffer *bufp, const char *str1, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int size1, const char *str2, int size2, int startpos, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int range, struct re_registers *regs, int stop ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RE_LISP_CONTEXT_ARGS_DECL) ~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ int val; ~~~~~~~~ re_char *string1 = (re_char *) str1; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_char *string2 = (re_char *) str2; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ REGISTER char *fastmap = bufp->fastmap; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ REGISTER RE_TRANSLATE_TYPE translate = bufp->translate; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int total_size = size1 + size2; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int endpos = startpos + range; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef REGEX_BEGLINE_CHECK ~~~~~~~~~~~~~~~~~~~~~~~~~~ int anchored_at_begline = 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ re_char *d; ~~~~~~~~~~~ #ifdef emacs ~~~~~~~~~~~~ Internal_Format fmt = buffer_or_other_internal_format (lispobj); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef REL_ALLOC ~~~~~~~~~~~~~~~~ const Ibyte *orig_buftext = ~~~~~~~~~~~~~~~~~~~~~~~~~~~ BUFFERP (lispobj) ? ~~~~~~~~~~~~~~~~~~~ BYTE_BUF_BYTE_ADDRESS (XBUFFER (lispobj), ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ BYTE_BUF_BEG (XBUFFER (lispobj))) : ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 0; ~~ #endif ~~~~~~ #ifdef ERROR_CHECK_MALLOC ~~~~~~~~~~~~~~~~~~~~~~~~~ int depth; ~~~~~~~~~~ #endif ~~~~~~ #endif /* emacs */ ~~~~~~~~~~~~~~~~~~ int forward_search_p; ~~~~~~~~~~~~~~~~~~~~~ /* Check for out-of-range STARTPOS. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (startpos < 0 || startpos > total_size) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return -1; ~~~~~~~~~~ /* Fix up RANGE if it might eventually take us outside ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the virtual concatenation of STRING1 and STRING2. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (endpos < 0) ~~~~~~~~~~~~~~~ range = 0 - startpos; ~~~~~~~~~~~~~~~~~~~~~ else if (endpos > total_size) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ range = total_size - startpos; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ forward_search_p = range > 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (void) (forward_search_p); /* This is only used with assertions, silence the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ compiler warning when they're turned off. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* If the search isn't to be a backwards one, don't waste time in a ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ search for a pattern that must be anchored. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (bufp->used > 0 && (re_opcode_t) bufp->buffer[0] == begbuf && range > 0) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ if (startpos > 0) ~~~~~~~~~~~~~~~~~ return -1; ~~~~~~~~~~ else ~~~~ { ~ d = ((const unsigned char *) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (startpos >= size1 ? string2 - size1 : string1) + startpos); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ range = itext_ichar_len_fmt (d, fmt); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ } ~ #ifdef emacs ~~~~~~~~~~~~ /* In a forward search for something that starts with \=. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ don't keep searching past point. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (bufp->used > 0 && (re_opcode_t) bufp->buffer[0] == at_dot && range > 0) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ if (!BUFFERP (lispobj)) ~~~~~~~~~~~~~~~~~~~~~~~ return -1; ~~~~~~~~~~ range = (BYTE_BUF_PT (XBUFFER (lispobj)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - BYTE_BUF_BEGV (XBUFFER (lispobj)) - startpos); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (range < 0) ~~~~~~~~~~~~~~ return -1; ~~~~~~~~~~ } ~ #endif /* emacs */ ~~~~~~~~~~~~~~~~~~ #ifdef ERROR_CHECK_MALLOC ~~~~~~~~~~~~~~~~~~~~~~~~~ /* Do this after the above return()s. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ depth = bind_regex_malloc_disallowed (1); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ /* Update the fastmap now if not correct already. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ BEGIN_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~~~ if (fastmap && !bufp->fastmap_accurate) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (re_compile_fastmap (bufp RE_LISP_SHORT_CONTEXT_ARGS) == -2) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ END_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~ UNBIND_REGEX_MALLOC_CHECK (); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return -2; ~~~~~~~~~~ } ~ END_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~ RE_SEARCH_RELOCATE_MOVEABLE_DATA_POINTERS (); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef REGEX_BEGLINE_CHECK ~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ long i = 0; ~~~~~~~~~~~ while (i < bufp->used) ~~~~~~~~~~~~~~~~~~~~~~ { ~ if (bufp->buffer[i] == start_memory || ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->buffer[i] == stop_memory) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ i += 4; ~~~~~~~ else ~~~~ break; ~~~~~~ } ~ anchored_at_begline = i < bufp->used && bufp->buffer[i] == begline; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ #endif ~~~~~~ #ifdef emacs ~~~~~~~~~~~~ BEGIN_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~~~ /* Update the mirror syntax table if it's used and dirty. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ SYNTAX_CODE (BUFFER_MIRROR_SYNTAX_TABLE (lispbuf), 'a'); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ scache = setup_syntax_cache (scache, lispobj, lispbuf, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ offset_to_bytexpos (lispobj, startpos), ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 1); ~~~ END_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~ RE_SEARCH_RELOCATE_MOVEABLE_DATA_POINTERS (); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ /* Loop through the string, looking for a place to start matching. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (;;) ~~~~~~~~ { ~ #ifdef REGEX_BEGLINE_CHECK ~~~~~~~~~~~~~~~~~~~~~~~~~~ /* If the regex is anchored at the beginning of a line (i.e. with a ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ^), then we can speed things up by skipping to the next ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ beginning-of-line. However, to determine "beginning of line" we ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ need to look at the previous char, so can't do this check if at ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ beginning of either string. (Well, we could if at the beginning of ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the second string, but it would require additional code, and this ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ is just an optimization.) */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (anchored_at_begline && startpos > 0 && startpos != size1) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ if (range > 0) ~~~~~~~~~~~~~~ { ~ /* whose stupid idea was it anyway to make this ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ function take two strings to match?? */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int lim = 0; ~~~~~~~~~~~~ re_char *orig_d; ~~~~~~~~~~~~~~~~ re_char *stop_d; ~~~~~~~~~~~~~~~~ /* Compute limit as below in fastmap code, so we are guaranteed ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ to remain within a single string. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (startpos < size1 && startpos + range >= size1) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ lim = range - (size1 - startpos); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ d = ((const unsigned char *) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (startpos >= size1 ? string2 - size1 : string1) + startpos); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ orig_d = d; ~~~~~~~~~~~ stop_d = d + range - lim; ~~~~~~~~~~~~~~~~~~~~~~~~~ /* We want to find the next location (including the current ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ one) where the previous char is a newline, so back up one ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ and search forward for a newline. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEC_IBYTEPTR_FMT (d, fmt); /* Ok, since startpos != size1. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Written out as an if-else to avoid testing `translate' ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ inside the loop. */ ~~~~~~~~~~~~~~~~~~~~ if (TRANSLATE_P (translate)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ while (d < stop_d && ~~~~~~~~~~~~~~~~~~~~ RE_TRANSLATE_1 (itext_ichar_fmt (d, fmt, lispobj)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ != '\n') ~~~~~~~~ INC_IBYTEPTR_FMT (d, fmt); ~~~~~~~~~~~~~~~~~~~~~~~~~~ else ~~~~ while (d < stop_d && ~~~~~~~~~~~~~~~~~~~~ itext_ichar_ascii_fmt (d, fmt, lispobj) != '\n') ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ INC_IBYTEPTR_FMT (d, fmt); ~~~~~~~~~~~~~~~~~~~~~~~~~~ /* If we were stopped by a newline, skip forward over it. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Otherwise we will get in an infloop when our start position ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ was at begline. */ ~~~~~~~~~~~~~~~~~~ if (d < stop_d) ~~~~~~~~~~~~~~~ INC_IBYTEPTR_FMT (d, fmt); ~~~~~~~~~~~~~~~~~~~~~~~~~~ range -= d - orig_d; ~~~~~~~~~~~~~~~~~~~~ startpos += d - orig_d; ~~~~~~~~~~~~~~~~~~~~~~~ assert (!forward_search_p || range >= 0); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ else if (range < 0) ~~~~~~~~~~~~~~~~~~~ { ~ /* We're lazy, like in the fastmap code below */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Ichar c; ~~~~~~~~ d = ((const unsigned char *) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (startpos >= size1 ? string2 - size1 : string1) + startpos); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEC_IBYTEPTR_FMT (d, fmt); ~~~~~~~~~~~~~~~~~~~~~~~~~~ c = itext_ichar_fmt (d, fmt, lispobj); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ c = RE_TRANSLATE (c); ~~~~~~~~~~~~~~~~~~~~~ if (c != '\n') ~~~~~~~~~~~~~~ goto advance; ~~~~~~~~~~~~~ } ~ } ~ #endif /* REGEX_BEGLINE_CHECK */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* If a fastmap is supplied, skip quickly over characters that ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ cannot be the start of a match. If the pattern can match the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ null string, however, we don't need to skip characters; we want ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the first null string. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~ if (fastmap && startpos < total_size && !bufp->can_be_null) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ /* For the moment, fastmap always works as if buffer ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ is in default format, so convert chars in the search strings ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ into default format as we go along, if necessary. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ &&#### fastmap needs rethinking for 8-bit-fixed so ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ it's faster. We need it to reflect the raw ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 8-bit-fixed values. That isn't so hard if we assume ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ that the top 96 bytes represent a single 1-byte ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ charset. For 16-bit/32-bit stuff it's probably not ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ worth it to make the fastmap represent the raw, due to ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ its nature -- we'd have to use the LSB for the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap, and that causes lots of problems with Mule ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ chars, where it essentially wipes out the usefulness ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ of the fastmap entirely. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (range > 0) /* Searching forwards. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ int lim = 0; ~~~~~~~~~~~~ int irange = range; ~~~~~~~~~~~~~~~~~~~ if (startpos < size1 && startpos + range >= size1) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ lim = range - (size1 - startpos); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ d = ((const unsigned char *) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (startpos >= size1 ? string2 - size1 : string1) + startpos); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Written out as an if-else to avoid testing `translate' ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ inside the loop. */ ~~~~~~~~~~~~~~~~~~~~ if (TRANSLATE_P (translate)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ while (range > lim) ~~~~~~~~~~~~~~~~~~~ { ~ re_char *old_d = d; ~~~~~~~~~~~~~~~~~~~ #ifdef MULE ~~~~~~~~~~~ Ibyte tempch[MAX_ICHAR_LEN]; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Ichar buf_ch = ~~~~~~~~~~~~~~ RE_TRANSLATE_1 (itext_ichar_fmt (d, fmt, lispobj)); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ set_itext_ichar (tempch, buf_ch); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (fastmap[*tempch]) ~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ #else ~~~~~ if (fastmap[(unsigned char) RE_TRANSLATE_1 (*d)]) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ #endif /* MULE */ ~~~~~~~~~~~~~~~~~ INC_IBYTEPTR_FMT (d, fmt); ~~~~~~~~~~~~~~~~~~~~~~~~~~ range -= (d - old_d); ~~~~~~~~~~~~~~~~~~~~~ assert (!forward_search_p || range >= 0); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ } ~ #ifdef MULE ~~~~~~~~~~~ else if (fmt != FORMAT_DEFAULT) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ while (range > lim) ~~~~~~~~~~~~~~~~~~~ { ~ re_char *old_d = d; ~~~~~~~~~~~~~~~~~~~ Ibyte tempch[MAX_ICHAR_LEN]; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Ichar buf_ch = itext_ichar_fmt (d, fmt, lispobj); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ set_itext_ichar (tempch, buf_ch); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (fastmap[*tempch]) ~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ INC_IBYTEPTR_FMT (d, fmt); ~~~~~~~~~~~~~~~~~~~~~~~~~~ range -= (d - old_d); ~~~~~~~~~~~~~~~~~~~~~ assert (!forward_search_p || range >= 0); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ } ~ #endif /* MULE */ ~~~~~~~~~~~~~~~~~ else ~~~~ { ~ while (range > lim && !fastmap[*d]) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ re_char *old_d = d; ~~~~~~~~~~~~~~~~~~~ INC_IBYTEPTR (d); ~~~~~~~~~~~~~~~~~ range -= (d - old_d); ~~~~~~~~~~~~~~~~~~~~~ assert (!forward_search_p || range >= 0); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ } ~ startpos += irange - range; ~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ else /* Searching backwards. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ /* #### It's not clear why we don't just write a loop, like ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for the moving-forward case. Perhaps the writer got lazy, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ since backward searches aren't so common. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ d = ((const unsigned char *) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (startpos >= size1 ? string2 - size1 : string1) + startpos); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef MULE ~~~~~~~~~~~ { ~ Ibyte tempch[MAX_ICHAR_LEN]; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Ichar buf_ch = ~~~~~~~~~~~~~~ RE_TRANSLATE (itext_ichar_fmt (d, fmt, lispobj)); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ set_itext_ichar (tempch, buf_ch); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!fastmap[*tempch]) ~~~~~~~~~~~~~~~~~~~~~~ goto advance; ~~~~~~~~~~~~~ } ~ #else ~~~~~ if (!fastmap[(unsigned char) RE_TRANSLATE (*d)]) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto advance; ~~~~~~~~~~~~~ #endif /* MULE */ ~~~~~~~~~~~~~~~~~ } ~ } ~ /* If can't match the null string, and that's all we have left, fail. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (range >= 0 && startpos == total_size && fastmap ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ && !bufp->can_be_null) ~~~~~~~~~~~~~~~~~~~~~~ { ~ UNBIND_REGEX_MALLOC_CHECK (); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return -1; ~~~~~~~~~~ } ~ #ifdef emacs /* XEmacs added, w/removal of immediate_quit */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!no_quit_in_re_search) ~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ BEGIN_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~~~ QUIT; ~~~~~ END_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~ RE_SEARCH_RELOCATE_MOVEABLE_DATA_POINTERS (); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ #endif ~~~~~~ BEGIN_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~~~ val = re_match_2_internal (bufp, string1, size1, string2, size2, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ startpos, regs, stop ~~~~~~~~~~~~~~~~~~~~ RE_LISP_CONTEXT_ARGS); ~~~~~~~~~~~~~~~~~~~~~~ #ifndef REGEX_MALLOC ~~~~~~~~~~~~~~~~~~~~ ALLOCA_GARBAGE_COLLECT (); ~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ END_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~ RE_SEARCH_RELOCATE_MOVEABLE_DATA_POINTERS (); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (val >= 0) ~~~~~~~~~~~~~ { ~ UNBIND_REGEX_MALLOC_CHECK (); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return startpos; ~~~~~~~~~~~~~~~~ } ~ if (val == -2) ~~~~~~~~~~~~~~ { ~ UNBIND_REGEX_MALLOC_CHECK (); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return -2; ~~~~~~~~~~ } ~ RE_SEARCH_RELOCATE_MOVEABLE_DATA_POINTERS (); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ advance: ~~~~~~~~ if (!range) ~~~~~~~~~~~ break; ~~~~~~ else if (range > 0) ~~~~~~~~~~~~~~~~~~~ { ~ Bytecount d_size; ~~~~~~~~~~~~~~~~~ d = ((const unsigned char *) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (startpos >= size1 ? string2 - size1 : string1) + startpos); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ d_size = itext_ichar_len_fmt (d, fmt); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ range -= d_size; ~~~~~~~~~~~~~~~~ assert (!forward_search_p || range >= 0); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ startpos += d_size; ~~~~~~~~~~~~~~~~~~~ } ~ else ~~~~ { ~ Bytecount d_size; ~~~~~~~~~~~~~~~~~ /* Note startpos > size1 not >=. If we are on the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ string1/string2 boundary, we want to backup into string1. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ d = ((const unsigned char *) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (startpos > size1 ? string2 - size1 : string1) + startpos); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEC_IBYTEPTR_FMT (d, fmt); ~~~~~~~~~~~~~~~~~~~~~~~~~~ d_size = itext_ichar_len_fmt (d, fmt); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ range += d_size; ~~~~~~~~~~~~~~~~ assert (!forward_search_p || range >= 0); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ startpos -= d_size; ~~~~~~~~~~~~~~~~~~~ } ~ } ~ UNBIND_REGEX_MALLOC_CHECK (); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return -1; ~~~~~~~~~~ } /* re_search_2 */ ~~~~~~~~~~~~~~~~~~~ ~ /* Declarations and macros for re_match_2. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* This converts PTR, a pointer into one of the search strings `string1' ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ and `string2' into an offset from the beginning of that string. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define POINTER_TO_OFFSET(ptr) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (FIRST_STRING_P (ptr) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~ ? ((regoff_t) ((ptr) - string1)) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ : ((regoff_t) ((ptr) - string2 + size1))) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Macros for dealing with the split strings in re_match_2. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define MATCHING_IN_FIRST_STRING (dend == end_match_1) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Call before fetching a character with *d. This switches over to ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ string2 if necessary. */ ~~~~~~~~~~~~~~~~~~~~~~~~~ #define REGEX_PREFETCH() \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ while (d == dend) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { \ ~~~~~~~~~~~ /* End of string2 => fail. */ \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (dend == end_match_2) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto fail; \ ~~~~~~~~~~~~~~~~~~ /* End of string1 => advance to string2. */ \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ d = string2; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~ dend = end_match_2; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* Test if at very beginning or at very end of the virtual concatenation ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ of `string1' and `string2'. If only one string, it's `string2'. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define AT_STRINGS_BEG(d) ((d) == (size1 ? string1 : string2) || !size2) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define AT_STRINGS_END(d) ((d) == end2) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* XEmacs change: ~~~~~~~~~~~~~~~~~ If the given position straddles the string gap, return the equivalent ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ position that is before or after the gap, respectively; otherwise, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return the same position. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define POS_BEFORE_GAP_UNSAFE(d) ((d) == string2 ? end1 : (d)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define POS_AFTER_GAP_UNSAFE(d) ((d) == end1 ? string2 : (d)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Test if CH is a word-constituent character. (XEmacs change) */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define WORDCHAR_P(ch) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (SYNTAX (BUFFER_MIRROR_SYNTAX_TABLE (lispbuf), ch) == Sword) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Free everything we malloc. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef MATCH_MAY_ALLOCATE ~~~~~~~~~~~~~~~~~~~~~~~~~ #define FREE_VAR(var,type) if (var) REGEX_FREE (var, type); var = NULL ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define FREE_VARIABLES() \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ do { \ ~~~~~~~~~~~~~~ UNBIND_REGEX_MALLOC_CHECK (); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ REGEX_FREE_STACK (fail_stack.stack); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ FREE_VAR (regstart, re_char **); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ FREE_VAR (regend, re_char **); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ FREE_VAR (old_regstart, re_char **); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ FREE_VAR (old_regend, re_char **); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ FREE_VAR (best_regstart, re_char **); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ FREE_VAR (best_regend, re_char **); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ FREE_VAR (reg_info, register_info_type *); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ FREE_VAR (reg_dummy, re_char **); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ FREE_VAR (reg_info_dummy, register_info_type *); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } while (0) ~~~~~~~~~~~ #else /* not MATCH_MAY_ALLOCATE */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define FREE_VARIABLES() \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ do { \ ~~~~~~~~~~~ UNBIND_REGEX_MALLOC_CHECK (); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } while (0) ~~~~~~~~~~~ #endif /* MATCH_MAY_ALLOCATE */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* These values must meet several constraints. They must not be valid ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ register values, which means we can use numbers larger than MAX_REGNUM. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ They must differ by 1, because of NUM_FAILURE_ITEMS above. And the value ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for the lowest register must be larger than the value for the highest ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ register, so we do not try to actually save any registers when none are ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ active. */ ~~~~~~~~~~~ #define NO_HIGHEST_ACTIVE_REG (MAX_REGNUM + 1) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define NO_LOWEST_ACTIVE_REG (NO_HIGHEST_ACTIVE_REG + 1) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ~ /* Matching routines. */ ~~~~~~~~~~~~~~~~~~~~~~~~~ #ifndef emacs /* XEmacs never uses this. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* re_match is like re_match_2 except it takes only a single string. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int ~~~ re_match (struct re_pattern_buffer *bufp, const char *string, int size, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int pos, struct re_registers *regs ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RE_LISP_CONTEXT_ARGS_DECL) ~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ int result = re_match_2_internal (bufp, NULL, 0, (re_char *) string, size, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ pos, regs, size ~~~~~~~~~~~~~~~ RE_LISP_CONTEXT_ARGS); ~~~~~~~~~~~~~~~~~~~~~~ ALLOCA_GARBAGE_COLLECT (); ~~~~~~~~~~~~~~~~~~~~~~~~~~ return result; ~~~~~~~~~~~~~~ } ~ #endif /* not emacs */ ~~~~~~~~~~~~~~~~~~~~~~ /* re_match_2 matches the compiled pattern in BUFP against the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (virtual) concatenation of STRING1 and STRING2 (of length SIZE1 and ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ SIZE2, respectively). We start matching at POS, and stop matching ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ at STOP. ~~~~~~~~ If REGS is non-null and the `no_sub' field of BUFP is nonzero, we ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ store offsets for the substring each group matched in REGS. See the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ documentation for exactly how many groups we fill. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ We return -1 if no match, -2 if an internal error (such as the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ failure stack overflowing). Otherwise, we return the length of the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ matched substring. */ ~~~~~~~~~~~~~~~~~~~~~~ int ~~~ re_match_2 (struct re_pattern_buffer *bufp, const char *string1, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int size1, const char *string2, int size2, int pos, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ struct re_registers *regs, int stop ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RE_LISP_CONTEXT_ARGS_DECL) ~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ int result; ~~~~~~~~~~~ #ifdef emacs ~~~~~~~~~~~~ /* Update the mirror syntax table if it's dirty now, this would otherwise ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ cause a malloc() in charset_mule in re_match_2_internal() when checking ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ characters' syntax. */ ~~~~~~~~~~~~~~~~~~~~~~ SYNTAX_CODE (BUFFER_MIRROR_SYNTAX_TABLE (lispbuf), 'a'); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ scache = setup_syntax_cache (scache, lispobj, lispbuf, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ offset_to_bytexpos (lispobj, pos), ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 1); ~~~ #endif ~~~~~~ result = re_match_2_internal (bufp, (re_char *) string1, size1, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (re_char *) string2, size2, ~~~~~~~~~~~~~~~~~~~~~~~~~~~ pos, regs, stop ~~~~~~~~~~~~~~~ RE_LISP_CONTEXT_ARGS); ~~~~~~~~~~~~~~~~~~~~~~ ALLOCA_GARBAGE_COLLECT (); ~~~~~~~~~~~~~~~~~~~~~~~~~~ return result; ~~~~~~~~~~~~~~ } ~ /* This is a separate function so that we can force an alloca cleanup ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ afterwards. */ ~~~~~~~~~~~~~~~ static int ~~~~~~~~~~ re_match_2_internal (struct re_pattern_buffer *bufp, re_char *string1, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int size1, re_char *string2, int size2, int pos, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ struct re_registers *regs, int stop ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RE_LISP_CONTEXT_ARGS_MULE_DECL) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ /* General temporaries. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~ int mcnt; ~~~~~~~~~ re_char *p1; ~~~~~~~~~~~~ int should_succeed; /* XEmacs change */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Just past the end of the corresponding string. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_char *end1, *end2; ~~~~~~~~~~~~~~~~~~~~~ /* Pointers into string1 and string2, just past the last characters in ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ each to consider matching. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_char *end_match_1, *end_match_2; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Where we are in the data, and the end of the current string. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_char *d, *dend; ~~~~~~~~~~~~~~~~~~ /* Where we are in the pattern, and the end of the pattern. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ unsigned char *p; ~~~~~~~~~~~~~~~~~ re_char *pstart; ~~~~~~~~~~~~~~~~ REGISTER re_char *pend; ~~~~~~~~~~~~~~~~~~~~~~~ /* Mark the opcode just after a start_memory, so we can test for an ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ empty subpattern when we get to the stop_memory. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_char *just_past_start_mem = 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* We use this to map every character in the string. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RE_TRANSLATE_TYPE translate = bufp->translate; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Failure point stack. Each place that can handle a failure further ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ down the line pushes a failure point on this stack. It consists of ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ restart, regend, and reg_info for all registers corresponding to ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the subexpressions we're currently inside, plus the number of such ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ registers, and, finally, two char *'s. The first char * is where ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ to resume scanning the pattern; the second one is where to resume ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ scanning the strings. If the latter is zero, the failure point is ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ a ``dummy''; if a failure happens and the failure point is a dummy, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ it gets discarded and the next one is tried. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef MATCH_MAY_ALLOCATE /* otherwise, this is global. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fail_stack_type fail_stack; ~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ #ifdef DEBUG ~~~~~~~~~~~~ static int failure_id; ~~~~~~~~~~~~~~~~~~~~~~ int nfailure_points_pushed = 0, nfailure_points_popped = 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ #ifdef REGEX_REL_ALLOC ~~~~~~~~~~~~~~~~~~~~~~ /* This holds the pointer to the failure stack, when ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ it is allocated relocatably. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fail_stack_elt_t *failure_stack_ptr; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ /* We fill all the registers internally, independent of what we ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return, for use in backreferences. The number here includes ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ an element for register zero. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regnum_t num_regs = bufp->re_ngroups + 1; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* The currently active registers. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regnum_t lowest_active_reg = NO_LOWEST_ACTIVE_REG; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regnum_t highest_active_reg = NO_HIGHEST_ACTIVE_REG; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Information on the contents of registers. These are pointers into ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the input strings; they record just what was matched (on this ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ attempt) by a subexpression part of the pattern, that is, the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regnum-th regstart pointer points to where in the pattern we began ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ matching and the regnum-th regend points to right after where we ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ stopped matching the regnum-th subexpression. (The zeroth register ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ keeps track of what the whole pattern matches.) */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_char **regstart, **regend; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ /* If a group that's operated upon by a repetition operator fails to ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ match anything, then the register for its start will need to be ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ restored because it will have been set to wherever in the string we ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ are when we last see its open-group operator. Similarly for a ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ register's end. */ ~~~~~~~~~~~~~~~~~~~ #ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_char **old_regstart, **old_regend; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ /* The is_active field of reg_info helps us keep track of which (possibly ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ nested) subexpressions we are currently in. The matched_something ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ field of reg_info[reg_num] helps us tell whether or not we have ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ matched any of the pattern so far this time through the reg_num-th ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ subexpression. These two fields get reset each time through any ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ loop their register is in. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef MATCH_MAY_ALLOCATE /* otherwise, this is global. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ register_info_type *reg_info; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ /* The following record the register info as found in the above ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ variables when we find a match better than any we've seen before. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ This happens as we backtrack through the failure points, which in ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ turn happens only if we have not yet matched the entire string. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int best_regs_set = false; ~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_char **best_regstart, **best_regend; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ /* Logically, this is `best_regend[0]'. But we don't want to have to ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ allocate space for that if we're not allocating space for anything ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ else (see below). Also, we never need info about register 0 for ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ any of the other register vectors, and it seems rather a kludge to ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ treat `best_regend' differently than the rest. So we keep track of ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the end of the best match so far in a separate variable. We ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ initialize this to NULL so that when we backtrack the first time ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ and need to test it, it's not garbage. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_char *match_end = NULL; ~~~~~~~~~~~~~~~~~~~~~~~~~~ /* This helps SET_REGS_MATCHED avoid doing redundant work. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int set_regs_matched_done = 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Used when we pop values we don't care about. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_char **reg_dummy; ~~~~~~~~~~~~~~~~~~~~ register_info_type *reg_info_dummy; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ #ifdef DEBUG ~~~~~~~~~~~~ /* Counts the total number of registers pushed. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int num_regs_pushed = 0; ~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ /* 1 if this match ends in the same string (string1 or string2) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ as the best previous match. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_bool same_str_p; ~~~~~~~~~~~~~~~~~~~ /* 1 if this match is the best seen so far. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_bool best_match_p; ~~~~~~~~~~~~~~~~~~~~~ #ifdef emacs ~~~~~~~~~~~~ Internal_Format fmt = buffer_or_other_internal_format (lispobj); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef REL_ALLOC ~~~~~~~~~~~~~~~~ const Ibyte *orig_buftext = ~~~~~~~~~~~~~~~~~~~~~~~~~~~ BUFFERP (lispobj) ? ~~~~~~~~~~~~~~~~~~~ BYTE_BUF_BYTE_ADDRESS (XBUFFER (lispobj), ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ BYTE_BUF_BEG (XBUFFER (lispobj))) : ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 0; ~~ #endif ~~~~~~ #ifdef ERROR_CHECK_MALLOC ~~~~~~~~~~~~~~~~~~~~~~~~~ int depth = bind_regex_malloc_disallowed (1); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ #endif /* emacs */ ~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT1 ("\n\nEntering re_match_2.\n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ BEGIN_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~~~ INIT_FAIL_STACK (); ~~~~~~~~~~~~~~~~~~~ p = (unsigned char *) ALLOCA (bufp->used); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ END_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~ /* re_match_2_internal() modifies the compiled pattern (see the succeed_n, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ jump_n, set_number_at opcodes), make it re-entrant by working on a ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ copy. This should also give better locality of reference. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ memcpy (p, bufp->buffer, bufp->used); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ pstart = (re_char *) p; ~~~~~~~~~~~~~~~~~~~~~~~ pend = pstart + bufp->used; ~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef MATCH_MAY_ALLOCATE ~~~~~~~~~~~~~~~~~~~~~~~~~ /* Do not bother to initialize all the register variables if there are ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ no groups in the pattern, as it takes a fair amount of time. If ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ there are groups, we include space for register 0 (the whole ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ pattern), even though we never use it, since it simplifies the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ array indexing. We should fix this. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (bufp->re_ngroups) ~~~~~~~~~~~~~~~~~~~~~ { ~ BEGIN_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~~~ regstart = REGEX_TALLOC (num_regs, re_char *); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regend = REGEX_TALLOC (num_regs, re_char *); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ old_regstart = REGEX_TALLOC (num_regs, re_char *); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ old_regend = REGEX_TALLOC (num_regs, re_char *); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ best_regstart = REGEX_TALLOC (num_regs, re_char *); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ best_regend = REGEX_TALLOC (num_regs, re_char *); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ reg_info = REGEX_TALLOC (num_regs, register_info_type); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ reg_dummy = REGEX_TALLOC (num_regs, re_char *); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ reg_info_dummy = REGEX_TALLOC (num_regs, register_info_type); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ END_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~ if (!(regstart && regend && old_regstart && old_regend && reg_info ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ && best_regstart && best_regend && reg_dummy && reg_info_dummy)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ FREE_VARIABLES (); ~~~~~~~~~~~~~~~~~~ return -2; ~~~~~~~~~~ } ~ } ~ else ~~~~ { ~ /* We must initialize all our variables to NULL, so that ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `FREE_VARIABLES' doesn't try to free them. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regstart = regend = old_regstart = old_regend = best_regstart ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ = best_regend = reg_dummy = NULL; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ reg_info = reg_info_dummy = (register_info_type *) NULL; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ #endif /* MATCH_MAY_ALLOCATE */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #if defined (emacs) && defined (REL_ALLOC) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ /* If the allocations above (or the call to setup_syntax_cache() in ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_match_2) caused a rel-alloc relocation, then fix up the data ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ pointers */ ~~~~~~~~~~~ Bytecount offset = offset_post_relocation (lispobj, orig_buftext); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (offset) ~~~~~~~~~~~ { ~ string1 += offset; ~~~~~~~~~~~~~~~~~~ string2 += offset; ~~~~~~~~~~~~~~~~~~ } ~ } ~ #endif /* defined (emacs) && defined (REL_ALLOC) */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* The starting position is bogus. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (pos < 0 || pos > size1 + size2) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ FREE_VARIABLES (); ~~~~~~~~~~~~~~~~~~ return -1; ~~~~~~~~~~ } ~ /* Initialize subexpression text positions to our sentinel to mark ones that ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ no start_memory/stop_memory has been seen for. Also initialize the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ register information struct. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (mcnt = 1; mcnt < num_regs; mcnt++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ regstart[mcnt] = regend[mcnt] = old_regstart[mcnt] = old_regend[mcnt] ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ = best_regstart[mcnt] = best_regend[mcnt] = REG_UNSET_VALUE; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ REG_MATCH_NULL_STRING_P (reg_info[mcnt]) = MATCH_NULL_UNSET_VALUE; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ IS_ACTIVE (reg_info[mcnt]) = 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ MATCHED_SOMETHING (reg_info[mcnt]) = 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ EVER_MATCHED_SOMETHING (reg_info[mcnt]) = 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* We move `string1' into `string2' if the latter's empty -- but not if ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `string1' is null. */ ~~~~~~~~~~~~~~~~~~~~~~ if (size2 == 0 && string1 != NULL) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ string2 = string1; ~~~~~~~~~~~~~~~~~~ size2 = size1; ~~~~~~~~~~~~~~ string1 = 0; ~~~~~~~~~~~~ size1 = 0; ~~~~~~~~~~ } ~ end1 = string1 + size1; ~~~~~~~~~~~~~~~~~~~~~~~ end2 = string2 + size2; ~~~~~~~~~~~~~~~~~~~~~~~ /* Compute where to stop matching, within the two strings. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (stop <= size1) ~~~~~~~~~~~~~~~~~~ { ~ end_match_1 = string1 + stop; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ end_match_2 = string2; ~~~~~~~~~~~~~~~~~~~~~~ } ~ else ~~~~ { ~ end_match_1 = end1; ~~~~~~~~~~~~~~~~~~~ end_match_2 = string2 + stop - size1; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* `p' scans through the pattern as `d' scans through the data. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `dend' is the end of the input string that `d' points within. `d' ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ is advanced into the following input string whenever necessary, but ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ this happens before fetching; therefore, at the beginning of the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ loop, `d' can be pointing at the end of a string, but it cannot ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ equal `string2'. */ ~~~~~~~~~~~~~~~~~~~~ if (size1 > 0 && pos <= size1) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ d = string1 + pos; ~~~~~~~~~~~~~~~~~~ dend = end_match_1; ~~~~~~~~~~~~~~~~~~~ } ~ else ~~~~ { ~ d = string2 + pos - size1; ~~~~~~~~~~~~~~~~~~~~~~~~~~ dend = end_match_2; ~~~~~~~~~~~~~~~~~~~ } ~ DEBUG_MATCH_PRINT1 ("The compiled pattern is: \n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT_COMPILED_PATTERN (bufp, p, pend); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT1 ("The string to match is: `"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT_DOUBLE_STRING (d, string1, size1, string2, size2); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT1 ("'\n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* This loops over pattern commands. It exits by returning from the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ function if the match is complete, or it drops through if the match ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fails at this starting point in the input data. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (;;) ~~~~~~~~ { ~ DEBUG_MATCH_PRINT2 ("\n0x%zx: ", (Bytecount) p); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef emacs /* XEmacs added, w/removal of immediate_quit */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!no_quit_in_re_search) ~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ BEGIN_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~~~ QUIT; ~~~~~ END_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~ RE_MATCH_RELOCATE_MOVEABLE_DATA_POINTERS (); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ #endif ~~~~~~ if (p == pend) ~~~~~~~~~~~~~~ { /* End of pattern means we might have succeeded. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT1 ("end of pattern ... "); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* If we haven't matched the entire string, and we want the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ longest match, try backtracking. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (d != end_match_2) ~~~~~~~~~~~~~~~~~~~~~ { ~ same_str_p = (FIRST_STRING_P (match_end) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ == MATCHING_IN_FIRST_STRING); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* AIX compiler got confused when this was combined ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ with the previous declaration. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (same_str_p) ~~~~~~~~~~~~~~~ best_match_p = d > match_end; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ else ~~~~ best_match_p = !MATCHING_IN_FIRST_STRING; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT1 ("backtracking.\n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!FAIL_STACK_EMPTY ()) ~~~~~~~~~~~~~~~~~~~~~~~~~ { /* More failure points to try. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* If exceeds best match so far, save it. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!best_regs_set || best_match_p) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ best_regs_set = true; ~~~~~~~~~~~~~~~~~~~~~ match_end = d; ~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT1 ("\nSAVING match as best so far.\n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (mcnt = 1; mcnt < num_regs; mcnt++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ best_regstart[mcnt] = regstart[mcnt]; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ best_regend[mcnt] = regend[mcnt]; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ } ~ goto fail; ~~~~~~~~~~ } ~ /* If no failure points, don't restore garbage. And if ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ last match is real best match, don't restore second ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ best one. */ ~~~~~~~~~~~~ else if (best_regs_set && !best_match_p) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ restore_best_regs: ~~~~~~~~~~~~~~~~~~ /* Restore best match. It may happen that `dend == ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ end_match_1' while the restored d is in string2. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ For example, the pattern `x.*y.*z' against the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ strings `x-' and `y-z-', if the two strings are ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ not consecutive in memory. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT1 ("Restoring best registers.\n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ d = match_end; ~~~~~~~~~~~~~~ dend = ((d >= string1 && d <= end1) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ? end_match_1 : end_match_2); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (mcnt = 1; mcnt < num_regs; mcnt++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ regstart[mcnt] = best_regstart[mcnt]; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regend[mcnt] = best_regend[mcnt]; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ } ~ } /* d != end_match_2 */ ~~~~~~~~~~~~~~~~~~~~~~~~ succeed_label: ~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT1 ("Accepting match.\n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* If caller wants register contents data back, do it. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ int num_nonshy_regs = bufp->re_nsub + 1; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (regs && !bufp->no_sub) ~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ /* Have the register data arrays been allocated? */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (bufp->regs_allocated == REGS_UNALLOCATED) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { /* No. So allocate them with malloc. We need one ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ extra element beyond `num_regs' for the `-1' marker ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ GNU code uses. */ ~~~~~~~~~~~~~~~~~~ regs->num_regs = MAX (RE_NREGS, num_nonshy_regs + 1); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ BEGIN_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~~~ regs->start = TALLOC (regs->num_regs, regoff_t); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regs->end = TALLOC (regs->num_regs, regoff_t); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ END_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~ RE_MATCH_RELOCATE_MOVEABLE_DATA_POINTERS (); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (regs->start == NULL || regs->end == NULL) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ FREE_VARIABLES (); ~~~~~~~~~~~~~~~~~~ return -2; ~~~~~~~~~~ } ~ bufp->regs_allocated = REGS_REALLOCATE; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ else if (bufp->regs_allocated == REGS_REALLOCATE) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { /* Yes. If we need more elements than were already ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ allocated, reallocate them. If we need fewer, just ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ leave it alone. */ ~~~~~~~~~~~~~~~~~~~ if (regs->num_regs < num_nonshy_regs + 1) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ regs->num_regs = num_nonshy_regs + 1; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ BEGIN_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~~~ RETALLOC (regs->start, regs->num_regs, regoff_t); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RETALLOC (regs->end, regs->num_regs, regoff_t); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ END_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~ RE_MATCH_RELOCATE_MOVEABLE_DATA_POINTERS (); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (regs->start == NULL || regs->end == NULL) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ FREE_VARIABLES (); ~~~~~~~~~~~~~~~~~~ return -2; ~~~~~~~~~~ } ~ } ~ } ~ else ~~~~ { ~ /* The braces fend off a "empty body in an else-statement" ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ warning under GCC when assert expands to nothing. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ assert (bufp->regs_allocated == REGS_FIXED); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* Convert the pointer data in `regstart' and `regend' to ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ indices. Register zero has to be set differently, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ since we haven't kept track of any info for it. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (regs->num_regs > 0) ~~~~~~~~~~~~~~~~~~~~~~~ { ~ regs->start[0] = pos; ~~~~~~~~~~~~~~~~~~~~~ regs->end[0] = (MATCHING_IN_FIRST_STRING ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ? ((regoff_t) (d - string1)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ : ((regoff_t) (d - string2 + size1))); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* Map over the NUM_NONSHY_REGS non-shy internal registers. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Copy each into the corresponding external register. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ MCNT indexes external registers. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (mcnt = 1; mcnt < MIN (num_nonshy_regs, regs->num_regs); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ mcnt++) ~~~~~~~ { ~ int internal_reg = bufp->external_to_internal_register[mcnt]; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if ((int)0xDEADBEEF == internal_reg ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || REG_UNSET (regstart[internal_reg]) || ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ REG_UNSET (regend[internal_reg])) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regs->start[mcnt] = regs->end[mcnt] = -1; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ else ~~~~ { ~ regs->start[mcnt] = ~~~~~~~~~~~~~~~~~~~ (regoff_t) POINTER_TO_OFFSET (regstart[internal_reg]); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regs->end[mcnt] = ~~~~~~~~~~~~~~~~~ (regoff_t) POINTER_TO_OFFSET (regend[internal_reg]); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ } ~ } /* regs && !bufp->no_sub */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* If we have regs and the regs structure has more elements than ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ were in the pattern, set the extra elements starting with ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ NUM_NONSHY_REGS to -1. If we (re)allocated the registers, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ this is the case, because we always allocate enough to have ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ at least one -1 at the end. ~~~~~~~~~~~~~~~~~~~~~~~~~~~ We do this even when no_sub is set because some applications ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (XEmacs) reuse register structures which may contain stale ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ information, and permit attempts to access those registers. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ It would be possible to require the caller to do this, but we'd ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ have to change the API for this function to reflect that, and ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ audit all callers. Note: as of 2003-04-17 callers in XEmacs ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ do clear the registers, but it's safer to leave this code in ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ because of reallocation. ~~~~~~~~~~~~~~~~~~~~~~~~ */ ~~ if (regs && regs->num_regs > 0) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (mcnt = num_nonshy_regs; mcnt < regs->num_regs; mcnt++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regs->start[mcnt] = regs->end[mcnt] = -1; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ DEBUG_MATCH_PRINT4 ("%u failure points pushed, %u popped (%u remain).\n", ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ nfailure_points_pushed, nfailure_points_popped, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ nfailure_points_pushed - nfailure_points_popped); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT2 ("%u registers pushed.\n", num_regs_pushed); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ mcnt = d - pos - (MATCHING_IN_FIRST_STRING ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ? string1 ~~~~~~~~~ : string2 - size1); ~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT2 ("Returning %d from re_match_2.\n", mcnt); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ FREE_VARIABLES (); ~~~~~~~~~~~~~~~~~~ return mcnt; ~~~~~~~~~~~~ } ~ /* Otherwise match next pattern command. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ switch ((re_opcode_t) *p++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ /* Ignore these. Used to ignore the n of succeed_n's which ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ currently have n == 0. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~ case no_op: ~~~~~~~~~~~ DEBUG_MATCH_PRINT1 ("EXECUTING no_op.\n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case succeed: ~~~~~~~~~~~~~ DEBUG_MATCH_PRINT1 ("EXECUTING succeed.\n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto succeed_label; ~~~~~~~~~~~~~~~~~~~ /* Match exactly a string of length n in the pattern. The ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ following byte in the pattern defines n, and the n bytes after ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ that make up the string to match. (Under Mule, this will be in ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the default internal format.) */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case exactn: ~~~~~~~~~~~~ mcnt = *p++; ~~~~~~~~~~~~ DEBUG_MATCH_PRINT2 ("EXECUTING exactn %d.\n", mcnt); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* This is written out as an if-else so we don't waste time ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ testing `translate' inside the loop. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (TRANSLATE_P (translate)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ do ~~ { ~ #ifdef MULE ~~~~~~~~~~~ Bytecount pat_len; ~~~~~~~~~~~~~~~~~~ REGEX_PREFETCH (); ~~~~~~~~~~~~~~~~~~ if (RE_TRANSLATE_1 (itext_ichar_fmt (d, fmt, lispobj)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ != itext_ichar (p)) ~~~~~~~~~~~~~~~~~~~ goto fail; ~~~~~~~~~~ pat_len = itext_ichar_len (p); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ p += pat_len; ~~~~~~~~~~~~~ INC_IBYTEPTR_FMT (d, fmt); ~~~~~~~~~~~~~~~~~~~~~~~~~~ mcnt -= pat_len; ~~~~~~~~~~~~~~~~ #else /* not MULE */ ~~~~~~~~~~~~~~~~~~~~ REGEX_PREFETCH (); ~~~~~~~~~~~~~~~~~~ if ((unsigned char) RE_TRANSLATE_1 (*d++) != *p++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto fail; ~~~~~~~~~~ mcnt--; ~~~~~~~ #endif ~~~~~~ } ~ while (mcnt > 0); ~~~~~~~~~~~~~~~~~ } ~ else ~~~~ { ~ #ifdef MULE ~~~~~~~~~~~ /* If buffer format is default, then we can shortcut and just ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ compare the text directly, byte by byte. Otherwise, we ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ need to go character by character. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (fmt != FORMAT_DEFAULT) ~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ do ~~ { ~ Bytecount pat_len; ~~~~~~~~~~~~~~~~~~ REGEX_PREFETCH (); ~~~~~~~~~~~~~~~~~~ if (itext_ichar_fmt (d, fmt, lispobj) != ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ itext_ichar (p)) ~~~~~~~~~~~~~~~~ goto fail; ~~~~~~~~~~ pat_len = itext_ichar_len (p); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ p += pat_len; ~~~~~~~~~~~~~ INC_IBYTEPTR_FMT (d, fmt); ~~~~~~~~~~~~~~~~~~~~~~~~~~ mcnt -= pat_len; ~~~~~~~~~~~~~~~~ } ~ while (mcnt > 0); ~~~~~~~~~~~~~~~~~ } ~ else ~~~~ #endif ~~~~~~ { ~ do ~~ { ~ REGEX_PREFETCH (); ~~~~~~~~~~~~~~~~~~ if (*d++ != *p++) goto fail; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ mcnt--; ~~~~~~~ } ~ while (mcnt > 0); ~~~~~~~~~~~~~~~~~ } ~ } ~ SET_REGS_MATCHED (); ~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ /* Match any character except possibly a newline or a null. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case anychar: ~~~~~~~~~~~~~ DEBUG_MATCH_PRINT1 ("EXECUTING anychar.\n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ REGEX_PREFETCH (); ~~~~~~~~~~~~~~~~~~ if ((!(bufp->syntax & RE_DOT_NEWLINE) && ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RE_TRANSLATE (itext_ichar_fmt (d, fmt, lispobj)) == '\n') ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || (bufp->syntax & RE_DOT_NOT_NULL && ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RE_TRANSLATE (itext_ichar_fmt (d, fmt, lispobj)) == ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ '\000')) ~~~~~~~~ goto fail; ~~~~~~~~~~ SET_REGS_MATCHED (); ~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT2 (" Matched `%c'.\n", *d); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ INC_IBYTEPTR_FMT (d, fmt); /* XEmacs change */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case charset: ~~~~~~~~~~~~~ case charset_not: ~~~~~~~~~~~~~~~~~ { ~ REGISTER Ichar c; ~~~~~~~~~~~~~~~~~ re_bool not_p = (re_opcode_t) *(p - 1) == charset_not; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT2 ("EXECUTING charset%s.\n", not_p ? "_not" : ""); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ REGEX_PREFETCH (); ~~~~~~~~~~~~~~~~~~ c = itext_ichar_fmt (d, fmt, lispobj); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ c = RE_TRANSLATE (c); /* The character to match. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Cast to `unsigned int' instead of `unsigned char' in case the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bit list is a full 32 bytes long. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if ((unsigned int)c < (unsigned int) (*p * BYTEWIDTH) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ && p[1 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH))) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ not_p = !not_p; ~~~~~~~~~~~~~~~ p += 1 + *p; ~~~~~~~~~~~~ if (!not_p) goto fail; ~~~~~~~~~~~~~~~~~~~~~~ SET_REGS_MATCHED (); ~~~~~~~~~~~~~~~~~~~~ INC_IBYTEPTR_FMT (d, fmt); /* XEmacs change */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ } ~ #ifdef MULE ~~~~~~~~~~~ case charset_mule: ~~~~~~~~~~~~~~~~~~ case charset_mule_not: ~~~~~~~~~~~~~~~~~~~~~~ { ~ REGISTER Ichar c; ~~~~~~~~~~~~~~~~~ re_bool not_p = (re_opcode_t) *(p - 1) == charset_mule_not; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Bitbyte class_bits = *p++; ~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT2 ("EXECUTING charset_mule%s.\n", ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ not_p ? "_not" : ""); ~~~~~~~~~~~~~~~~~~~~~ REGEX_PREFETCH (); ~~~~~~~~~~~~~~~~~~ c = itext_ichar_fmt (d, fmt, lispobj); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ c = RE_TRANSLATE (c); /* The character to match. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if ((class_bits && ~~~~~~~~~~~~~~~~~~ ((class_bits & BIT_WORD && ISWORD (c)) /* = ALNUM */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || (class_bits & BIT_ALPHA && ISALPHA (c)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || (class_bits & BIT_SPACE && ISSPACE (c)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || (class_bits & BIT_PUNCT && ISPUNCT (c)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || (TRANSLATE_P (translate) ? ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (class_bits & (BIT_UPPER | BIT_LOWER) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ && !NOCASEP (lispbuf, c)) ~~~~~~~~~~~~~~~~~~~~~~~~~ : ((class_bits & BIT_UPPER && ISUPPER (c)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || (class_bits & BIT_LOWER && ISLOWER (c)))))) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || EQ (Qt, unified_range_table_lookup ((void *) p, c, Qnil))) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ not_p = !not_p; ~~~~~~~~~~~~~~~ } ~ p += unified_range_table_bytes_used ((void *) p); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!not_p) goto fail; ~~~~~~~~~~~~~~~~~~~~~~ SET_REGS_MATCHED (); ~~~~~~~~~~~~~~~~~~~~ INC_IBYTEPTR_FMT (d, fmt); ~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ } ~ #endif /* MULE */ ~~~~~~~~~~~~~~~~~ /* The beginning of a group is represented by start_memory. The ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ arguments are the register number in the next two bytes, and the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ number of groups inner to this one in the two bytes thereafter. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ The text matched within the group is recorded (in the internal ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ registers data structure) under the register number. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case start_memory: ~~~~~~~~~~~~~~~~~~ { ~ regnum_t regno; ~~~~~~~~~~~~~~~ /* Find out if this group can match the empty string. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ p1 = p; /* To send to group_match_null_string_p. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ EXTRACT_NONNEGATIVE_AND_INCR (regno, p); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT3 ("EXECUTING start_memory %d (%d):\n", ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regno, extract_number (p)); ~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (REG_MATCH_NULL_STRING_P (reg_info[regno]) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ == MATCH_NULL_UNSET_VALUE) ~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ REG_MATCH_NULL_STRING_P (reg_info[regno]) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ = group_match_null_string_p (&p1, pend, reg_info); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ DEBUG_MATCH_PRINT2 (" group CAN%s match null string\n", ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ REG_MATCH_NULL_STRING_P (reg_info[regno]) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ? "NOT" : ""); ~~~~~~~~~~~~~~ /* Save the position in the string where we were the last time ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ we were at this open-group operator in case the group is ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ operated upon by a repetition operator, e.g., with `(a*)*b' ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ against `ab'; then we want to ignore where we are now in the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ string in case this attempt to match fails. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ old_regstart[regno] = REG_MATCH_NULL_STRING_P (reg_info[regno]) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ? REG_UNSET (regstart[regno]) ? d : regstart[regno] ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ : regstart[regno]; ~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT2 (" old_regstart: %zd\n", ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ POINTER_TO_OFFSET (old_regstart[regno])); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regstart[regno] = d; ~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT2 (" regstart: %zd\n", ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ POINTER_TO_OFFSET (regstart[regno])); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ IS_ACTIVE (reg_info[regno]) = 1; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ MATCHED_SOMETHING (reg_info[regno]) = 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Clear this whenever we change the register activity status. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ set_regs_matched_done = 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~ /* This is the new highest active register. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ highest_active_reg = regno; ~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* If nothing was active before, this is the new lowest active ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ register. */ ~~~~~~~~~~~~~ if (lowest_active_reg == NO_LOWEST_ACTIVE_REG) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ lowest_active_reg = regno; ~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Move past the inner group count. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ p += 2; ~~~~~~~ just_past_start_mem = p; ~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ } ~ /* The stop_memory opcode represents the end of a group. Its ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ arguments are the same as start_memory's: the register ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ number, and the number of inner groups. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case stop_memory: ~~~~~~~~~~~~~~~~~ { ~ regnum_t regno, inner_groups; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ EXTRACT_NONNEGATIVE_AND_INCR (regno, p); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ EXTRACT_NONNEGATIVE_AND_INCR (inner_groups, p); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT3 ("EXECUTING stop_memory %d (%d):\n", ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regno, inner_groups); ~~~~~~~~~~~~~~~~~~~~~ /* We need to save the string position the last time we were at ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ this close-group operator in case the group is operated ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ upon by a repetition operator, e.g., with `((a*)*(b*)*)*' ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ against `aba'; then we want to ignore where we are now in ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the string in case this attempt to match fails. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ old_regend[regno] = REG_MATCH_NULL_STRING_P (reg_info[regno]) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ? REG_UNSET (regend[regno]) ? d : regend[regno] ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ : regend[regno]; ~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT2 (" old_regend: %zd\n", ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ POINTER_TO_OFFSET (old_regend[regno])); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regend[regno] = d; ~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT2 (" regend: %zd\n", ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ POINTER_TO_OFFSET (regend[regno])); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* This register isn't active anymore. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ IS_ACTIVE (reg_info[regno]) = 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Clear this whenever we change the register activity status. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ set_regs_matched_done = 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~ /* If this was the only register active, nothing is active ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ anymore. */ ~~~~~~~~~~~~ if (lowest_active_reg == highest_active_reg) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ lowest_active_reg = NO_LOWEST_ACTIVE_REG; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ highest_active_reg = NO_HIGHEST_ACTIVE_REG; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ else ~~~~ { ~ /* We must scan for the new highest active register, since it ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ isn't necessarily one less than now: consider ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (a(b)c(d(e)f)g). When group 3 ends, after the f), the new ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ highest active register is 1. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regnum_t r = regno - 1; ~~~~~~~~~~~~~~~~~~~~~~~ while (r > 0 && !IS_ACTIVE (reg_info[r])) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ r--; ~~~~ /* If we end up at register zero, that means that we saved ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the registers as the result of an `on_failure_jump', not ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ a `start_memory', and we jumped to past the innermost ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `stop_memory'. For example, in ((.)*) we save registers ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 1 and 2 as a result of the *, but when we pop back to the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ second ), we are at the stop_memory 1. Thus, nothing is ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ active. */ ~~~~~~~~~~~ if (r == 0) ~~~~~~~~~~~ { ~ lowest_active_reg = NO_LOWEST_ACTIVE_REG; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ highest_active_reg = NO_HIGHEST_ACTIVE_REG; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ else ~~~~ { ~ highest_active_reg = r; ~~~~~~~~~~~~~~~~~~~~~~~ /* 98/9/21 jhod: We've also gotta set lowest_active_reg, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ don't we? */ ~~~~~~~~~~~~ r = 1; ~~~~~~ while (r < highest_active_reg && !IS_ACTIVE(reg_info[r])) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ r++; ~~~~ lowest_active_reg = r; ~~~~~~~~~~~~~~~~~~~~~~ } ~ } ~ /* If just failed to match something this time around with a ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ group that's operated on by a repetition operator, try to ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ force exit from the ``loop'', and restore the register ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ information for this group that we had before trying this ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ last match. */ ~~~~~~~~~~~~~~~ if ((!MATCHED_SOMETHING (reg_info[regno]) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || just_past_start_mem == p - 4) && p < pend) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ re_bool is_a_jump_n = false; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ p1 = p; ~~~~~~~ mcnt = 0; ~~~~~~~~~ switch ((re_opcode_t) *p1++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ case jump_n: ~~~~~~~~~~~~ is_a_jump_n = true; ~~~~~~~~~~~~~~~~~~~ case pop_failure_jump: ~~~~~~~~~~~~~~~~~~~~~~ case maybe_pop_jump: ~~~~~~~~~~~~~~~~~~~~ case jump: ~~~~~~~~~~ case dummy_failure_jump: ~~~~~~~~~~~~~~~~~~~~~~~~ EXTRACT_NUMBER_AND_INCR (mcnt, p1); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (is_a_jump_n) ~~~~~~~~~~~~~~~~ p1 += 2; ~~~~~~~~ break; ~~~~~~ default: ~~~~~~~~ /* do nothing */ ; ~~~~~~~~~~~~~~~~~~ } ~ p1 += mcnt; ~~~~~~~~~~~ /* If the next operation is a jump backwards in the pattern ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ to an on_failure_jump right before the start_memory ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ corresponding to this stop_memory, exit from the loop ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ by forcing a failure after pushing on the stack the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ on_failure_jump's jump in the pattern, and d. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (mcnt < 0 && (re_opcode_t) *p1 == on_failure_jump ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ && (re_opcode_t) p1[3] == start_memory && ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regno == extract_nonnegative (p1 + 4)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ /* If this group ever matched anything, then restore ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ what its registers were before trying this last ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ failed match, e.g., with `(a*)*b' against `ab' for ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regstart[1], and, e.g., with `((a*)*(b*)*)*' against ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `aba' for regend[3]. ~~~~~~~~~~~~~~~~~~~~ Also restore the registers for inner groups for, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ e.g., `((a*)(b*))*' against `aba' (register 3 would ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ otherwise get trashed). */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (EVER_MATCHED_SOMETHING (reg_info[regno])) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ int r; ~~~~~~ EVER_MATCHED_SOMETHING (reg_info[regno]) = 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Restore this and inner groups' (if any) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ registers. */ ~~~~~~~~~~~~~~ for (r = regno; r < regno + inner_groups; r++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ regstart[r] = old_regstart[r]; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* xx why this test? */ ~~~~~~~~~~~~~~~~~~~~~~~~ if (old_regend[r] >= regstart[r]) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regend[r] = old_regend[r]; ~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ } ~ p1++; ~~~~~ EXTRACT_NUMBER_AND_INCR (mcnt, p1); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PUSH_FAILURE_POINT (p1 + mcnt, d, -2); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto fail; ~~~~~~~~~~ } ~ } ~ /* We used to move past the register number and inner group count ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ here, when registers were just one byte; that's no longer ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ necessary with EXTRACT_NUMBER_AND_INCR(), above. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ } ~ /* \ has been turned into a `duplicate' command which is ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ followed by the numeric value of as the register number. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (Already passed through external-to-internal-register mapping, so ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ it refers to the actual group number, not the non-shy-only ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ numbering used in the external world.) */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case duplicate: ~~~~~~~~~~~~~~~ { ~ REGISTER re_char *d2, *dend2; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Get which register to match against. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regnum_t regno; ~~~~~~~~~~~~~~~ EXTRACT_NONNEGATIVE_AND_INCR (regno, p); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT2 ("EXECUTING duplicate %d.\n", regno); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Can't back reference a group which we've never matched. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (REG_UNSET (regstart[regno]) || REG_UNSET (regend[regno])) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto fail; ~~~~~~~~~~ /* Where in input to try to start matching. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ d2 = regstart[regno]; ~~~~~~~~~~~~~~~~~~~~~ /* Where to stop matching; if both the place to start and ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the place to stop matching are in the same string, then ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ set to the place to stop, otherwise, for now have to use ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the end of the first string. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ dend2 = ((FIRST_STRING_P (regstart[regno]) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ == FIRST_STRING_P (regend[regno])) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ? regend[regno] : end_match_1); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (;;) ~~~~~~~~ { ~ /* If necessary, advance to next segment in register ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ contents. */ ~~~~~~~~~~~~~ while (d2 == dend2) ~~~~~~~~~~~~~~~~~~~ { ~ if (dend2 == end_match_2) break; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (dend2 == regend[regno]) break; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* End of string1 => advance to string2. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ d2 = string2; ~~~~~~~~~~~~~ dend2 = regend[regno]; ~~~~~~~~~~~~~~~~~~~~~~ } ~ /* At end of register contents => success */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (d2 == dend2) break; ~~~~~~~~~~~~~~~~~~~~~~~ /* If necessary, advance to next segment in data. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ REGEX_PREFETCH (); ~~~~~~~~~~~~~~~~~~ /* How many characters left in this segment to match. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ mcnt = dend - d; ~~~~~~~~~~~~~~~~ /* Want how many consecutive characters we can match in ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ one shot, so, if necessary, adjust the count. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (mcnt > dend2 - d2) ~~~~~~~~~~~~~~~~~~~~~~ mcnt = dend2 - d2; ~~~~~~~~~~~~~~~~~~ /* Compare that many; failure if mismatch, else move ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ past them. */ ~~~~~~~~~~~~~~ if (TRANSLATE_P (translate) ~~~~~~~~~~~~~~~~~~~~~~~~~~~ ? bcmp_translate (d, d2, mcnt, translate ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef emacs ~~~~~~~~~~~~ , fmt, lispobj ~~~~~~~~~~~~~~ #endif ~~~~~~ ) ~ : memcmp (d, d2, mcnt)) ~~~~~~~~~~~~~~~~~~~~~~~ goto fail; ~~~~~~~~~~ d += mcnt, d2 += mcnt; ~~~~~~~~~~~~~~~~~~~~~~ /* Do this because we've match some characters. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ SET_REGS_MATCHED (); ~~~~~~~~~~~~~~~~~~~~ } ~ } ~ break; ~~~~~~ /* begline matches the empty string at the beginning of the string ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (unless `not_bol' is set in `bufp'), and, if ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `newline_anchor' is set, after newlines. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case begline: ~~~~~~~~~~~~~ DEBUG_MATCH_PRINT1 ("EXECUTING begline.\n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (AT_STRINGS_BEG (d)) ~~~~~~~~~~~~~~~~~~~~~~~ { ~ if (!bufp->not_bol) break; ~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ else ~~~~ { ~ re_char *d2 = d; ~~~~~~~~~~~~~~~~ DEC_IBYTEPTR (d2); ~~~~~~~~~~~~~~~~~~ if (itext_ichar_ascii_fmt (d2, fmt, lispobj) == '\n' && ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->newline_anchor) ~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ } ~ /* In all other cases, we fail. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto fail; ~~~~~~~~~~ /* endline is the dual of begline. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case endline: ~~~~~~~~~~~~~ DEBUG_MATCH_PRINT1 ("EXECUTING endline.\n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (AT_STRINGS_END (d)) ~~~~~~~~~~~~~~~~~~~~~~~ { ~ if (!bufp->not_eol) break; ~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* We have to ``prefetch'' the next character. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ else if ((d == end1 ? ~~~~~~~~~~~~~~~~~~~~~ itext_ichar_ascii_fmt (string2, fmt, lispobj) : ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ itext_ichar_ascii_fmt (d, fmt, lispobj)) == '\n' ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ && bufp->newline_anchor) ~~~~~~~~~~~~~~~~~~~~~~~~ { ~ break; ~~~~~~ } ~ goto fail; ~~~~~~~~~~ /* Match at the very beginning of the data. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case begbuf: ~~~~~~~~~~~~ DEBUG_MATCH_PRINT1 ("EXECUTING begbuf.\n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (AT_STRINGS_BEG (d)) ~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ goto fail; ~~~~~~~~~~ /* Match at the very end of the data. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case endbuf: ~~~~~~~~~~~~ DEBUG_MATCH_PRINT1 ("EXECUTING endbuf.\n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (AT_STRINGS_END (d)) ~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ goto fail; ~~~~~~~~~~ /* on_failure_keep_string_jump is used to optimize `.*\n'. It ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ pushes NULL as the value for the string on the stack. Then ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `pop_failure_point' will keep the current value for the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ string, instead of restoring it. To see why, consider ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ matching `foo\nbar' against `.*\n'. The .* matches the foo; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ then the . fails against the \n. But the next thing we want ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ to do is match the \n against the \n; if we restored the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ string value, we would be back at the foo. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Because this is used only in specific cases, we don't need to ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ check all the things that `on_failure_jump' does, to make ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ sure the right things get saved on the stack. Hence we don't ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ share its code. The only reason to push anything on the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ stack at all is that otherwise we would have to change ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `anychar's code to do something besides goto fail in this ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case; that seems worse than this. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case on_failure_keep_string_jump: ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT1 ("EXECUTING on_failure_keep_string_jump"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ EXTRACT_NUMBER_AND_INCR (mcnt, p); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT3 (" %d (to 0x%zx):\n", mcnt, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (Bytecount) (p + mcnt)); ~~~~~~~~~~~~~~~~~~~~~~~~ PUSH_FAILURE_POINT (p + mcnt, (unsigned char *) 0, -2); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ /* Uses of on_failure_jump: ~~~~~~~~~~~~~~~~~~~~~~~~~~~ Each alternative starts with an on_failure_jump that points ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ to the beginning of the next alternative. Each alternative ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ except the last ends with a jump that in effect jumps past ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the rest of the alternatives. (They really jump to the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ending jump of the following alternative, because tensioning ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ these jumps is a hassle.) ~~~~~~~~~~~~~~~~~~~~~~~~~ Repeats start with an on_failure_jump that points past both ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the repetition text and either the following jump or ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ pop_failure_jump back to this on_failure_jump. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case on_failure_jump: ~~~~~~~~~~~~~~~~~~~~~ on_failure: ~~~~~~~~~~~ DEBUG_MATCH_PRINT1 ("EXECUTING on_failure_jump"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ EXTRACT_NUMBER_AND_INCR (mcnt, p); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT3 (" %d (to 0x%zx)", mcnt, (Bytecount) (p + mcnt)); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* If this on_failure_jump comes right before a group (i.e., ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the original * applied to a group), save the information ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for that group and all inner ones, so that if we fail back ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ to this point, the group's information will be correct. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ For example, in \(a*\)*\1, we need the preceding group, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ and in \(\(a*\)b*\)\2, we need the inner group. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* We can't use `p' to check ahead because we push ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ a failure point to `p + mcnt' after we do this. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ p1 = p; ~~~~~~~ /* We need to skip no_op's before we look for the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ start_memory in case this on_failure_jump is happening as ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the result of a completed succeed_n, as in \(a\)\{1,3\}b\1 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ against aba. */ ~~~~~~~~~~~~~~~~ while (p1 < pend && (re_opcode_t) *p1 == no_op) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ p1++; ~~~~~ if (p1 < pend && (re_opcode_t) *p1 == start_memory) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ /* We have a new highest active register now. This will ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ get reset at the start_memory we are about to get to, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ but we will have saved all the registers relevant to ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ this repetition op, as described above. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ highest_active_reg = *(p1 + 1) + *(p1 + 2); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (lowest_active_reg == NO_LOWEST_ACTIVE_REG) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ lowest_active_reg = *(p1 + 1); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ DEBUG_MATCH_PRINT1 (":\n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~ PUSH_FAILURE_POINT (p + mcnt, d, -2); ~~~~~~~~~~~~~~~~~~~~~~~~~ regex.c:774:55: note: in definition of macro 'DEBUG_FAIL_PRINT2' if (debug_regexps & RE_DEBUG_FAILURE_POINT) printf (x1, x2) ^~ regex.c:6590:11: note: in expansion of macro 'PUSH_FAILURE_POINT' PUSH_FAILURE_POINT (p + mcnt, d, -2); ^~~~~~~~~~~~~~~~~~ regex.c:1817:26: warning: format '%zx' expects argument of type 'size_t', but argument 2 has type 'long int' [-Wformat=] DEBUG_FAIL_PRINT2 (" Pushing string 0x%zx: `", \ ^ (Bytecount) string_place); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_FAIL_PRINT_DOUBLE_STRING (string_place, string1, size1, \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ string2, size2); \ ~~~~~~~~~~~~~~~~~~~~ DEBUG_FAIL_PRINT1 ("'\n"); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_FAIL_PRINT2 (" Pushing failure id: %u\n", failure_id); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } \ ~~~~~~~~~~~ } while (0) ~~~~~~~~~~~ /* This is the number of items that are pushed and popped on the stack ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for each register. */ ~~~~~~~~~~~~~~~~~~~~~~ #define NUM_REG_ITEMS 3 ~~~~~~~~~~~~~~~~~~~~~~~~ /* Individual items aside from the registers. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef DEBUG ~~~~~~~~~~~~ #define NUM_NONREG_ITEMS 5 /* Includes failure point id. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #else ~~~~~ #define NUM_NONREG_ITEMS 4 ~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ /* We push at most this many items on the stack. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* We used to use (num_regs - 1), which is the number of registers ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ this regexp will save; but that was changed to 5 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ to avoid stack overflow for a regexp with lots of parens. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define MAX_FAILURE_ITEMS (5 * NUM_REG_ITEMS + NUM_NONREG_ITEMS) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* We actually push this many items. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define NUM_FAILURE_ITEMS \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ((highest_active_reg - lowest_active_reg + 1) * NUM_REG_ITEMS \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + NUM_NONREG_ITEMS) ~~~~~~~~~~~~~~~~~~~ /* How many items can still be added to the stack without overflowing it. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define REMAINING_AVAIL_SLOTS ((fail_stack).size - (fail_stack).avail) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Pops what PUSH_FAIL_STACK pushes. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ We restore into the following parameters, all of which should be lvalues: ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ STR -- the saved data position. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PAT -- the saved pattern position. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ LOW_REG, HIGH_REG -- the highest and lowest active registers. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ REGSTART, REGEND -- arrays of string positions. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ REG_INFO -- array of information about each subexpression. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Also assumes the variables `fail_stack' and (if debugging), `bufp', ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `pend', `string1', `size1', `string2', and `size2'. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define POP_FAILURE_POINT(str, pat, low_reg, high_reg, \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regstart, regend, reg_info) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ do { \ ~~~~~~~~~~~~~~ DEBUG_STATEMENT (int ffailure_id;) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int this_reg; \ ~~~~~~~~~~~~~~~~~~~~~~ const unsigned char *string_temp; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ \ ~ /* Remove failure points and point to how many regs pushed. */ \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ assert (fail_stack.avail >= NUM_NONREG_ITEMS); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ \ ~ if (DEBUG_RUNTIME_FLAGS & RE_DEBUG_FAILURE_POINT) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { \ ~~~~~~~~~~~ DEBUG_FAIL_PRINT1 ("POP_FAILURE_POINT:\n"); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_FAIL_PRINT2 (" Before pop, next avail: %zd\n", \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (Bytecount) fail_stack.avail); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_FAIL_PRINT2 (" size: %zd\n", \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (Bytecount) fail_stack.size); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } \ ~~~~~~~~~~~ \ ~ DEBUG_STATEMENT (ffailure_id = POP_FAILURE_INT()); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ \ ~ /* If the saved string location is NULL, it came from an \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ on_failure_keep_string_jump opcode, and we want to throw away the \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ saved NULL, thus retaining our current position in the string. */ \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ string_temp = POP_FAILURE_RELOCATABLE (); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (string_temp != NULL) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ str = string_temp; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~ \ ~ pat = (unsigned char *) POP_FAILURE_POINTER (); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ \ ~ /* Restore register info. */ \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ high_reg = POP_FAILURE_INT (); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ low_reg = POP_FAILURE_INT (); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ \ ~ if (DEBUG_RUNTIME_FLAGS & RE_DEBUG_FAILURE_POINT) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { \ ~~~~~~~~~~~ DEBUG_FAIL_PRINT2 (" Popping failure id: %d\n", ffailure_id); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_FAIL_PRINT2 (" Popping string 0x%zx: `", (Bytecount) str); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_FAIL_PRINT_DOUBLE_STRING (str, string1, size1, \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ string2, size2); \ ~~~~~~~~~~~~~~~~~~~~ DEBUG_FAIL_PRINT1 ("'\n"); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_FAIL_PRINT2 (" Popping pattern 0x%zx: ", (Bytecount) pat); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_FAIL_PRINT_COMPILED_PATTERN (bufp, pat, pend); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_FAIL_PRINT2 (" Popping high active reg: %d\n", high_reg); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_FAIL_PRINT2 (" Popping low active reg: %d\n", low_reg); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } \ ~~~~~~~~~~~ \ ~ for (this_reg = high_reg; this_reg >= low_reg; this_reg--) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { \ ~~~~~~~~~~~ reg_info[this_reg].word = POP_FAILURE_ELT (); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regend[this_reg] = POP_FAILURE_RELOCATABLE (); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regstart[this_reg] = POP_FAILURE_RELOCATABLE (); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ \ ~ if (DEBUG_RUNTIME_FLAGS & RE_DEBUG_FAILURE_POINT) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { \ ~~~~~~~~~~ DEBUG_FAIL_PRINT2 (" Popping reg: %d\n", this_reg); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_FAIL_PRINT2 (" info: 0x%zx\n", \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ * (Bytecount *) ®_info[this_reg]); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_FAIL_PRINT2 (" end: 0x%zx\n", \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (Bytecount) regend[this_reg]); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_FAIL_PRINT2 (" start: 0x%zx\n", \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (Bytecount) regstart[this_reg]); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } \ ~~~~~~~~~~ } \ ~~~~~~~~~~~ \ ~ set_regs_matched_done = 0; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_STATEMENT (nfailure_points_popped++); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } while (0) /* POP_FAILURE_POINT */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ~ /* Structure for per-register (a.k.a. per-group) information. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Other register information, such as the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ starting and ending positions (which are addresses), and the list of ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ inner groups (which is a bits list) are maintained in separate ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ variables. ~~~~~~~~~~ We are making a (strictly speaking) nonportable assumption here: that ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the compiler will pack our bit fields into something that fits into ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the type of `word', i.e., is something that fits into one item on the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ failure stack. */ ~~~~~~~~~~~~~~~~~~ typedef union ~~~~~~~~~~~~~ { ~ fail_stack_elt_t word; ~~~~~~~~~~~~~~~~~~~~~~ struct ~~~~~~ { ~ /* This field is one if this group can match the empty string, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ zero if not. If not yet determined, `MATCH_NULL_UNSET_VALUE'. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define MATCH_NULL_UNSET_VALUE 3 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ unsigned int match_null_string_p : 2; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ unsigned int is_active : 1; ~~~~~~~~~~~~~~~~~~~~~~~~~~~ unsigned int matched_something : 1; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ unsigned int ever_matched_something : 1; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } bits; ~~~~~~~ } register_info_type; ~~~~~~~~~~~~~~~~~~~~~ #define REG_MATCH_NULL_STRING_P(R) ((R).bits.match_null_string_p) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define IS_ACTIVE(R) ((R).bits.is_active) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define MATCHED_SOMETHING(R) ((R).bits.matched_something) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define EVER_MATCHED_SOMETHING(R) ((R).bits.ever_matched_something) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Call this when have matched a real character; it sets `matched' flags ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for the subexpressions which we are currently inside. Also records ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ that those subexprs have matched. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define SET_REGS_MATCHED() \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ do \ ~~~~~~~~~~~~ { \ ~~~~~~~~~~~ if (!set_regs_matched_done) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { \ ~~~~~~~~~~ int r; \ ~~~~~~~~~~~~~~ set_regs_matched_done = 1; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (r = lowest_active_reg; r <= highest_active_reg; r++) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { \ ~~~~~~~~~~ MATCHED_SOMETHING (reg_info[r]) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ = EVER_MATCHED_SOMETHING (reg_info[r]) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ = 1; \ ~~~~~~~~~~~~ } \ ~~~~~~~~~~ } \ ~~~~~~~~~~ } \ ~~~~~~~~~~~ while (0) ~~~~~~~~~ ~ /* Subroutine declarations and macros for regex_compile. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Fetch the next character in the uncompiled pattern---translating it ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if necessary. */ ~~~~~~~~~~~~~~~~~ #define PATFETCH(c) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~ do { \ ~~~~~~~~~~~~~~ PATFETCH_RAW (c); \ ~~~~~~~~~~~~~~~~~~~~~~~~~ c = RE_TRANSLATE (c); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } while (0) ~~~~~~~~~~~ /* Fetch the next character in the uncompiled pattern, with no ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ translation. */ ~~~~~~~~~~~~~~~~ #define PATFETCH_RAW(c) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ do {if (p == pend) return REG_EEND; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ assert (p < pend); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~ c = itext_ichar (p); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ INC_IBYTEPTR (p); \ ~~~~~~~~~~~~~~~~~~~~~~~~~ } while (0) ~~~~~~~~~~~ /* Go backwards one character in the pattern. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define PATUNFETCH DEC_IBYTEPTR (p) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* If `translate' is non-null, return translate[D], else just D. We ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ cast the subscript to translate because some data is declared as ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `char *', to avoid warnings when a string constant is passed. But ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ when we use a character as a subscript we must make it unsigned. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define RE_TRANSLATE(d) \ ~~~~~~~~~~~~~~~~~~~~~~~~~ (TRANSLATE_P (translate) ? RE_TRANSLATE_1 (d) : (d)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Macros for outputting the compiled pattern into `buffer'. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* If the buffer isn't allocated when it comes in, use this. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define INIT_BUF_SIZE 32 ~~~~~~~~~~~~~~~~~~~~~~~~~ /* Make sure we have at least N more bytes of space in buffer. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define GET_BUFFER_SPACE(n) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ while (buf_end - bufp->buffer + (n) > (ptrdiff_t) bufp->allocated) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ EXTEND_BUFFER () ~~~~~~~~~~~~~~~~ /* Make sure we have one more byte of buffer space and then add C to it. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define BUF_PUSH(c) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~ do { \ ~~~~~~~~~~~~~~ GET_BUFFER_SPACE (1); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ *buf_end++ = (unsigned char) (c); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } while (0) ~~~~~~~~~~~ /* Ensure we have two more bytes of buffer space and then append C1 and C2. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define BUF_PUSH_2(c1, c2) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ do { \ ~~~~~~~~~~~~~~ GET_BUFFER_SPACE (2); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ *buf_end++ = (unsigned char) (c1); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ *buf_end++ = (unsigned char) (c2); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } while (0) ~~~~~~~~~~~ /* As with BUF_PUSH_2, except for three bytes. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define BUF_PUSH_3(c1, c2, c3) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ do { \ ~~~~~~~~~~~~~~ GET_BUFFER_SPACE (3); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ *buf_end++ = (unsigned char) (c1); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ *buf_end++ = (unsigned char) (c2); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ *buf_end++ = (unsigned char) (c3); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } while (0) ~~~~~~~~~~~ /* Store a jump with opcode OP at LOC to location TO. We store a ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ relative address offset by the three bytes the jump itself occupies. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define STORE_JUMP(op, loc, to) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ store_op1 (op, loc, (to) - (loc) - 3) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Likewise, for a two-argument jump. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define STORE_JUMP2(op, loc, to, arg) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ store_op2 (op, loc, (to) - (loc) - 3, arg) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Like `STORE_JUMP', but for inserting. Assume `buf_end' is the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ buffer end. */ ~~~~~~~~~~~~~~~ #define INSERT_JUMP(op, loc, to) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ insert_op1 (op, loc, (to) - (loc) - 3, buf_end) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Like `STORE_JUMP2', but for inserting. Assume `buf_end' is the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ buffer end. */ ~~~~~~~~~~~~~~~ #define INSERT_JUMP2(op, loc, to, arg) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ insert_op2 (op, loc, (to) - (loc) - 3, arg, buf_end) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Extend the buffer by twice its current size via realloc and ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ reset the pointers that pointed into the old block to point to the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ correct places in the new one. If extending the buffer results in it ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ being larger than RE_MAX_BUF_SIZE, then flag memory exhausted. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define EXTEND_BUFFER() \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ do { \ ~~~~~~~~~~~~~~~ re_char *old_buffer = bufp->buffer; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (bufp->allocated == RE_MAX_BUF_SIZE) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return REG_ESIZE; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->allocated <<= 1; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (bufp->allocated > RE_MAX_BUF_SIZE) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->allocated = RE_MAX_BUF_SIZE; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->buffer = \ ~~~~~~~~~~~~~~~~~~~~~~~ (unsigned char *) xrealloc (bufp->buffer, bufp->allocated); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (bufp->buffer == NULL) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return REG_ESPACE; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~ /* If the buffer moved, move all the pointers into it. */ \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (old_buffer != bufp->buffer) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { \ ~~~~~~~~~~~~ buf_end = (buf_end - old_buffer) + bufp->buffer; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ begalt = (begalt - old_buffer) + bufp->buffer; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (fixup_alt_jump) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~ fixup_alt_jump = (fixup_alt_jump - old_buffer) + bufp->buffer; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (laststart) \ ~~~~~~~~~~~~~~~~~~~~~~~ laststart = (laststart - old_buffer) + bufp->buffer; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (pending_exact) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~ pending_exact = (pending_exact - old_buffer) + bufp->buffer; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } \ ~~~~~~~~~~~~ } while (0) ~~~~~~~~~~~ #define INIT_REG_TRANSLATE_SIZE 5 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Macros for the compile stack. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Since offsets can go either forwards or backwards, this type needs to be ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ able to hold values from -(RE_MAX_BUF_SIZE - 1) to RE_MAX_BUF_SIZE - 1. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ typedef int pattern_offset_t; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ typedef struct ~~~~~~~~~~~~~~ { ~ pattern_offset_t begalt_offset; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ pattern_offset_t fixup_alt_jump; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ pattern_offset_t inner_group_offset; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ pattern_offset_t laststart_offset; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regnum_t regnum; ~~~~~~~~~~~~~~~~ } compile_stack_elt_t; ~~~~~~~~~~~~~~~~~~~~~~ typedef struct ~~~~~~~~~~~~~~ { ~ compile_stack_elt_t *stack; ~~~~~~~~~~~~~~~~~~~~~~~~~~~ int size; ~~~~~~~~~ int avail; /* Offset of next open position. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } compile_stack_type; ~~~~~~~~~~~~~~~~~~~~~ #define INIT_COMPILE_STACK_SIZE 32 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define COMPILE_STACK_EMPTY (compile_stack.avail == 0) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define COMPILE_STACK_FULL (compile_stack.avail == compile_stack.size) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* The next available element. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define COMPILE_STACK_TOP (compile_stack.stack[compile_stack.avail]) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Set the bit for character C in a bit vector. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define SET_LIST_BIT(c) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (buf_end[((unsigned char) (c)) / BYTEWIDTH] \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ |= 1 << (((unsigned char) c) % BYTEWIDTH)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef MULE ~~~~~~~~~~~ /* Set the "bit" for character C in a range table. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define SET_RANGETAB_BIT(c) put_range_table (rtab, c, c, Qt) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ #ifdef emacs ~~~~~~~~~~~~ /* Parse the longest number we can, but don't produce a bignum, that can't ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ correspond to anything we're interested in and would needlessly complicate ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ code. Also avoid the silent overflow issues of the non-emacs code below. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ If the string at P is not exhausted, leave P pointing at the next ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (probable-)non-digit byte encountered. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define GET_UNSIGNED_NUMBER(num) do \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Ibyte *_gus_numend = NULL; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Lisp_Object _gus_numno; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* most-positive-fixnum on 32 bit XEmacs is 10 decimal digits, \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ nine will keep us in fixnum territory no matter our \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ architecture */ \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Bytecount limit = min (pend - p, 9); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ \ ~ /* Require that any digits are ASCII. We already require that \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the user type ASCII in order to type {,(,|, etc, and there is \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the potential for security holes in the future if we allow \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ non-ASCII digits to specify groups in regexps and other \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ code that parses regexps is not aware of this. */ \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ _gus_numno = parse_integer (p, &_gus_numend, limit, 10, 1, \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Vdigit_fixnum_ascii); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (FIXNUMP (_gus_numno) && XREALFIXNUM (_gus_numno) >= 0) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ num = XREALFIXNUM (_gus_numno); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ p = _gus_numend; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } while (0) ~~~~~~~~~~~ #else ~~~~~ /* Get the next unsigned number in the uncompiled pattern. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define GET_UNSIGNED_NUMBER(num) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { if (p != pend) \ ~~~~~~~~~~~~~~~~~~~~~~~~ { \ ~~~~~~~~~~~ int _gun_do_unfetch = 1; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PATFETCH (c); \ ~~~~~~~~~~~~~~~~~~~~~~ while (ISDIGIT (c)) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~ { \ ~~~~~~~~~~~ if (num < 0) \ ~~~~~~~~~~~~~~~~~~~~ num = 0; \ ~~~~~~~~~~~~~~~~ num = num * 10 + c - '0'; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (p == pend) \ ~~~~~~~~~~~~~~~~~~~~~~ { \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ _gun_do_unfetch = 0; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; \ ~~~~~~~~~~~~~~~ } \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PATFETCH (c); \ ~~~~~~~~~~~~~~~~~~~~ } \ ~~~~~~~~~~~ if (_gun_do_unfetch) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Make sure P points to the next non-digit character. */ \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PATUNFETCH; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ #endif ~~~~~~ /* Map a string to the char class it names (if any). BEG points to the string ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ to be parsed and LIMIT is the length, in bytes, of that string. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ XEmacs; this only handles the NAME part of the [:NAME:] specification of a ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ character class name. The GNU emacs version of this function attempts to ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ handle the string from [: onwards, and is called re_wctype_parse. Our ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ approach means the function doesn't need to be called with every character ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ class encountered. ~~~~~~~~~~~~~~~~~~ LENGTH would be a Bytecount if this function didn't need to be compiled ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ also for executables that don't include lisp.h ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Return RECC_ERROR if STRP doesn't match a known character class. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_wctype_t ~~~~~~~~~~~ re_wctype (const unsigned char *beg, int limit) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ /* Sort tests in the length=five case by frequency the classes to minimize ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ number of times we fail the comparison. The frequencies of character class ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ names used in Emacs sources as of 2016-07-27: ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ $ find \( -name \*.c -o -name \*.el \) -exec grep -h '\[:[a-z]*:]' {} + | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ sed 's/]/]\n/g' |grep -o '\[:[a-z]*:]' |sort |uniq -c |sort -nr ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 213 [:alnum:] ~~~~~~~~~~~~~ 104 [:alpha:] ~~~~~~~~~~~~~ 62 [:space:] ~~~~~~~~~~~~ 39 [:digit:] ~~~~~~~~~~~~ 36 [:blank:] ~~~~~~~~~~~~ 26 [:word:] ~~~~~~~~~~~ 26 [:upper:] ~~~~~~~~~~~~ 21 [:lower:] ~~~~~~~~~~~~ 10 [:xdigit:] ~~~~~~~~~~~~~ 10 [:punct:] ~~~~~~~~~~~~ 10 [:ascii:] ~~~~~~~~~~~~ 4 [:nonascii:] ~~~~~~~~~~~~~~ 4 [:graph:] ~~~~~~~~~~~ 2 [:print:] ~~~~~~~~~~~ 2 [:cntrl:] ~~~~~~~~~~~ 1 [:ff:] ~~~~~~~~ If you update this list, consider also updating chain of or'ed conditions ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ in execute_charset function. XEmacs; our equivalent is the condition ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ checking class_bits in the charset_mule and charset_mule_not opcodes. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ ~~ switch (limit) { ~~~~~~~~~~~~~~~~ case 4: ~~~~~~~ if (!memcmp (beg, "word", 4)) return RECC_WORD; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case 5: ~~~~~~~ if (!memcmp (beg, "alnum", 5)) return RECC_ALNUM; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!memcmp (beg, "alpha", 5)) return RECC_ALPHA; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!memcmp (beg, "space", 5)) return RECC_SPACE; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!memcmp (beg, "digit", 5)) return RECC_DIGIT; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!memcmp (beg, "blank", 5)) return RECC_BLANK; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!memcmp (beg, "upper", 5)) return RECC_UPPER; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!memcmp (beg, "lower", 5)) return RECC_LOWER; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!memcmp (beg, "punct", 5)) return RECC_PUNCT; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!memcmp (beg, "ascii", 5)) return RECC_ASCII; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!memcmp (beg, "graph", 5)) return RECC_GRAPH; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!memcmp (beg, "print", 5)) return RECC_PRINT; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!memcmp (beg, "cntrl", 5)) return RECC_CNTRL; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case 6: ~~~~~~~ if (!memcmp (beg, "xdigit", 6)) return RECC_XDIGIT; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case 7: ~~~~~~~ if (!memcmp (beg, "unibyte", 7)) return RECC_UNIBYTE; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case 8: ~~~~~~~ if (!memcmp (beg, "nonascii", 8)) return RECC_NONASCII; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case 9: ~~~~~~~ if (!memcmp (beg, "multibyte", 9)) return RECC_MULTIBYTE; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ } ~ return RECC_ERROR; ~~~~~~~~~~~~~~~~~~ } ~ /* True if CH is in the char class CC. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int ~~~ re_iswctype (int ch, re_wctype_t cc ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RE_ISWCTYPE_ARG_DECL) ~~~~~~~~~~~~~~~~~~~~~ { ~ switch (cc) ~~~~~~~~~~~ { ~ case RECC_ALNUM: return ISALNUM (ch) != 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_ALPHA: return ISALPHA (ch) != 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_BLANK: return ISBLANK (ch) != 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_CNTRL: return ISCNTRL (ch) != 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_DIGIT: return ISDIGIT (ch) != 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_GRAPH: return ISGRAPH (ch) != 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_PRINT: return ISPRINT (ch) != 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_PUNCT: return ISPUNCT (ch) != 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_SPACE: return ISSPACE (ch) != 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef emacs ~~~~~~~~~~~~ case RECC_UPPER: ~~~~~~~~~~~~~~~~ return NILP (lispbuf->case_fold_search) ? ISUPPER (ch) != 0 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ : !NOCASEP (lispbuf, ch); ~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_LOWER: ~~~~~~~~~~~~~~~~ return NILP (lispbuf->case_fold_search) ? ISLOWER (ch) != 0 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ : !NOCASEP (lispbuf, ch); ~~~~~~~~~~~~~~~~~~~~~~~~~ #else ~~~~~ case RECC_UPPER: return ISUPPER (ch) != 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_LOWER: return ISLOWER (ch) != 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ case RECC_XDIGIT: return ISXDIGIT (ch) != 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_ASCII: return ISASCII (ch) != 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_NONASCII: case RECC_MULTIBYTE: return !ISASCII (ch); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_UNIBYTE: return ISUNIBYTE (ch) != 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_WORD: return ISWORD (ch) != 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_ERROR: return false; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ assert (0); ~~~~~~~~~~~ return -1; ~~~~~~~~~~ } ~ #ifdef MULE ~~~~~~~~~~~ static re_bool ~~~~~~~~~~~~~~ re_wctype_can_match_non_ascii (re_wctype_t cc) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ switch (cc) ~~~~~~~~~~~ { ~ case RECC_ASCII: ~~~~~~~~~~~~~~~~ case RECC_UNIBYTE: ~~~~~~~~~~~~~~~~~~ case RECC_CNTRL: ~~~~~~~~~~~~~~~~ case RECC_DIGIT: ~~~~~~~~~~~~~~~~ case RECC_XDIGIT: ~~~~~~~~~~~~~~~~~ case RECC_BLANK: ~~~~~~~~~~~~~~~~ return false; ~~~~~~~~~~~~~ default: ~~~~~~~~ return true; ~~~~~~~~~~~~ } ~ } ~ #endif /* MULE */ ~~~~~~~~~~~~~~~~~ #ifdef emacs ~~~~~~~~~~~~ /* Return a bit-pattern to use in the range-table bits to match multibyte ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ chars of class CC. */ ~~~~~~~~~~~~~~~~~~~~~~ static unsigned char ~~~~~~~~~~~~~~~~~~~~ re_wctype_to_bit (re_wctype_t cc) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ switch (cc) ~~~~~~~~~~~ { ~ case RECC_PRINT: case RECC_GRAPH: ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_ALPHA: return BIT_ALPHA; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_ALNUM: case RECC_WORD: return BIT_WORD; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_LOWER: return BIT_LOWER; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_UPPER: return BIT_UPPER; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_PUNCT: return BIT_PUNCT; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_SPACE: return BIT_SPACE; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_MULTIBYTE: case RECC_NONASCII: ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_ASCII: case RECC_DIGIT: case RECC_XDIGIT: case RECC_CNTRL: ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_BLANK: case RECC_UNIBYTE: case RECC_ERROR: return 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ default: ~~~~~~~~ ABORT (); ~~~~~~~~~ return 0; ~~~~~~~~~ } ~ } ~ #endif /* emacs */ ~~~~~~~~~~~~~~~~~~ ~ static void store_op1 (re_opcode_t op, unsigned char *loc, int arg); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ static void store_op2 (re_opcode_t op, unsigned char *loc, int arg1, int arg2); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ static void insert_op1 (re_opcode_t op, unsigned char *loc, int arg, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ unsigned char *end); ~~~~~~~~~~~~~~~~~~~~ static void insert_op2 (re_opcode_t op, unsigned char *loc, int arg1, int arg2, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ unsigned char *end); ~~~~~~~~~~~~~~~~~~~~ static re_bool at_begline_loc_p (re_char *pattern, re_char *p, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ reg_syntax_t syntax); ~~~~~~~~~~~~~~~~~~~~~ static re_bool at_endline_loc_p (re_char *p, re_char *pend, int syntax); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ static re_bool group_in_compile_stack (compile_stack_type compile_stack, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regnum_t regnum); ~~~~~~~~~~~~~~~~~ static reg_errcode_t compile_range (re_char **p_ptr, re_char *pend, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RE_TRANSLATE_TYPE translate, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ reg_syntax_t syntax, ~~~~~~~~~~~~~~~~~~~~ unsigned char *b); ~~~~~~~~~~~~~~~~~~ #ifdef MULE ~~~~~~~~~~~ static reg_errcode_t compile_extended_range (re_char **p_ptr, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_char *pend, ~~~~~~~~~~~~~~ RE_TRANSLATE_TYPE translate, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ reg_syntax_t syntax, ~~~~~~~~~~~~~~~~~~~~ Lisp_Object rtab); ~~~~~~~~~~~~~~~~~~ #endif /* MULE */ ~~~~~~~~~~~~~~~~~ #ifdef emacs ~~~~~~~~~~~~ reg_errcode_t compile_char_class (re_wctype_t cc, Lisp_Object rtab, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Bitbyte *flags_out); ~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ static re_bool group_match_null_string_p (re_char **p, re_char *end, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ register_info_type *reg_info); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ static re_bool alt_match_null_string_p (re_char *p, re_char *end, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ register_info_type *reg_info); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ static re_bool common_op_match_null_string_p (re_char **p, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_char *end, ~~~~~~~~~~~~~ register_info_type *reg_info); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ static int bcmp_translate (re_char *s1, re_char *s2, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ REGISTER int len, RE_TRANSLATE_TYPE translate ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef emacs ~~~~~~~~~~~~ , Internal_Format fmt, Lisp_Object lispobj ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ ); ~~ static int re_match_2_internal (struct re_pattern_buffer *bufp, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_char *string1, int size1, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_char *string2, int size2, int pos, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ struct re_registers *regs, int stop ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RE_LISP_CONTEXT_ARGS_DECL); ~~~~~~~~~~~~~~~~~~~~~~~~~~~ ~ #ifndef MATCH_MAY_ALLOCATE ~~~~~~~~~~~~~~~~~~~~~~~~~~ /* If we cannot allocate large objects within re_match_2_internal, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ we make the fail stack and register vectors global. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ The fail stack, we grow to the maximum size when a regexp ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ is compiled. ~~~~~~~~~~~~ The register vectors, we adjust in size each time we ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ compile a regexp, according to the number of registers it needs. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ static fail_stack_type fail_stack; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Size with which the following vectors are currently allocated. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ That is so we can make them bigger as needed, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ but never make them smaller. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ static int regs_allocated_size; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ static re_char ** regstart, ** regend; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ static re_char ** old_regstart, ** old_regend; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ static re_char **best_regstart, **best_regend; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ static register_info_type *reg_info; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ static re_char **reg_dummy; ~~~~~~~~~~~~~~~~~~~~~~~~~~~ static register_info_type *reg_info_dummy; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Make the register vectors big enough for NUM_REGS registers, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ but don't make them smaller. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ static ~~~~~~ regex_grow_registers (int num_regs) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ if (num_regs > regs_allocated_size) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ RETALLOC (regstart, num_regs, re_char *); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RETALLOC (regend, num_regs, re_char *); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RETALLOC (old_regstart, num_regs, re_char *); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RETALLOC (old_regend, num_regs, re_char *); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RETALLOC (best_regstart, num_regs, re_char *); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RETALLOC (best_regend, num_regs, re_char *); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RETALLOC (reg_info, num_regs, register_info_type); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RETALLOC (reg_dummy, num_regs, re_char *); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RETALLOC (reg_info_dummy, num_regs, register_info_type); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regs_allocated_size = num_regs; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ } ~ #endif /* not MATCH_MAY_ALLOCATE */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ~ /* `regex_compile' compiles PATTERN (of length SIZE) according to SYNTAX. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Returns one of error codes defined in `regex.h', or zero for success. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Assumes the `allocated' (and perhaps `buffer') and `translate' ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fields are set in BUFP on entry. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ If it succeeds, results are put in BUFP (if it returns an error, the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ contents of BUFP are undefined): ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `buffer' is the compiled pattern; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `syntax' is set to SYNTAX; ~~~~~~~~~~~~~~~~~~~~~~~~~~ `used' is set to the length of the compiled pattern; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `fastmap_accurate' is zero; ~~~~~~~~~~~~~~~~~~~~~~~~~~~ `re_ngroups' is the number of groups/subexpressions (including shy ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ groups) in PATTERN; ~~~~~~~~~~~~~~~~~~~ `re_nsub' is the number of non-shy groups in PATTERN; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `not_bol' and `not_eol' are zero; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ The `fastmap' and `newline_anchor' fields are neither ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ examined nor set. */ ~~~~~~~~~~~~~~~~~~~~~ /* Return, freeing storage we allocated. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define FREE_STACK_RETURN(value) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ do \ ~~~~~~~~~~ { \ ~~~~~~~~~ xfree (compile_stack.stack); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return value; \ ~~~~~~~~~~~~~~~~~~~~ } while (0) ~~~~~~~~~~~ static reg_errcode_t ~~~~~~~~~~~~~~~~~~~~ regex_compile (re_char *pattern, int size, reg_syntax_t syntax, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ struct re_pattern_buffer *bufp) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ /* We fetch characters from PATTERN here. We declare these as int ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (or possibly long) so that chars above 127 can be used as ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ array indices. The macros that fetch a character from the pattern ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ make sure to coerce to unsigned char before assigning, so we won't ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ get bitten by negative numbers here. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* XEmacs change: used to be unsigned char. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ REGISTER EMACS_INT c, c1; ~~~~~~~~~~~~~~~~~~~~~~~~~ /* A random temporary spot in PATTERN. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_char *p1; ~~~~~~~~~~~~ /* Points to the end of the buffer, where we should append. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ REGISTER unsigned char *buf_end; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Keeps track of unclosed groups. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ compile_stack_type compile_stack; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Points to the current (ending) position in the pattern. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_char *p = pattern; ~~~~~~~~~~~~~~~~~~~~~ re_char *pend = pattern + size; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* How to translate the characters in the pattern. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RE_TRANSLATE_TYPE translate = bufp->translate; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Address of the count-byte of the most recently inserted `exactn' ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ command. This makes it possible to tell if a new exact-match ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ character can be added to that command or if the character requires ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ a new `exactn' command. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~ unsigned char *pending_exact = 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Address of start of the most recently finished expression. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ This tells, e.g., postfix * where to find the start of its ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ operand. Reset at the beginning of groups and alternatives. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ unsigned char *laststart = 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Address of beginning of regexp, or inside of last group. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ unsigned char *begalt; ~~~~~~~~~~~~~~~~~~~~~~ /* Place in the uncompiled pattern (i.e., the {) to ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ which to go back if the interval is invalid. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_char *beg_interval; ~~~~~~~~~~~~~~~~~~~~~~ /* Address of the place where a forward jump should go to the end of ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the containing expression. Each alternative of an `or' -- except the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ last -- ends with a forward jump of this sort. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ unsigned char *fixup_alt_jump = 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Counts open-groups as they are encountered. Remembered for the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ matching close-group on the compile stack, so the same register ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ number is put in the stop_memory as the start_memory. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regnum_t regnum = 0; ~~~~~~~~~~~~~~~~~~~~ #ifdef DEBUG ~~~~~~~~~~~~ if (debug_regexps & RE_DEBUG_COMPILATION) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ int debug_count; ~~~~~~~~~~~~~~~~ DEBUG_PRINT1 ("\nCompiling pattern: "); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (debug_count = 0; debug_count < size; debug_count++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ putchar (pattern[debug_count]); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ putchar ('\n'); ~~~~~~~~~~~~~~~ } ~ #endif /* DEBUG */ ~~~~~~~~~~~~~~~~~~ /* Initialize the compile stack. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ compile_stack.stack = TALLOC (INIT_COMPILE_STACK_SIZE, compile_stack_elt_t); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (compile_stack.stack == NULL) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return REG_ESPACE; ~~~~~~~~~~~~~~~~~~ compile_stack.size = INIT_COMPILE_STACK_SIZE; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ compile_stack.avail = 0; ~~~~~~~~~~~~~~~~~~~~~~~~ /* Initialize the pattern buffer. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->syntax = syntax; ~~~~~~~~~~~~~~~~~~~~~~ bufp->fastmap_accurate = 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->not_bol = bufp->not_eol = 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Set `used' to zero, so that if we return an error, the pattern ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ printer (for debugging) will think there's no pattern. We reset it ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ at the end. */ ~~~~~~~~~~~~~~~ bufp->used = 0; ~~~~~~~~~~~~~~~ /* Always count groups, whether or not bufp->no_sub is set. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->re_nsub = 0; ~~~~~~~~~~~~~~~~~~ bufp->re_ngroups = 0; ~~~~~~~~~~~~~~~~~~~~~ bufp->warned_about_incompatible_back_references = 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (bufp->external_to_internal_register == 0) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ bufp->external_to_internal_register_size = INIT_REG_TRANSLATE_SIZE; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RETALLOC (bufp->external_to_internal_register, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->external_to_internal_register_size, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int); ~~~~~ } ~ { ~ int i; ~~~~~~ bufp->external_to_internal_register[0] = 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (i = 1; i < bufp->external_to_internal_register_size; i++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->external_to_internal_register[i] = (int) 0xDEADBEEF; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ #if !defined (emacs) && !defined (SYNTAX_TABLE) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Initialize the syntax table. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ init_syntax_once (); ~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ if (bufp->allocated == 0) ~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ if (bufp->buffer) ~~~~~~~~~~~~~~~~~ { /* If zero allocated, but buffer is non-null, try to realloc ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ enough space. This loses if buffer's address is bogus, but ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ that is the user's responsibility. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RETALLOC (bufp->buffer, INIT_BUF_SIZE, unsigned char); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ else ~~~~ { /* Caller did not allocate a buffer. Do it for them. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->buffer = TALLOC (INIT_BUF_SIZE, unsigned char); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ if (!bufp->buffer) FREE_STACK_RETURN (REG_ESPACE); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->allocated = INIT_BUF_SIZE; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ begalt = buf_end = bufp->buffer; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Loop through the uncompiled pattern until we're at the end. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ while (p != pend) ~~~~~~~~~~~~~~~~~ { ~ PATFETCH (c); ~~~~~~~~~~~~~ switch (c) ~~~~~~~~~~ { ~ case '^': ~~~~~~~~~ { ~ if ( /* If at start of pattern, it's an operator. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ p == pattern + 1 ~~~~~~~~~~~~~~~~ /* If context independent, it's an operator. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || syntax & RE_CONTEXT_INDEP_ANCHORS ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Otherwise, depends on what's come before. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || at_begline_loc_p (pattern, p, syntax)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ BUF_PUSH (begline); ~~~~~~~~~~~~~~~~~~~ else ~~~~ goto normal_char; ~~~~~~~~~~~~~~~~~ } ~ break; ~~~~~~ case '$': ~~~~~~~~~ { ~ if ( /* If at end of pattern, it's an operator. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ p == pend ~~~~~~~~~ /* If context independent, it's an operator. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || syntax & RE_CONTEXT_INDEP_ANCHORS ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Otherwise, depends on what's next. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || at_endline_loc_p (p, pend, syntax)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ BUF_PUSH (endline); ~~~~~~~~~~~~~~~~~~~ else ~~~~ goto normal_char; ~~~~~~~~~~~~~~~~~ } ~ break; ~~~~~~ case '+': ~~~~~~~~~ case '?': ~~~~~~~~~ if ((syntax & RE_BK_PLUS_QM) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || (syntax & RE_LIMITED_OPS)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto normal_char; ~~~~~~~~~~~~~~~~~ handle_plus: ~~~~~~~~~~~~ case '*': ~~~~~~~~~ /* If there is no previous pattern... */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!laststart) ~~~~~~~~~~~~~~~ { ~ if (syntax & RE_CONTEXT_INVALID_OPS) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ FREE_STACK_RETURN (REG_BADRPT); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ else if (!(syntax & RE_CONTEXT_INDEP_OPS)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto normal_char; ~~~~~~~~~~~~~~~~~ } ~ { ~ /* true means zero/many matches are allowed. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_bool zero_times_ok = c != '+'; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_bool many_times_ok = c != '?'; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* true means match shortest string possible. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_bool minimal = false; ~~~~~~~~~~~~~~~~~~~~~~~~ /* If there is a sequence of repetition chars, collapse it ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ down to just one (the right one). We can't combine ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ interval operators with these because of, e.g., `a{2}*', ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ which should only match an even number of `a's. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ while (p != pend) ~~~~~~~~~~~~~~~~~ { ~ PATFETCH (c); ~~~~~~~~~~~~~ if (c == '*' || (!(syntax & RE_BK_PLUS_QM) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ && (c == '+' || c == '?'))) ~~~~~~~~~~~~~~~~~~~~~~~~~~~ ; ~ else if (syntax & RE_BK_PLUS_QM && c == '\\') ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ if (p == pend) FREE_STACK_RETURN (REG_EESCAPE); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PATFETCH (c1); ~~~~~~~~~~~~~~ if (!(c1 == '+' || c1 == '?')) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ PATUNFETCH; ~~~~~~~~~~~ PATUNFETCH; ~~~~~~~~~~~ break; ~~~~~~ } ~ c = c1; ~~~~~~~ } ~ else ~~~~ { ~ PATUNFETCH; ~~~~~~~~~~~ break; ~~~~~~ } ~ /* If we get here, we found another repeat character. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!(syntax & RE_NO_MINIMAL_MATCHING)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ /* "*?" and "+?" and "??" are okay (and mean match ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ minimally), but other sequences (such as "*??" and ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ "+++") are rejected (reserved for future use). */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (minimal || c != '?') ~~~~~~~~~~~~~~~~~~~~~~~~ FREE_STACK_RETURN (REG_BADRPT); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ minimal = true; ~~~~~~~~~~~~~~~ } ~ else ~~~~ { ~ zero_times_ok |= c != '+'; ~~~~~~~~~~~~~~~~~~~~~~~~~~ many_times_ok |= c != '?'; ~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ } ~ /* Star, etc. applied to an empty pattern is equivalent ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ to an empty pattern. */ ~~~~~~~~~~~~~~~~~~~~~~~~ if (!laststart) ~~~~~~~~~~~~~~~ break; ~~~~~~ /* Now we know whether zero matches is allowed ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ and whether two or more matches is allowed ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ and whether we want minimal or maximal matching. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (minimal) ~~~~~~~~~~~~ { ~ if (!many_times_ok) ~~~~~~~~~~~~~~~~~~~ { ~ /* "a??" becomes: ~~~~~~~~~~~~~~~~~ 0: /on_failure_jump to 6 ~~~~~~~~~~~~~~~~~~~~~~~~ 3: /jump to 9 ~~~~~~~~~~~~~ 6: /exactn/1/A ~~~~~~~~~~~~~~ 9: end of pattern. ~~~~~~~~~~~~~~~~~~ */ ~~ GET_BUFFER_SPACE (6); ~~~~~~~~~~~~~~~~~~~~~ INSERT_JUMP (jump, laststart, buf_end + 3); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ buf_end += 3; ~~~~~~~~~~~~~ INSERT_JUMP (on_failure_jump, laststart, laststart + 6); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ buf_end += 3; ~~~~~~~~~~~~~ } ~ else if (zero_times_ok) ~~~~~~~~~~~~~~~~~~~~~~~ { ~ /* "a*?" becomes: ~~~~~~~~~~~~~~~~~ 0: /jump to 6 ~~~~~~~~~~~~~ 3: /exactn/1/A ~~~~~~~~~~~~~~ 6: /on_failure_jump to 3 ~~~~~~~~~~~~~~~~~~~~~~~~ 9: end of pattern. ~~~~~~~~~~~~~~~~~~ */ ~~ GET_BUFFER_SPACE (6); ~~~~~~~~~~~~~~~~~~~~~ INSERT_JUMP (jump, laststart, buf_end + 3); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ buf_end += 3; ~~~~~~~~~~~~~ STORE_JUMP (on_failure_jump, buf_end, laststart + 3); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ buf_end += 3; ~~~~~~~~~~~~~ } ~ else ~~~~ { ~ /* "a+?" becomes: ~~~~~~~~~~~~~~~~~ 0: /exactn/1/A ~~~~~~~~~~~~~~ 3: /on_failure_jump to 0 ~~~~~~~~~~~~~~~~~~~~~~~~ 6: end of pattern. ~~~~~~~~~~~~~~~~~~ */ ~~ GET_BUFFER_SPACE (3); ~~~~~~~~~~~~~~~~~~~~~ STORE_JUMP (on_failure_jump, buf_end, laststart); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ buf_end += 3; ~~~~~~~~~~~~~ } ~ } ~ else ~~~~ { ~ /* Are we optimizing this jump? */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_bool keep_string_p = false; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (many_times_ok) ~~~~~~~~~~~~~~~~~~ { /* More than one repetition is allowed, so put in ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ at the end a backward relative jump from ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `buf_end' to before the next jump we're going ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ to put in below (which jumps from laststart to ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ after this jump). ~~~~~~~~~~~~~~~~~ But if we are at the `*' in the exact sequence `.*\n', ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ insert an unconditional jump backwards to the ., ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ instead of the beginning of the loop. This way we only ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ push a failure point once, instead of every time ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ through the loop. */ ~~~~~~~~~~~~~~~~~~~~~ assert (p - 1 > pattern); ~~~~~~~~~~~~~~~~~~~~~~~~~ /* Allocate the space for the jump. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ GET_BUFFER_SPACE (3); ~~~~~~~~~~~~~~~~~~~~~ /* We know we are not at the first character of the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ pattern, because laststart was nonzero. And we've ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ already incremented `p', by the way, to be the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ character after the `*'. Do we have to do something ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ analogous here for null bytes, because of ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RE_DOT_NOT_NULL? */ ~~~~~~~~~~~~~~~~~~~ if (*(p - 2) == '.' ~~~~~~~~~~~~~~~~~~~ && zero_times_ok ~~~~~~~~~~~~~~~~ && p < pend && *p == '\n' ~~~~~~~~~~~~~~~~~~~~~~~~~ && !(syntax & RE_DOT_NEWLINE)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { /* We have .*\n. */ ~~~~~~~~~~~~~~~~~~~~~~ STORE_JUMP (jump, buf_end, laststart); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ keep_string_p = true; ~~~~~~~~~~~~~~~~~~~~~ } ~ else ~~~~ /* Anything else. */ ~~~~~~~~~~~~~~~~~~~~~ STORE_JUMP (maybe_pop_jump, buf_end, laststart - 3); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* We've added more stuff to the buffer. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ buf_end += 3; ~~~~~~~~~~~~~ } ~ /* On failure, jump from laststart to buf_end + 3, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ which will be the end of the buffer after this jump ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ is inserted. */ ~~~~~~~~~~~~~~~~ GET_BUFFER_SPACE (3); ~~~~~~~~~~~~~~~~~~~~~ INSERT_JUMP (keep_string_p ? on_failure_keep_string_jump ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ : on_failure_jump, ~~~~~~~~~~~~~~~~~~ laststart, buf_end + 3); ~~~~~~~~~~~~~~~~~~~~~~~~ buf_end += 3; ~~~~~~~~~~~~~ if (!zero_times_ok) ~~~~~~~~~~~~~~~~~~~ { ~ /* At least one repetition is required, so insert a ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `dummy_failure_jump' before the initial ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `on_failure_jump' instruction of the loop. This ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ effects a skip over that instruction the first time ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ we hit that loop. */ ~~~~~~~~~~~~~~~~~~~~~ GET_BUFFER_SPACE (3); ~~~~~~~~~~~~~~~~~~~~~ INSERT_JUMP (dummy_failure_jump, laststart, laststart + 6); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ buf_end += 3; ~~~~~~~~~~~~~ } ~ } ~ pending_exact = 0; ~~~~~~~~~~~~~~~~~~ } ~ break; ~~~~~~ case '.': ~~~~~~~~~ laststart = buf_end; ~~~~~~~~~~~~~~~~~~~~ BUF_PUSH (anychar); ~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ #ifdef MULE ~~~~~~~~~~~ #define MAYBE_START_OVER_WITH_EXTENDED(ch) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (ch >= 0x80) do \ ~~~~~~~~~~~~~~~~~~~~~~ { \ ~~~~~~~ goto start_over_with_extended; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } while (0) ~~~~~~~~~~~ #else ~~~~~ #define MAYBE_START_OVER_WITH_EXTENDED(ch) (void)(ch) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ case '[': ~~~~~~~~~ { ~ /* XEmacs change: this whole section */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_bool had_char_class = false; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (p == pend) FREE_STACK_RETURN (REG_EBRACK); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Ensure that we have enough space to push a charset: the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ opcode, the length count, and the bitset; 34 bytes in all. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ GET_BUFFER_SPACE (34); ~~~~~~~~~~~~~~~~~~~~~~ laststart = buf_end; ~~~~~~~~~~~~~~~~~~~~ /* We test `*p == '^' twice, instead of using an if ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ statement, so we only need one BUF_PUSH. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ BUF_PUSH (*p == '^' ? charset_not : charset); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (*p == '^') ~~~~~~~~~~~~~~ p++; ~~~~ /* Remember the first position in the bracket expression. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ p1 = p; ~~~~~~~ /* Push the number of bytes in the bitmap. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ BUF_PUSH ((1 << BYTEWIDTH) / BYTEWIDTH); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Clear the whole map. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~ memset (buf_end, 0, (1 << BYTEWIDTH) / BYTEWIDTH); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* charset_not matches newline according to a syntax bit. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if ((re_opcode_t) buf_end[-2] == charset_not ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ && (syntax & RE_HAT_LISTS_NOT_NEWLINE)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ SET_LIST_BIT ('\n'); ~~~~~~~~~~~~~~~~~~~~ /* Read in characters and ranges, setting map bits. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (;;) ~~~~~~~~ { ~ if (p == pend) FREE_STACK_RETURN (REG_EBRACK); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PATFETCH (c); ~~~~~~~~~~~~~ /* Frumble-bumble, we may have found some extended chars. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Need to start over, process everything using the general ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ extended-char mechanism, and need to use charset_mule and ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ charset_mule_not instead of charset and charset_not. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ MAYBE_START_OVER_WITH_EXTENDED (c); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* \ might escape characters inside [...] and [^...]. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if ((syntax & RE_BACKSLASH_ESCAPE_IN_LISTS) && c == '\\') ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ if (p == pend) FREE_STACK_RETURN (REG_EESCAPE); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PATFETCH (c1); ~~~~~~~~~~~~~~ MAYBE_START_OVER_WITH_EXTENDED (c1); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ SET_LIST_BIT (c1); ~~~~~~~~~~~~~~~~~~ continue; ~~~~~~~~~ } ~ /* Could be the end of the bracket expression. If it's ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ not (i.e., when the bracket expression is `[]' so ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ far), the ']' character bit gets set way below. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (c == ']' && p != p1 + 1) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ /* Look ahead to see if it's a range when the last thing ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ was a character class. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~ if (had_char_class && c == '-' && *p != ']') ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ FREE_STACK_RETURN (REG_ERANGE); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Look ahead to see if it's a range when the last thing ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ was a character: if this is a hyphen not at the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ beginning or the end of a list, then it's the range ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ operator. */ ~~~~~~~~~~~~~ if (c == '-' ~~~~~~~~~~~~ && !(p - 2 >= pattern && p[-2] == '[') ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ && !(p - 3 >= pattern && p[-3] == '[' && p[-2] == '^') ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ && *p != ']') ~~~~~~~~~~~~~ { ~ reg_errcode_t ret; ~~~~~~~~~~~~~~~~~~ MAYBE_START_OVER_WITH_EXTENDED (*(unsigned char *)p); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ret = compile_range (&p, pend, translate, syntax, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ buf_end); ~~~~~~~~~ if (ret != REG_NOERROR) FREE_STACK_RETURN (ret); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ else if (p[0] == '-' && p[1] != ']') ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { /* This handles ranges made up of characters only. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ reg_errcode_t ret; ~~~~~~~~~~~~~~~~~~ /* Move past the `-'. */ ~~~~~~~~~~~~~~~~~~~~~~~~~ PATFETCH (c1); ~~~~~~~~~~~~~~ MAYBE_START_OVER_WITH_EXTENDED (*(unsigned char *)p); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ret = compile_range (&p, pend, translate, syntax, buf_end); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (ret != REG_NOERROR) FREE_STACK_RETURN (ret); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* See if we're at the beginning of a possible character ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ class. */ ~~~~~~~~~~ else if (syntax & RE_CHAR_CLASSES && c == '[' && *p == ':') ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ re_char *str = p + 1; ~~~~~~~~~~~~~~~~~~~~~ PATFETCH (c); ~~~~~~~~~~~~~ c1 = 0; ~~~~~~~ /* If pattern is `[[:'. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (p == pend) FREE_STACK_RETURN (REG_EBRACK); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (;;) ~~~~~~~~ { ~ PATFETCH (c); ~~~~~~~~~~~~~ if ((c == ':' && *p == ']') || p == pend) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ break; ~~~~~~ } ~ c1++; ~~~~~ } ~ /* If isn't a word bracketed by `[:' and `:]': ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ undo the ending character, the letters, and leave ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the leading `:' and `[' (but set bits for them). */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (c == ':' && *p == ']') ~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ re_wctype_t cc = re_wctype (str, c1); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int ch; ~~~~~~~ if (cc == RECC_ERROR) ~~~~~~~~~~~~~~~~~~~~~ FREE_STACK_RETURN (REG_ECTYPE); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Throw away the ] at the end of the character ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ class. */ ~~~~~~~~~~ PATFETCH (c); ~~~~~~~~~~~~~ if (p == pend) FREE_STACK_RETURN (REG_EBRACK); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef MULE ~~~~~~~~~~~ if (re_wctype_can_match_non_ascii (cc)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ goto start_over_with_extended; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ #endif /* MULE */ ~~~~~~~~~~~~~~~~~ for (ch = 0; ch < (1 << BYTEWIDTH); ++ch) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ if (re_iswctype (ch, cc ~~~~~~~~~~~~~~~~~~~~~~~ RE_ISWCTYPE_ARG (current_buffer))) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ SET_LIST_BIT (ch); ~~~~~~~~~~~~~~~~~~ } ~ } ~ had_char_class = true; ~~~~~~~~~~~~~~~~~~~~~~ } ~ else ~~~~ { ~ c1++; ~~~~~ while (c1--) ~~~~~~~~~~~~ PATUNFETCH; ~~~~~~~~~~~ SET_LIST_BIT ('['); ~~~~~~~~~~~~~~~~~~~ SET_LIST_BIT (':'); ~~~~~~~~~~~~~~~~~~~ had_char_class = false; ~~~~~~~~~~~~~~~~~~~~~~~ } ~ } ~ else ~~~~ { ~ had_char_class = false; ~~~~~~~~~~~~~~~~~~~~~~~ SET_LIST_BIT (c); ~~~~~~~~~~~~~~~~~ } ~ } ~ /* Discard any (non)matching list bytes that are all 0 at the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ end of the map. Decrease the map-length byte too. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ while ((int) buf_end[-1] > 0 && buf_end[buf_end[-1] - 1] == 0) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ buf_end[-1]--; ~~~~~~~~~~~~~~ buf_end += buf_end[-1]; ~~~~~~~~~~~~~~~~~~~~~~~ } ~ break; ~~~~~~ #ifdef MULE ~~~~~~~~~~~ start_over_with_extended: ~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ REGISTER Lisp_Object rtab = Qnil; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Bitbyte flags = 0; ~~~~~~~~~~~~~~~~~~ int bytes_needed = sizeof (flags); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_bool had_char_class = false; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* There are extended chars here, which means we need to use the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ unified range-table format. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (buf_end[-2] == charset) ~~~~~~~~~~~~~~~~~~~~~~~~~~~ buf_end[-2] = charset_mule; ~~~~~~~~~~~~~~~~~~~~~~~~~~~ else ~~~~ buf_end[-2] = charset_mule_not; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ buf_end--; ~~~~~~~~~~ p = p1; /* go back to the beginning of the charset, after ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ a possible ^. */ ~~~~~~~~~~~~~~~~ rtab = Vthe_lisp_rangetab; ~~~~~~~~~~~~~~~~~~~~~~~~~~ Fclear_range_table (rtab); ~~~~~~~~~~~~~~~~~~~~~~~~~~ /* charset_not matches newline according to a syntax bit. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if ((re_opcode_t) buf_end[-1] == charset_mule_not ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ && (syntax & RE_HAT_LISTS_NOT_NEWLINE)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ SET_RANGETAB_BIT ('\n'); ~~~~~~~~~~~~~~~~~~~~~~~~ /* Read in characters and ranges, setting map bits. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (;;) ~~~~~~~~ { ~ if (p == pend) FREE_STACK_RETURN (REG_EBRACK); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PATFETCH (c); ~~~~~~~~~~~~~ /* \ might escape characters inside [...] and [^...]. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if ((syntax & RE_BACKSLASH_ESCAPE_IN_LISTS) && c == '\\') ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ if (p == pend) FREE_STACK_RETURN (REG_EESCAPE); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PATFETCH (c1); ~~~~~~~~~~~~~~ SET_RANGETAB_BIT (c1); ~~~~~~~~~~~~~~~~~~~~~~ continue; ~~~~~~~~~ } ~ /* Could be the end of the bracket expression. If it's ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ not (i.e., when the bracket expression is `[]' so ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ far), the ']' character bit gets set way below. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (c == ']' && p != p1 + 1) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ /* Look ahead to see if it's a range when the last thing ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ was a character class. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~ if (had_char_class && c == '-' && *p != ']') ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ FREE_STACK_RETURN (REG_ERANGE); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Look ahead to see if it's a range when the last thing ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ was a character: if this is a hyphen not at the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ beginning or the end of a list, then it's the range ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ operator. */ ~~~~~~~~~~~~~ if (c == '-' ~~~~~~~~~~~~ && !(p - 2 >= pattern && p[-2] == '[') ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ && !(p - 3 >= pattern && p[-3] == '[' && p[-2] == '^') ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ && *p != ']') ~~~~~~~~~~~~~ { ~ reg_errcode_t ret; ~~~~~~~~~~~~~~~~~~ ret = compile_extended_range (&p, pend, translate, syntax, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ rtab); ~~~~~~ if (ret != REG_NOERROR) FREE_STACK_RETURN (ret); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ else if (p[0] == '-' && p[1] != ']') ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { /* This handles ranges made up of characters only. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ reg_errcode_t ret; ~~~~~~~~~~~~~~~~~~ /* Move past the `-'. */ ~~~~~~~~~~~~~~~~~~~~~~~~~ PATFETCH (c1); ~~~~~~~~~~~~~~ ret = compile_extended_range (&p, pend, translate, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ syntax, rtab); ~~~~~~~~~~~~~~ if (ret != REG_NOERROR) FREE_STACK_RETURN (ret); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* See if we're at the beginning of a possible character ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ class. */ ~~~~~~~~~~ else if (syntax & RE_CHAR_CLASSES && c == '[' && *p == ':') ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ re_char *str = p + 1; ~~~~~~~~~~~~~~~~~~~~~ PATFETCH (c); ~~~~~~~~~~~~~ c1 = 0; ~~~~~~~ /* If pattern is `[[:'. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (p == pend) FREE_STACK_RETURN (REG_EBRACK); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (;;) ~~~~~~~~ { ~ PATFETCH (c); ~~~~~~~~~~~~~ if ((c == ':' && *p == ']') || p == pend) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ break; ~~~~~~ } ~ c1++; ~~~~~ } ~ /* If isn't a word bracketed by `[:' and `:]': ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ undo the ending character, the letters, and leave ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the leading `:' and `[' (but set bits for them). */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (c == ':' && *p == ']') ~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ re_wctype_t cc = re_wctype (str, c1); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ reg_errcode_t ret = REG_NOERROR; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (cc == RECC_ERROR) ~~~~~~~~~~~~~~~~~~~~~ FREE_STACK_RETURN (REG_ECTYPE); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Throw away the ] at the end of the character ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ class. */ ~~~~~~~~~~ PATFETCH (c); ~~~~~~~~~~~~~ if (p == pend) FREE_STACK_RETURN (REG_EBRACK); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ret = compile_char_class (cc, rtab, &flags); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (ret != REG_NOERROR) FREE_STACK_RETURN (ret); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ had_char_class = true; ~~~~~~~~~~~~~~~~~~~~~~ } ~ else ~~~~ { ~ c1++; ~~~~~ while (c1--) ~~~~~~~~~~~~ PATUNFETCH; ~~~~~~~~~~~ SET_RANGETAB_BIT ('['); ~~~~~~~~~~~~~~~~~~~~~~~ SET_RANGETAB_BIT (':'); ~~~~~~~~~~~~~~~~~~~~~~~ had_char_class = false; ~~~~~~~~~~~~~~~~~~~~~~~ } ~ } ~ else ~~~~ { ~ had_char_class = false; ~~~~~~~~~~~~~~~~~~~~~~~ SET_RANGETAB_BIT (c); ~~~~~~~~~~~~~~~~~~~~~ } ~ } ~ bytes_needed += unified_range_table_bytes_needed (rtab); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ GET_BUFFER_SPACE (bytes_needed); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ *buf_end++ = flags; ~~~~~~~~~~~~~~~~~~~ unified_range_table_copy_data (rtab, buf_end); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ buf_end += unified_range_table_bytes_used (buf_end); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ } ~ #endif /* MULE */ ~~~~~~~~~~~~~~~~~ case '(': ~~~~~~~~~ if (syntax & RE_NO_BK_PARENS) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto handle_open; ~~~~~~~~~~~~~~~~~ else ~~~~ goto normal_char; ~~~~~~~~~~~~~~~~~ case ')': ~~~~~~~~~ if (syntax & RE_NO_BK_PARENS) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto handle_close; ~~~~~~~~~~~~~~~~~~ else ~~~~ goto normal_char; ~~~~~~~~~~~~~~~~~ case '\n': ~~~~~~~~~~ if (syntax & RE_NEWLINE_ALT) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto handle_alt; ~~~~~~~~~~~~~~~~ else ~~~~ goto normal_char; ~~~~~~~~~~~~~~~~~ case '|': ~~~~~~~~~ if (syntax & RE_NO_BK_VBAR) ~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto handle_alt; ~~~~~~~~~~~~~~~~ else ~~~~ goto normal_char; ~~~~~~~~~~~~~~~~~ case '{': ~~~~~~~~~ if (syntax & RE_INTERVALS && syntax & RE_NO_BK_BRACES) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto handle_interval; ~~~~~~~~~~~~~~~~~~~~~ else ~~~~ goto normal_char; ~~~~~~~~~~~~~~~~~ case '\\': ~~~~~~~~~~ if (p == pend) FREE_STACK_RETURN (REG_EESCAPE); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Do not translate the character after the \, so that we can ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ distinguish, e.g., \B from \b, even if we normally would ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ translate, e.g., B to b. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PATFETCH_RAW (c); ~~~~~~~~~~~~~~~~~ switch (c) ~~~~~~~~~~ { ~ case '(': ~~~~~~~~~ if (syntax & RE_NO_BK_PARENS) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto normal_backslash; ~~~~~~~~~~~~~~~~~~~~~~ handle_open: ~~~~~~~~~~~~ { ~ regnum_t r = 0; ~~~~~~~~~~~~~~~ re_bool shy = 0, named_nonshy = 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!(syntax & RE_NO_SHY_GROUPS) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ && p != pend && itext_ichar_eql (p, '?')) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ INC_IBYTEPTR (p); /* Gobble up the '?'. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PATFETCH (c); /* Fetch the next character, which may be a ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ digit. */ ~~~~~~~~~ switch (c) ~~~~~~~~~~ { ~ case ':': /* shy groups */ ~~~~~~~~~~~~~~~~~~~~~~~~~~ shy = 1; ~~~~~~~~ break; ~~~~~~ case '1': case '2': case '3': case '4': ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case '5': case '6': case '7': case '8': case '9': ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PATUNFETCH; ~~~~~~~~~~~ GET_UNSIGNED_NUMBER (r); ~~~~~~~~~~~~~~~~~~~~~~~~ if (itext_ichar_eql (p, ':')) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ named_nonshy = 1; ~~~~~~~~~~~~~~~~~ INC_IBYTEPTR (p); /* Gobble up the ':'. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ /* Otherwise, fall through and error. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* An explicitly specified regnum must start with ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ non-0. */ ~~~~~~~~~ case '0': ~~~~~~~~~ default: ~~~~~~~~ FREE_STACK_RETURN (REG_BADPAT); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ } ~ ++regnum; ~~~~~~~~~ bufp->re_ngroups++; ~~~~~~~~~~~~~~~~~~~ if (bufp->re_ngroups > MAX_REGNUM) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ FREE_STACK_RETURN (REG_ESUBREG); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ if (!shy) ~~~~~~~~~ { ~ if (named_nonshy) ~~~~~~~~~~~~~~~~~ { ~ if (r < bufp->external_to_internal_register_size) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ if (group_in_compile_stack ~~~~~~~~~~~~~~~~~~~~~~~~~~ (compile_stack, ~~~~~~~~~~~~~~~ bufp->external_to_internal_register[r])) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ /* GNU errors in this context, which is ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ inconsistent; it otherwise has no problem ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ with named non-shy groups overriding ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ previously-assigned group numbers. I choose ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ to error here for consistency with GNU for ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ those writing code that should target ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ both. */ ~~~~~~~~ FREE_STACK_RETURN (REG_ESUBREG); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ } ~ if (r > bufp->re_nsub) ~~~~~~~~~~~~~~~~~~~~~~ { ~ bufp->re_nsub = r; ~~~~~~~~~~~~~~~~~~ } ~ } ~ else ~~~~ { ~ r = ++(bufp->re_nsub); ~~~~~~~~~~~~~~~~~~~~~~ } ~ while (bufp->external_to_internal_register_size <= ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->re_nsub) ~~~~~~~~~~~~~~ { ~ int i; ~~~~~~ int old_size = ~~~~~~~~~~~~~~ bufp->external_to_internal_register_size; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->external_to_internal_register_size ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ += max (old_size + 5, bufp->re_nsub + 5); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RETALLOC (bufp->external_to_internal_register, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->external_to_internal_register_size, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int); ~~~~~ for (i = old_size; ~~~~~~~~~~~~~~~~~~ i < bufp->external_to_internal_register_size; i++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->external_to_internal_register[i] = ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (int) 0xDEADBEEF; ~~~~~~~~~~~~~~~~~ } ~ /* This is explicitly [r] rather than [bufp->re_nsub] for ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the case that the named nonshy group references an ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ unused register number less than bufp->re_nsub. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->external_to_internal_register[r] = ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->re_ngroups; ~~~~~~~~~~~~~~~~~ } ~ if (COMPILE_STACK_FULL) ~~~~~~~~~~~~~~~~~~~~~~~ { ~ RETALLOC (compile_stack.stack, compile_stack.size << 1, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ compile_stack_elt_t); ~~~~~~~~~~~~~~~~~~~~~ if (compile_stack.stack == NULL) return REG_ESPACE; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ compile_stack.size <<= 1; ~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* These are the values to restore when we hit end of this ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ group. They are all relative offsets, so that if the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ whole pattern moves because of realloc, they will still ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ be valid. */ ~~~~~~~~~~~~~ COMPILE_STACK_TOP.begalt_offset = begalt - bufp->buffer; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ COMPILE_STACK_TOP.fixup_alt_jump ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ = fixup_alt_jump ? fixup_alt_jump - bufp->buffer + 1 : 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ COMPILE_STACK_TOP.laststart_offset = buf_end - bufp->buffer; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ COMPILE_STACK_TOP.regnum = bufp->re_ngroups; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ COMPILE_STACK_TOP.inner_group_offset ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ = buf_end - bufp->buffer + 3; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* We will eventually replace the 0 with the number of ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ groups inner to this one, using inner_group_offset, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ above. */ ~~~~~~~~~ GET_BUFFER_SPACE (5); ~~~~~~~~~~~~~~~~~~~~~ store_op2 (start_memory, buf_end, bufp->re_ngroups, 0); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ buf_end += 5; ~~~~~~~~~~~~~ compile_stack.avail++; ~~~~~~~~~~~~~~~~~~~~~~ fixup_alt_jump = 0; ~~~~~~~~~~~~~~~~~~~ laststart = 0; ~~~~~~~~~~~~~~ begalt = buf_end; ~~~~~~~~~~~~~~~~~ pending_exact = 0; ~~~~~~~~~~~~~~~~~~ } ~ break; ~~~~~~ case ')': ~~~~~~~~~ if (syntax & RE_NO_BK_PARENS) goto normal_backslash; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (COMPILE_STACK_EMPTY) ~~~~~~~~~~~~~~~~~~~~~~~~ { ~ if (syntax & RE_UNMATCHED_RIGHT_PAREN_ORD) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto normal_backslash; ~~~~~~~~~~~~~~~~~~~~~~ else ~~~~ FREE_STACK_RETURN (REG_ERPAREN); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ handle_close: ~~~~~~~~~~~~~ if (fixup_alt_jump) ~~~~~~~~~~~~~~~~~~~ { /* Push a dummy failure point at the end of the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ alternative for a possible future ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `pop_failure_jump' to pop. See comments at ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `push_dummy_failure' in `re_match_2'. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ BUF_PUSH (push_dummy_failure); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* We allocated space for this jump when we assigned ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ to `fixup_alt_jump', in the `handle_alt' case below. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ STORE_JUMP (jump_past_alt, fixup_alt_jump, buf_end - 1); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* See similar code for backslashed left paren above. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (COMPILE_STACK_EMPTY) ~~~~~~~~~~~~~~~~~~~~~~~~ { ~ if (syntax & RE_UNMATCHED_RIGHT_PAREN_ORD) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto normal_char; ~~~~~~~~~~~~~~~~~ else ~~~~ FREE_STACK_RETURN (REG_ERPAREN); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* Since we just checked for an empty stack above, this ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ``can't happen''. */ ~~~~~~~~~~~~~~~~~~~~~ assert (compile_stack.avail != 0); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ /* We don't just want to restore into `regnum', because ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ later groups should continue to be numbered higher, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ as in `(ab)c(de)' -- the second group is #2. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regnum_t this_group_regnum; ~~~~~~~~~~~~~~~~~~~~~~~~~~~ unsigned char *inner_group_loc; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ compile_stack.avail--; ~~~~~~~~~~~~~~~~~~~~~~ begalt = bufp->buffer + COMPILE_STACK_TOP.begalt_offset; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fixup_alt_jump ~~~~~~~~~~~~~~ = COMPILE_STACK_TOP.fixup_alt_jump ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ? bufp->buffer + COMPILE_STACK_TOP.fixup_alt_jump - 1 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ : 0; ~~~~ laststart = bufp->buffer + COMPILE_STACK_TOP.laststart_offset; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ this_group_regnum = COMPILE_STACK_TOP.regnum; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ pending_exact = 0; ~~~~~~~~~~~~~~~~~~ /* We're at the end of the group, so now we know how many ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ groups were inside this one. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ inner_group_loc ~~~~~~~~~~~~~~~ = bufp->buffer + COMPILE_STACK_TOP.inner_group_offset; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ STORE_NUMBER (inner_group_loc, regnum - this_group_regnum); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ GET_BUFFER_SPACE (5); ~~~~~~~~~~~~~~~~~~~~~ store_op2 (stop_memory, buf_end, this_group_regnum, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regnum - this_group_regnum); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ buf_end += 5; ~~~~~~~~~~~~~ } ~ break; ~~~~~~ case '|': /* `\|'. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~ if (syntax & RE_LIMITED_OPS || syntax & RE_NO_BK_VBAR) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto normal_backslash; ~~~~~~~~~~~~~~~~~~~~~~ handle_alt: ~~~~~~~~~~~ if (syntax & RE_LIMITED_OPS) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto normal_char; ~~~~~~~~~~~~~~~~~ /* Insert before the previous alternative a jump which ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ jumps to this alternative if the former fails. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ GET_BUFFER_SPACE (3); ~~~~~~~~~~~~~~~~~~~~~ INSERT_JUMP (on_failure_jump, begalt, buf_end + 6); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ pending_exact = 0; ~~~~~~~~~~~~~~~~~~ buf_end += 3; ~~~~~~~~~~~~~ /* The alternative before this one has a jump after it ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ which gets executed if it gets matched. Adjust that ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ jump so it will jump to this alternative's analogous ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ jump (put in below, which in turn will jump to the next ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (if any) alternative's such jump, etc.). The last such ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ jump jumps to the correct final destination. A picture: ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ _____ _____ ~~~~~~~~~~~ | | | | ~~~~~~~~~~~ | v | v ~~~~~~~~~~~ a | b | c ~~~~~~~~~~~ If we are at `b', then fixup_alt_jump right now points to a ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ three-byte space after `a'. We'll put in the jump, set ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fixup_alt_jump to right after `b', and leave behind three ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bytes which we'll fill in when we get to after `c'. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (fixup_alt_jump) ~~~~~~~~~~~~~~~~~~~ STORE_JUMP (jump_past_alt, fixup_alt_jump, buf_end); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Mark and leave space for a jump after this alternative, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ to be filled in later either by next alternative or ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ when know we're at the end of a series of alternatives. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fixup_alt_jump = buf_end; ~~~~~~~~~~~~~~~~~~~~~~~~~ GET_BUFFER_SPACE (3); ~~~~~~~~~~~~~~~~~~~~~ buf_end += 3; ~~~~~~~~~~~~~ laststart = 0; ~~~~~~~~~~~~~~ begalt = buf_end; ~~~~~~~~~~~~~~~~~ break; ~~~~~~ case '{': ~~~~~~~~~ /* If \{ is a literal. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!(syntax & RE_INTERVALS) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* If we're at `\{' and it's not the open-interval ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ operator. */ ~~~~~~~~~~~~~ || ((syntax & RE_INTERVALS) && (syntax & RE_NO_BK_BRACES)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || (p - 2 == pattern && p == pend)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto normal_backslash; ~~~~~~~~~~~~~~~~~~~~~~ #define BAD_INTERVAL(errnum) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ do \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (syntax & RE_NO_BK_BRACES) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto unfetch_interval; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ else \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ FREE_STACK_RETURN (errnum); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } while (0) ~~~~~~~~~~~ handle_interval: ~~~~~~~~~~~~~~~~ { ~ /* If got here, then the syntax allows intervals. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* At least (most) this many matches must be made. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int lower_bound = 0, upper_bound = -1; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ beg_interval = p - 1; ~~~~~~~~~~~~~~~~~~~~~ if (p == pend || itext_ichar_eql (p, '+')) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ BAD_INTERVAL (REG_EBRACE); ~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ GET_UNSIGNED_NUMBER (lower_bound); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (p == pend) BAD_INTERVAL (REG_EBRACE); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PATFETCH (c); ~~~~~~~~~~~~~ if (c == ',') ~~~~~~~~~~~~~ { ~ if (p == pend || itext_ichar_eql (p, '+')) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ BAD_INTERVAL (REG_EBRACE); ~~~~~~~~~~~~~~~~~~~~~~~~~~ GET_UNSIGNED_NUMBER (upper_bound); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (upper_bound < 0) upper_bound = RE_DUP_MAX; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (p == pend) BAD_INTERVAL (REG_EBRACE); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PATFETCH (c); ~~~~~~~~~~~~~ } ~ else ~~~~ { ~ /* Interval such as `{1}' => match exactly once. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ upper_bound = lower_bound; ~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ if (lower_bound > upper_bound) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ BAD_INTERVAL (REG_EBRACE); ~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ if (upper_bound > RE_DUP_MAX) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ BAD_INTERVAL (REG_ESIZEBR); ~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ if (!(syntax & RE_NO_BK_BRACES)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ if (c != '\\') ~~~~~~~~~~~~~~ FREE_STACK_RETURN (REG_BADBR); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (p == pend) ~~~~~~~~~~~~~~ FREE_STACK_RETURN (REG_EESCAPE); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PATFETCH (c); ~~~~~~~~~~~~~ } ~ if (c != '}') ~~~~~~~~~~~~~ { ~ BAD_INTERVAL (REG_EBRACE); ~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* We just parsed a valid interval. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* It's invalid to have no preceding RE. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!laststart) ~~~~~~~~~~~~~~~ { ~ if (syntax & RE_CONTEXT_INVALID_OPS) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ FREE_STACK_RETURN (REG_BADRPT); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ else if (syntax & RE_CONTEXT_INDEP_OPS) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ laststart = buf_end; ~~~~~~~~~~~~~~~~~~~~ else ~~~~ goto unfetch_interval; ~~~~~~~~~~~~~~~~~~~~~~ } ~ /* If the upper bound is zero, don't want to succeed at ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ all; jump from `laststart' to `b + 3', which will be ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the end of the buffer after we insert the jump. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (upper_bound == 0) ~~~~~~~~~~~~~~~~~~~~~ { ~ GET_BUFFER_SPACE (3); ~~~~~~~~~~~~~~~~~~~~~ INSERT_JUMP (jump, laststart, buf_end + 3); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ buf_end += 3; ~~~~~~~~~~~~~ } ~ /* Otherwise, we have a nontrivial interval. When ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ we're all done, the pattern will look like: ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ set_number_at ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ set_number_at ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ succeed_n ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ~~~~~~~~~~~~~~ jump_n ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (The upper bound and `jump_n' are omitted if ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `upper_bound' is 1, though.) */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ else ~~~~ { /* If the upper bound is > 1, we need to insert ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ more at the end of the loop. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int nbytes = 10 + (upper_bound > 1) * 10; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ GET_BUFFER_SPACE (nbytes); ~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Initialize lower bound of the `succeed_n', even ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ though it will be set during matching by its ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ attendant `set_number_at' (inserted next), ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ because `re_compile_fastmap' needs to know. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Jump to the `jump_n' we might insert below. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ INSERT_JUMP2 (succeed_n, laststart, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ buf_end + 5 + (upper_bound > 1) * 5, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ lower_bound); ~~~~~~~~~~~~~ buf_end += 5; ~~~~~~~~~~~~~ /* Code to initialize the lower bound. Insert ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ before the `succeed_n'. The `5' is the last two ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bytes of this `set_number_at', plus 3 bytes of ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the following `succeed_n'. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ insert_op2 (set_number_at, laststart, 5, lower_bound, buf_end); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ buf_end += 5; ~~~~~~~~~~~~~ if (upper_bound > 1) ~~~~~~~~~~~~~~~~~~~~ { /* More than one repetition is allowed, so ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ append a backward jump to the `succeed_n' ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ that starts this interval. ~~~~~~~~~~~~~~~~~~~~~~~~~~ When we've reached this during matching, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ we'll have matched the interval once, so ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ jump back only `upper_bound - 1' times. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ STORE_JUMP2 (jump_n, buf_end, laststart + 5, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ upper_bound - 1); ~~~~~~~~~~~~~~~~~ buf_end += 5; ~~~~~~~~~~~~~ /* The location we want to set is the second ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ parameter of the `jump_n'; that is `b-2' as ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ an absolute address. `laststart' will be ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the `set_number_at' we're about to insert; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `laststart+3' the number to set, the source ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for the relative address. But we are ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ inserting into the middle of the pattern -- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ so everything is getting moved up by 5. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Conclusion: (b - 2) - (laststart + 3) + 5, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ i.e., b - laststart. ~~~~~~~~~~~~~~~~~~~~ We insert this at the beginning of the loop ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ so that if we fail during matching, we'll ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ reinitialize the bounds. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ insert_op2 (set_number_at, laststart, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ buf_end - laststart, ~~~~~~~~~~~~~~~~~~~~ upper_bound - 1, buf_end); ~~~~~~~~~~~~~~~~~~~~~~~~~~ buf_end += 5; ~~~~~~~~~~~~~ } ~ } ~ pending_exact = 0; ~~~~~~~~~~~~~~~~~~ beg_interval = NULL; ~~~~~~~~~~~~~~~~~~~~ } ~ break; ~~~~~~ #undef BAD_INTERVAL ~~~~~~~~~~~~~~~~~~~ unfetch_interval: ~~~~~~~~~~~~~~~~~ /* If an invalid interval, match the characters as literals. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ assert (beg_interval); ~~~~~~~~~~~~~~~~~~~~~~ p = beg_interval; ~~~~~~~~~~~~~~~~~ beg_interval = NULL; ~~~~~~~~~~~~~~~~~~~~ /* normal_char and normal_backslash need `c'. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PATFETCH (c); ~~~~~~~~~~~~~ if (!(syntax & RE_NO_BK_BRACES)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ if (p > pattern && p[-1] == '\\') ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto normal_backslash; ~~~~~~~~~~~~~~~~~~~~~~ } ~ goto normal_char; ~~~~~~~~~~~~~~~~~ #ifdef emacs ~~~~~~~~~~~~ /* There is no way to specify the before_dot and after_dot ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ operators. rms says this is ok. --karl */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case '=': ~~~~~~~~~ BUF_PUSH (at_dot); ~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case 's': ~~~~~~~~~ laststart = buf_end; ~~~~~~~~~~~~~~~~~~~~ PATFETCH (c); ~~~~~~~~~~~~~ /* XEmacs addition */ ~~~~~~~~~~~~~~~~~~~~~ if (c >= 0x80 || syntax_spec_code[c] == 0377) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ FREE_STACK_RETURN (REG_ESYNTAX); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ BUF_PUSH_2 (syntaxspec, syntax_spec_code[c]); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case 'S': ~~~~~~~~~ laststart = buf_end; ~~~~~~~~~~~~~~~~~~~~ PATFETCH (c); ~~~~~~~~~~~~~ /* XEmacs addition */ ~~~~~~~~~~~~~~~~~~~~~ if (c >= 0x80 || syntax_spec_code[c] == 0377) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ FREE_STACK_RETURN (REG_ESYNTAX); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ BUF_PUSH_2 (notsyntaxspec, syntax_spec_code[c]); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ #ifdef MULE ~~~~~~~~~~~ /* 97.2.17 jhod merged in to XEmacs from mule-2.3 */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case 'c': ~~~~~~~~~ laststart = buf_end; ~~~~~~~~~~~~~~~~~~~~ PATFETCH_RAW (c); ~~~~~~~~~~~~~~~~~ if (c < 32 || c > 127) ~~~~~~~~~~~~~~~~~~~~~~ FREE_STACK_RETURN (REG_ECATEGORY); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ BUF_PUSH_2 (categoryspec, c); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case 'C': ~~~~~~~~~ laststart = buf_end; ~~~~~~~~~~~~~~~~~~~~ PATFETCH_RAW (c); ~~~~~~~~~~~~~~~~~ if (c < 32 || c > 127) ~~~~~~~~~~~~~~~~~~~~~~ FREE_STACK_RETURN (REG_ECATEGORY); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ BUF_PUSH_2 (notcategoryspec, c); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ /* end of category patch */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif /* MULE */ ~~~~~~~~~~~~~~~~~ #endif /* emacs */ ~~~~~~~~~~~~~~~~~~ case 'w': ~~~~~~~~~ laststart = buf_end; ~~~~~~~~~~~~~~~~~~~~ BUF_PUSH (wordchar); ~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case 'W': ~~~~~~~~~ laststart = buf_end; ~~~~~~~~~~~~~~~~~~~~ BUF_PUSH (notwordchar); ~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case '<': ~~~~~~~~~ BUF_PUSH (wordbeg); ~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case '>': ~~~~~~~~~ BUF_PUSH (wordend); ~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case 'b': ~~~~~~~~~ BUF_PUSH (wordbound); ~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case 'B': ~~~~~~~~~ BUF_PUSH (notwordbound); ~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case '`': ~~~~~~~~~ BUF_PUSH (begbuf); ~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case '\'': ~~~~~~~~~~ BUF_PUSH (endbuf); ~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case '1': case '2': case '3': case '4': case '5': ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case '6': case '7': case '8': case '9': ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ regnum_t reg = -1, regint; ~~~~~~~~~~~~~~~~~~~~~~~~~~ if (syntax & RE_NO_BK_REFS) ~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto normal_char; ~~~~~~~~~~~~~~~~~ PATUNFETCH; ~~~~~~~~~~~ GET_UNSIGNED_NUMBER (reg); ~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Progressively divide down the backreference until we find ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ one that corresponds to an existing register. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ while (reg > 10 && ~~~~~~~~~~~~~~~~~~ (syntax & RE_NO_MULTI_DIGIT_BK_REFS ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || reg > bufp->re_nsub ~~~~~~~~~~~~~~~~~~~~~~ || (bufp->external_to_internal_register[reg] ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ == (int) 0xDEADBEEF))) ~~~~~~~~~~~~~~~~~~~~~~ { ~ PATUNFETCH; ~~~~~~~~~~~ reg /= 10; ~~~~~~~~~~ } ~ if (reg > bufp->re_nsub ~~~~~~~~~~~~~~~~~~~~~~~ || (bufp->external_to_internal_register[reg] ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ == (int) 0xDEADBEEF)) ~~~~~~~~~~~~~~~~~~~~~ { ~ /* \N with one digit with a non-existing group has always ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ been a syntax error. ~~~~~~~~~~~~~~~~~~~~ GNU as of Fr 27 Mär 2020 16:24:07 GMT do not accept ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ multidigit backreferences; if they did there would be ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ an argument for this not being an error for those ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ backreferences that are less than some known named ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ backreference. As it is currently we should error, this ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ will give those writing code for XEmacs better ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ feedback. */ ~~~~~~~~~~~~ FREE_STACK_RETURN (REG_ESUBREG); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ regint = bufp->external_to_internal_register[reg]; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Can't back reference to a subexpression if inside of it. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (group_in_compile_stack (compile_stack, regint)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ /* Check REG, not REGINT. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ while (reg > 10) ~~~~~~~~~~~~~~~~ { ~ PATUNFETCH; ~~~~~~~~~~~ reg = reg / 10; ~~~~~~~~~~~~~~~ } ~ goto normal_char; ~~~~~~~~~~~~~~~~~ } ~ #ifdef emacs ~~~~~~~~~~~~ if (reg > 9 && ~~~~~~~~~~~~~~ bufp->warned_about_incompatible_back_references == 0) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ bufp->warned_about_incompatible_back_references = 1; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ warn_when_safe (intern ("regex"), Qinfo, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ "Back reference \\%d now has new " ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ "semantics in %s", reg, pattern); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ #endif ~~~~~~ laststart = buf_end; ~~~~~~~~~~~~~~~~~~~~ GET_BUFFER_SPACE (3); ~~~~~~~~~~~~~~~~~~~~~ store_op1 (duplicate, buf_end, regint); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ buf_end += 3; ~~~~~~~~~~~~~ } ~ break; ~~~~~~ case '+': ~~~~~~~~~ case '?': ~~~~~~~~~ if (syntax & RE_BK_PLUS_QM) ~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto handle_plus; ~~~~~~~~~~~~~~~~~ else ~~~~ goto normal_backslash; ~~~~~~~~~~~~~~~~~~~~~~ default: ~~~~~~~~ normal_backslash: ~~~~~~~~~~~~~~~~~ /* You might think it would be useful for \ to mean ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ not to translate; but if we don't translate it, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ it will never match anything. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ c = RE_TRANSLATE (c); ~~~~~~~~~~~~~~~~~~~~~ goto normal_char; ~~~~~~~~~~~~~~~~~ } ~ break; ~~~~~~ default: ~~~~~~~~ /* Expects the character in `c'. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* `p' points to the location after where `c' came from. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ normal_char: ~~~~~~~~~~~~ { ~ /* The following conditional synced to GNU Emacs 22.1. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* If no exactn currently being built. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!pending_exact ~~~~~~~~~~~~~~~~~~ /* If last exactn not at current position. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || pending_exact + *pending_exact + 1 != buf_end ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* We have only one byte following the exactn for the count. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || *pending_exact >= (1 << BYTEWIDTH) - MAX_ICHAR_LEN ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* If followed by a repetition operator. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ If the lookahead fails because of end of pattern, any ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ trailing backslash will get caught later. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || (p != pend && (*p == '*' || *p == '^')) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || ((syntax & RE_BK_PLUS_QM) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ? p + 1 < pend && *p == '\\' && (p[1] == '+' || p[1] == '?') ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ : p != pend && (*p == '+' || *p == '?')) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || ((syntax & RE_INTERVALS) ~~~~~~~~~~~~~~~~~~~~~~~~~~~ && ((syntax & RE_NO_BK_BRACES) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ? p != pend && *p == '{' ~~~~~~~~~~~~~~~~~~~~~~~~ : p + 1 < pend && (p[0] == '\\' && p[1] == '{')))) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ /* Start building a new exactn. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ laststart = buf_end; ~~~~~~~~~~~~~~~~~~~~ BUF_PUSH_2 (exactn, 0); ~~~~~~~~~~~~~~~~~~~~~~~ pending_exact = buf_end - 1; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ #ifndef MULE ~~~~~~~~~~~~ BUF_PUSH (c); ~~~~~~~~~~~~~ (*pending_exact)++; ~~~~~~~~~~~~~~~~~~~ #else ~~~~~ { ~ Bytecount bt_count; ~~~~~~~~~~~~~~~~~~~ Ibyte tmp_buf[MAX_ICHAR_LEN]; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int i; ~~~~~~ bt_count = set_itext_ichar (tmp_buf, c); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (i = 0; i < bt_count; i++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ BUF_PUSH (tmp_buf[i]); ~~~~~~~~~~~~~~~~~~~~~~ (*pending_exact)++; ~~~~~~~~~~~~~~~~~~~ } ~ } ~ #endif ~~~~~~ break; ~~~~~~ } ~ } /* switch (c) */ ~~~~~~~~~~~~~~~~~~ } /* while p != pend */ ~~~~~~~~~~~~~~~~~~~~~~~ /* Through the pattern now. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (fixup_alt_jump) ~~~~~~~~~~~~~~~~~~~ STORE_JUMP (jump_past_alt, fixup_alt_jump, buf_end); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!COMPILE_STACK_EMPTY) ~~~~~~~~~~~~~~~~~~~~~~~~~ FREE_STACK_RETURN (REG_EPAREN); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* If we don't want backtracking, force success ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the first time we reach the end of the compiled pattern. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (syntax & RE_NO_POSIX_BACKTRACKING) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ BUF_PUSH (succeed); ~~~~~~~~~~~~~~~~~~~ xfree (compile_stack.stack); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* We have succeeded; set the length of the buffer. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->used = buf_end - bufp->buffer; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef DEBUG ~~~~~~~~~~~~ if (debug_regexps & RE_DEBUG_COMPILATION) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ DEBUG_PRINT1 ("\nCompiled pattern: \n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ print_compiled_pattern (bufp); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ #endif /* DEBUG */ ~~~~~~~~~~~~~~~~~~ #ifndef MATCH_MAY_ALLOCATE ~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Initialize the failure stack to the largest possible stack. This ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ isn't necessary unless we're trying to avoid calling alloca in ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the search and match routines. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ int num_regs = bufp->re_ngroups + 1; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Since DOUBLE_FAIL_STACK refuses to double only if the current size ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ is strictly greater than re_max_failures, the largest possible stack ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ is 2 * re_max_failures failure points. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (fail_stack.size < (2 * re_max_failures * MAX_FAILURE_ITEMS)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ fail_stack.size = (2 * re_max_failures * MAX_FAILURE_ITEMS); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (! fail_stack.stack) ~~~~~~~~~~~~~~~~~~~~~~~ fail_stack.stack ~~~~~~~~~~~~~~~~ = (fail_stack_elt_t *) xmalloc (fail_stack.size ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ * sizeof (fail_stack_elt_t)); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ else ~~~~ fail_stack.stack ~~~~~~~~~~~~~~~~ = (fail_stack_elt_t *) xrealloc (fail_stack.stack, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (fail_stack.size ~~~~~~~~~~~~~~~~ * sizeof (fail_stack_elt_t))); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ regex_grow_registers (num_regs); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ #endif /* not MATCH_MAY_ALLOCATE */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return REG_NOERROR; ~~~~~~~~~~~~~~~~~~~ } /* regex_compile */ ~~~~~~~~~~~~~~~~~~~~~ ~ /* Subroutines for `regex_compile'. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Store OP at LOC followed by two-byte integer parameter ARG. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ static void ~~~~~~~~~~~ store_op1 (re_opcode_t op, unsigned char *loc, int arg) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ *loc = (unsigned char) op; ~~~~~~~~~~~~~~~~~~~~~~~~~~ STORE_NUMBER (loc + 1, arg); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* Like `store_op1', but for two two-byte parameters ARG1 and ARG2. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ static void ~~~~~~~~~~~ store_op2 (re_opcode_t op, unsigned char *loc, int arg1, int arg2) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ *loc = (unsigned char) op; ~~~~~~~~~~~~~~~~~~~~~~~~~~ STORE_NUMBER (loc + 1, arg1); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ STORE_NUMBER (loc + 3, arg2); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* Copy the bytes from LOC to END to open up three bytes of space at LOC ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for OP followed by two-byte integer parameter ARG. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ static void ~~~~~~~~~~~ insert_op1 (re_opcode_t op, unsigned char *loc, int arg, unsigned char *end) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ REGISTER unsigned char *pfrom = end; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ REGISTER unsigned char *pto = end + 3; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ while (pfrom != loc) ~~~~~~~~~~~~~~~~~~~~ *--pto = *--pfrom; ~~~~~~~~~~~~~~~~~~ store_op1 (op, loc, arg); ~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* Like `insert_op1', but for two two-byte parameters ARG1 and ARG2. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ static void ~~~~~~~~~~~ insert_op2 (re_opcode_t op, unsigned char *loc, int arg1, int arg2, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ unsigned char *end) ~~~~~~~~~~~~~~~~~~~ { ~ REGISTER unsigned char *pfrom = end; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ REGISTER unsigned char *pto = end + 5; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ while (pfrom != loc) ~~~~~~~~~~~~~~~~~~~~ *--pto = *--pfrom; ~~~~~~~~~~~~~~~~~~ store_op2 (op, loc, arg1, arg2); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* P points to just after a ^ in PATTERN. Return true if that ^ comes ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ after an alternative or a begin-subexpression. We assume there is at ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ least one character before the ^. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ static re_bool ~~~~~~~~~~~~~~ at_begline_loc_p (re_char *pattern, re_char *p, reg_syntax_t syntax) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ re_char *prev = p - 2; ~~~~~~~~~~~~~~~~~~~~~~ re_bool prev_prev_backslash = prev > pattern && prev[-1] == '\\'; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return ~~~~~~ /* After a subexpression? */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (*prev == '(' && (syntax & RE_NO_BK_PARENS || prev_prev_backslash)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* After an alternative? */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || (*prev == '|' && (syntax & RE_NO_BK_VBAR || prev_prev_backslash)); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* The dual of at_begline_loc_p. This one is for $. We assume there is ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ at least one character after the $, i.e., `P < PEND'. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ static re_bool ~~~~~~~~~~~~~~ at_endline_loc_p (re_char *p, re_char *pend, int syntax) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ re_char *next = p; ~~~~~~~~~~~~~~~~~~ re_bool next_backslash = *next == '\\'; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_char *next_next = p + 1 < pend ? p + 1 : 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return ~~~~~~ /* Before a subexpression? */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (syntax & RE_NO_BK_PARENS ? *next == ')' ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ : next_backslash && next_next && *next_next == ')') ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Before an alternative? */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || (syntax & RE_NO_BK_VBAR ? *next == '|' ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ : next_backslash && next_next && *next_next == '|'); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* Returns true if REGNUM is in one of COMPILE_STACK's elements and ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ false if it's not. */ ~~~~~~~~~~~~~~~~~~~~~~ static re_bool ~~~~~~~~~~~~~~ group_in_compile_stack (compile_stack_type compile_stack, regnum_t regnum) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ int this_element; ~~~~~~~~~~~~~~~~~ for (this_element = compile_stack.avail - 1; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ this_element >= 0; ~~~~~~~~~~~~~~~~~~ this_element--) ~~~~~~~~~~~~~~~ if (compile_stack.stack[this_element].regnum == regnum) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return true; ~~~~~~~~~~~~ return false; ~~~~~~~~~~~~~ } ~ /* Read the ending character of a range (in a bracket expression) from the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ uncompiled pattern *P_PTR (which ends at PEND). We assume the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ starting character is in `P[-2]'. (`P[-1]' is the character `-'.) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Then we set the translation of all bits between the starting and ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ending characters (inclusive) in the compiled pattern B. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Return an error code. ~~~~~~~~~~~~~~~~~~~~~ We use these short variable names so we can use the same macros as ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `regex_compile' itself. ~~~~~~~~~~~~~~~~~~~~~~~ Under Mule, this is only called when both chars of the range are ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ASCII. */ ~~~~~~~~~ static reg_errcode_t ~~~~~~~~~~~~~~~~~~~~ compile_range (re_char **p_ptr, re_char *pend, RE_TRANSLATE_TYPE translate, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ reg_syntax_t syntax, unsigned char *buf_end) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ Ichar this_char; ~~~~~~~~~~~~~~~~ re_char *p = *p_ptr; ~~~~~~~~~~~~~~~~~~~~ int range_start, range_end; ~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (p == pend) ~~~~~~~~~~~~~~ return REG_ERANGE; ~~~~~~~~~~~~~~~~~~ /* Even though the pattern is a signed `char *', we need to fetch ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ with unsigned char *'s; if the high bit of the pattern character ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ is set, the range endpoints will be negative if we fetch using a ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ signed char *. ~~~~~~~~~~~~~~ We also want to fetch the endpoints without translating them; the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ appropriate translation is done in the bit-setting loop below. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* The SVR4 compiler on the 3B2 had trouble with unsigned const char *. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ range_start = ((const unsigned char *) p)[-2]; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ range_end = ((const unsigned char *) p)[0]; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Have to increment the pointer into the pattern string, so the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ caller isn't still at the ending character. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (*p_ptr)++; ~~~~~~~~~~~ /* If the start is after the end, the range is empty. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (range_start > range_end) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return syntax & RE_NO_EMPTY_RANGES ? REG_ERANGE : REG_NOERROR; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Here we see why `this_char' has to be larger than an `unsigned ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ char' -- the range is inclusive, so if `range_end' == 0xff ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (assuming 8-bit characters), we would otherwise go into an infinite ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ loop, since all characters <= 0xff. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (this_char = range_start; this_char <= range_end; this_char++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ SET_LIST_BIT (RE_TRANSLATE (this_char)); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ return REG_NOERROR; ~~~~~~~~~~~~~~~~~~~ } ~ #ifdef MULE ~~~~~~~~~~~ static reg_errcode_t ~~~~~~~~~~~~~~~~~~~~ compile_extended_range (re_char **p_ptr, re_char *pend, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RE_TRANSLATE_TYPE translate, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ reg_syntax_t syntax, Lisp_Object rtab) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ Ichar this_char, range_start, range_end; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ const Ibyte *p; ~~~~~~~~~~~~~~~ if (*p_ptr == pend) ~~~~~~~~~~~~~~~~~~~ return REG_ERANGE; ~~~~~~~~~~~~~~~~~~ p = (const Ibyte *) *p_ptr; ~~~~~~~~~~~~~~~~~~~~~~~~~~~ range_end = itext_ichar (p); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ p--; /* back to '-' */ ~~~~~~~~~~~~~~~~~~~~~~ DEC_IBYTEPTR (p); /* back to start of range */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* We also want to fetch the endpoints without translating them; the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ appropriate translation is done in the bit-setting loop below. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ range_start = itext_ichar (p); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ INC_IBYTEPTR (*p_ptr); ~~~~~~~~~~~~~~~~~~~~~~ /* If the start is after the end, the range is empty. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (range_start > range_end) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return syntax & RE_NO_EMPTY_RANGES ? REG_ERANGE : REG_NOERROR; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifndef UNICODE_INTERNAL ~~~~~~~~~~~~~~~~~~~~~~~~ /* Can't have ranges spanning different charsets, except maybe for ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ranges entirely within the first 256 chars. (The intent of this is that ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the effect of such a range would be unpredictable, since there is no ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ well-defined ordering over charsets and the particular assignment of ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ charset ID's is arbitrary.) This does not apply to Unicode, with ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ well-defined character values. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if ((range_start >= 0x100 || range_end >= 0x100) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ && !EQ (old_mule_ichar_charset (range_start), ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ old_mule_ichar_charset (range_end))) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return REG_ERANGESPAN; ~~~~~~~~~~~~~~~~~~~~~~ #endif /* not UNICODE_INTERNAL */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (TRANSLATE_P (translate)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ /* #### This might be way inefficient if the range encompasses 10,000 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ chars or something. To be efficient, you'd have to do something like ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ this: ~~~~~ range_table a ~~~~~~~~~~~~~ range_table b; ~~~~~~~~~~~~~~ map_char_table (translation table, [range_start, range_end]) of ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ lambda (ch, translation): ~~~~~~~~~~~~~~~~~~~~~~~~~ put (ch, Qt) in a ~~~~~~~~~~~~~~~~~ put (translation, Qt) in b ~~~~~~~~~~~~~~~~~~~~~~~~~~ invert the range in a and truncate to [range_start, range_end] ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ put the union of a, b in rtab ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ This is to say, we want to map every character that has a translation ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ to its translation, and other characters to themselves. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ This assumes, as is reasonable in practice, that a translation ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ table maps individual characters to their translation, and does ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ not generally map multiple characters to the same translation. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ ~~ for (this_char = range_start; this_char <= range_end; this_char++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ SET_RANGETAB_BIT (RE_TRANSLATE (this_char)); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ } ~ else ~~~~ put_range_table (rtab, range_start, range_end, Qt); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return REG_NOERROR; ~~~~~~~~~~~~~~~~~~~ } ~ #endif /* MULE */ ~~~~~~~~~~~~~~~~~ #ifdef emacs ~~~~~~~~~~~~ reg_errcode_t ~~~~~~~~~~~~~ compile_char_class (re_wctype_t cc, Lisp_Object rtab, Bitbyte *flags_out) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ *flags_out |= re_wctype_to_bit (cc); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ switch (cc) ~~~~~~~~~~~ { ~ case RECC_ASCII: ~~~~~~~~~~~~~~~~ put_range_table (rtab, 0, 0x7f, Qt); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case RECC_XDIGIT: ~~~~~~~~~~~~~~~~~ put_range_table (rtab, 'a', 'f', Qt); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ put_range_table (rtab, 'A', 'f', Qt); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* fallthrough */ ~~~~~~~~~~~~~~~~~ case RECC_DIGIT: ~~~~~~~~~~~~~~~~ put_range_table (rtab, '0', '9', Qt); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case RECC_BLANK: ~~~~~~~~~~~~~~~~ put_range_table (rtab, ' ', ' ', Qt); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ put_range_table (rtab, '\t', '\t', Qt); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case RECC_PRINT: ~~~~~~~~~~~~~~~~ put_range_table (rtab, ' ', 0x7e, Qt); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ put_range_table (rtab, 0x80, CHAR_CODE_LIMIT, Qt); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case RECC_GRAPH: ~~~~~~~~~~~~~~~~ put_range_table (rtab, '!', 0x7e, Qt); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ put_range_table (rtab, 0x80, CHAR_CODE_LIMIT, Qt); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case RECC_NONASCII: ~~~~~~~~~~~~~~~~~~~ case RECC_MULTIBYTE: ~~~~~~~~~~~~~~~~~~~~ put_range_table (rtab, 0x80, CHAR_CODE_LIMIT, Qt); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case RECC_CNTRL: ~~~~~~~~~~~~~~~~ put_range_table (rtab, 0x00, 0x1f, Qt); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case RECC_UNIBYTE: ~~~~~~~~~~~~~~~~~~ /* Never true in XEmacs. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ /* The following all have their own bits in the class_bits argument to ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ charset_mule and charset_mule_not, they don't use the range table ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ information. */ ~~~~~~~~~~~~~~~ case RECC_ALPHA: ~~~~~~~~~~~~~~~~ case RECC_WORD: ~~~~~~~~~~~~~~~ case RECC_ALNUM: /* Equivalent to RECC_WORD */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_LOWER: ~~~~~~~~~~~~~~~~ case RECC_PUNCT: ~~~~~~~~~~~~~~~~ case RECC_SPACE: ~~~~~~~~~~~~~~~~ case RECC_UPPER: ~~~~~~~~~~~~~~~~ break; ~~~~~~ } ~ return REG_NOERROR; ~~~~~~~~~~~~~~~~~~~ } ~ #endif /* MULE */ ~~~~~~~~~~~~~~~~~ ~ /* re_compile_fastmap computes a ``fastmap'' for the compiled pattern in ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ BUFP. A fastmap records which of the (1 << BYTEWIDTH) possible ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ characters can start a string that matches the pattern. This fastmap ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ is used by re_search to skip quickly over impossible starting points. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ The caller must supply the address of a (1 << BYTEWIDTH)-byte data ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ area as BUFP->fastmap. ~~~~~~~~~~~~~~~~~~~~~~ We set the `fastmap', `fastmap_accurate', and `can_be_null' fields in ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the pattern buffer. ~~~~~~~~~~~~~~~~~~~ Returns 0 if we succeed, -2 if an internal error. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int ~~~ re_compile_fastmap (struct re_pattern_buffer *bufp ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RE_LISP_SHORT_CONTEXT_ARGS_DECL) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ int j, k; ~~~~~~~~~ #ifdef MATCH_MAY_ALLOCATE ~~~~~~~~~~~~~~~~~~~~~~~~~ fail_stack_type fail_stack; ~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ /* We don't push any register information onto the failure stack. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* &&#### this should be changed for 8-bit-fixed, for efficiency. see ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ comment marked with &&#### in re_search_2. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ REGISTER char *fastmap = bufp->fastmap; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ unsigned char *pattern = bufp->buffer; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ long size = bufp->used; ~~~~~~~~~~~~~~~~~~~~~~~ re_char *p = pattern; ~~~~~~~~~~~~~~~~~~~~~ REGISTER re_char *pend = pattern + size; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef REGEX_REL_ALLOC ~~~~~~~~~~~~~~~~~~~~~~ /* This holds the pointer to the failure stack, when ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ it is allocated relocatably. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fail_stack_elt_t *failure_stack_ptr; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ /* Assume that each path through the pattern can be null until ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ proven otherwise. We set this false at the bottom of switch ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ statement, to which we get only if a particular path doesn't ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ match the empty string. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_bool path_can_be_null = true; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* We aren't doing a `succeed_n' to begin with. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_bool succeed_n_p = false; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef ERROR_CHECK_MALLOC ~~~~~~~~~~~~~~~~~~~~~~~~~ /* The pattern comes from string data, not buffer data. We don't access ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ any buffer data, so we don't have to worry about malloc() (but the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ disallowed flag may have been set by a caller). */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int depth = bind_regex_malloc_disallowed (0); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ assert (fastmap != NULL && p != NULL); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ INIT_FAIL_STACK (); ~~~~~~~~~~~~~~~~~~~ memset (fastmap, 0, 1 << BYTEWIDTH); /* Assume nothing's valid. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->fastmap_accurate = 1; /* It will be when we're done. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->can_be_null = 0; ~~~~~~~~~~~~~~~~~~~~~~ while (1) ~~~~~~~~~ { ~ if (p == pend || *p == succeed) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ /* We have reached the (effective) end of pattern. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!FAIL_STACK_EMPTY ()) ~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ bufp->can_be_null |= path_can_be_null; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Reset for next path. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~ path_can_be_null = true; ~~~~~~~~~~~~~~~~~~~~~~~~ p = (unsigned char *) fail_stack.stack[--fail_stack.avail].pointer; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ continue; ~~~~~~~~~ } ~ else ~~~~ break; ~~~~~~ } ~ /* We should never be about to go beyond the end of the pattern. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ assert (p < pend); ~~~~~~~~~~~~~~~~~~ switch ((re_opcode_t) *p++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ /* I guess the idea here is to simply not bother with a fastmap ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if a backreference is used, since it's too hard to figure out ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the fastmap for the corresponding group. Setting ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `can_be_null' stops `re_search_2' from using the fastmap, so ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ that is all we do. */ ~~~~~~~~~~~~~~~~~~~~~~ case duplicate: ~~~~~~~~~~~~~~~ bufp->can_be_null = 1; ~~~~~~~~~~~~~~~~~~~~~~ goto done; ~~~~~~~~~~ /* Following are the cases which match a character. These end ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ with `break'. */ ~~~~~~~~~~~~~~~~~ case exactn: ~~~~~~~~~~~~ fastmap[p[1]] = 1; ~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case charset: ~~~~~~~~~~~~~ /* XEmacs: Under Mule, these bit vectors will ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ only contain values for characters below 0x80. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH))) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[j] = 1; ~~~~~~~~~~~~~~~ break; ~~~~~~ case charset_not: ~~~~~~~~~~~~~~~~~ /* Chars beyond end of map must be allowed. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef MULE ~~~~~~~~~~~ for (j = *p * BYTEWIDTH; j < 0x80; j++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[j] = 1; ~~~~~~~~~~~~~~~ /* And all extended characters must be allowed, too. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (j = 0x80; j < 0x100; j++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (ibyte_first_byte_p (j)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[j] = 1; ~~~~~~~~~~~~~~~ #else /* not MULE */ ~~~~~~~~~~~~~~~~~~~~ for (j = *p * BYTEWIDTH; j < (1 << BYTEWIDTH); j++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[j] = 1; ~~~~~~~~~~~~~~~ #endif /* MULE */ ~~~~~~~~~~~~~~~~~ for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!(p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH)))) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[j] = 1; ~~~~~~~~~~~~~~~ break; ~~~~~~ #ifdef MULE ~~~~~~~~~~~ case charset_mule: ~~~~~~~~~~~~~~~~~~ { ~ int nentries; ~~~~~~~~~~~~~ Bitbyte flags = *p++; ~~~~~~~~~~~~~~~~~~~~~ if (flags) ~~~~~~~~~~ { ~ /* We need to consult the syntax table, fastmap won't ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ work. */ ~~~~~~~~ bufp->can_be_null = 1; ~~~~~~~~~~~~~~~~~~~~~~ goto done; ~~~~~~~~~~ } ~ nentries = unified_range_table_nentries ((void *) p); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (j = 0; j < nentries; j++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ EMACS_INT first, last; ~~~~~~~~~~~~~~~~~~~~~~ Lisp_Object dummy_val; ~~~~~~~~~~~~~~~~~~~~~~ int jj; ~~~~~~~ Ibyte strr[MAX_ICHAR_LEN]; ~~~~~~~~~~~~~~~~~~~~~~~~~~ unified_range_table_get_range ((void *) p, j, &first, &last, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ &dummy_val); ~~~~~~~~~~~~ #ifndef UNICODE_INTERNAL ~~~~~~~~~~~~~~~~~~~~~~~~ for (jj = first; jj <= last && jj < 0x80; jj++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[jj] = 1; ~~~~~~~~~~~~~~~~ /* Ranges below 0x100 can span charsets, but there ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ are only two (Control-1 and Latin-1), and ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ either first or last has to be in them. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ set_itext_ichar (strr, first); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[*strr] = 1; ~~~~~~~~~~~~~~~~~~~ if (last < 0x100) ~~~~~~~~~~~~~~~~~ { ~ set_itext_ichar (strr, last); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[*strr] = 1; ~~~~~~~~~~~~~~~~~~~ } ~ else if (CHAR_CODE_LIMIT == last) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ /* This is RECC_MULTIBYTE or RECC_NONASCII; true for all ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ non-ASCII characters. */ ~~~~~~~~~~~~~~~~~~~~~~~~ jj = 0x80; ~~~~~~~~~~ while (jj < 0xA0) ~~~~~~~~~~~~~~~~~ { ~ fastmap[jj++] = 1; ~~~~~~~~~~~~~~~~~~ } ~ } ~ #else ~~~~~ /* Ranges can span charsets. We depend on the fact that ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ lead bytes are monotonically non-decreasing as ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ character values increase. @@#### This is a fairly ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ reasonable assumption in general (but DOES NOT WORK in ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ old Mule due to the ordering of private dimension-1 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ chars before official dimension-2 chars), and introduces ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ a dependency on the particular representation. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ Ibyte strrlast[MAX_ICHAR_LEN]; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ set_itext_ichar (strr, first); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ set_itext_ichar (strrlast, min (last, CHAR_CODE_LIMIT - 1)); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (jj = *strr; jj <= *strrlast; jj++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[jj] = 1; ~~~~~~~~~~~~~~~~ } ~ #endif /* not UNICODE_INTERNAL */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* If it's not a possible first byte, it can't be in the fastmap. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ In UTF-8, lead bytes are not contiguous with ASCII, so a ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ range spanning the ASCII/non-ASCII boundary will put ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ extraneous bytes in the range [0x80 - 0xBF] in the fastmap. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (j = 0x80; j < 0x100; j++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!ibyte_first_byte_p (j)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[j] = 0; ~~~~~~~~~~~~~~~ } ~ break; ~~~~~~ case charset_mule_not: ~~~~~~~~~~~~~~~~~~~~~~ { ~ int nentries; ~~~~~~~~~~~~~ int smallest_prev = 0; ~~~~~~~~~~~~~~~~~~~~~~ Bitbyte flags = *p++; ~~~~~~~~~~~~~~~~~~~~~ if (flags) ~~~~~~~~~~ { ~ /* We need to consult the syntax table, fastmap won't ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ work. */ ~~~~~~~~ bufp->can_be_null = 1; ~~~~~~~~~~~~~~~~~~~~~~ goto done; ~~~~~~~~~~ } ~ nentries = unified_range_table_nentries ((void *) p); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifndef UNICODE_INTERNAL ~~~~~~~~~~~~~~~~~~~~~~~~ for (j = 0; j < nentries; j++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ EMACS_INT first, last; ~~~~~~~~~~~~~~~~~~~~~~ Lisp_Object dummy_val; ~~~~~~~~~~~~~~~~~~~~~~ int jj; ~~~~~~~ unified_range_table_get_range ((void *) p, j, &first, &last, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ &dummy_val); ~~~~~~~~~~~~ for (jj = smallest_prev; jj < first && jj < 0x80; jj++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[jj] = 1; ~~~~~~~~~~~~~~~~ smallest_prev = last + 1; ~~~~~~~~~~~~~~~~~~~~~~~~~ if (smallest_prev >= 0x80) ~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ } ~ /* Also set lead bytes after the end */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (j = smallest_prev; j < 0x80; j++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[j] = 1; ~~~~~~~~~~~~~~~ /* Calculating which lead bytes are actually allowed ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ here is rather difficult, so we just punt and allow ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ all of them. ~~~~~~~~~~~~ */ ~~ for (j = 0x80; j < 0x100; j++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (ibyte_first_byte_p (j)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[j] = 1; ~~~~~~~~~~~~~~~ #else ~~~~~ for (j = 0; j < nentries; j++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ EMACS_INT first, last; ~~~~~~~~~~~~~~~~~~~~~~ /* This denotes a range of lead bytes that are not ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ in the fastmap. */ ~~~~~~~~~~~~~~~~~~ int firstlead, lastlead; ~~~~~~~~~~~~~~~~~~~~~~~~ Lisp_Object dummy_val; ~~~~~~~~~~~~~~~~~~~~~~ int jj; ~~~~~~~ unified_range_table_get_range ((void *) p, j, &first, &last, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ &dummy_val); ~~~~~~~~~~~~ /* With Unicode-internal, lead bytes that are entirely ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ within the range and not including the beginning or end ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ are definitely not in the fastmap. Leading bytes that ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ include the beginning or ending characters will be in ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the fastmap unless the beginning or ending characters ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ are the first or last character, respectively, that uses ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ this lead byte. ~~~~~~~~~~~~~~~ @@#### WARNING! In order to determine whether we are the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ first or last character using a lead byte we use and ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ embed in the code some knowledge of how UTF-8 works -- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ at least, the fact that the the first character using a ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ particular lead byte has the minimum-numbered trailing ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ byte in all its trailing bytes, and the last character ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ using a particular lead byte has the maximum-numbered ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ trailing byte in all its trailing bytes. We abstract ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ away the actual minimum/maximum trailing byte numbers, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ at least. We could perhaps do this more portably by ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ just looking at the representation of the character one ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ higher or lower and seeing if the lead byte changes, but ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ you'd run into the problem of invalid characters, e.g. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if you're at the edge of the range of surrogates or are ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the top-most allowed character. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ ~~ if (first < 0x80) ~~~~~~~~~~~~~~~~~ firstlead = first; ~~~~~~~~~~~~~~~~~~ else ~~~~ { ~ Ibyte strr[MAX_ICHAR_LEN]; ~~~~~~~~~~~~~~~~~~~~~~~~~~ Bytecount slen = set_itext_ichar (strr, first); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int kk; ~~~~~~~ /* Determine if we're the first character using our ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ leading byte. */ ~~~~~~~~~~~~~~~~ for (kk = 1; kk < slen; kk++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (strr[kk] != FIRST_TRAILING_BYTE) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ /* If not, this leading byte might occur, so ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ make sure it gets added to the fastmap. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ firstlead = *strr + 1; ~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ } ~ /* Otherwise, we're the first character using our ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ leading byte, and we don't need to add the leading ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ byte to the fastmap. (If our range doesn't ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ completely cover the leading byte, it will get added ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ anyway by the code handling the other end of the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ range.) */ ~~~~~~~~~~ firstlead = *strr; ~~~~~~~~~~~~~~~~~~ } ~ if (last < 0x80) ~~~~~~~~~~~~~~~~ lastlead = last; ~~~~~~~~~~~~~~~~ else ~~~~ { ~ Ibyte strr[MAX_ICHAR_LEN]; ~~~~~~~~~~~~~~~~~~~~~~~~~~ Bytecount slen ~~~~~~~~~~~~~~ = set_itext_ichar (strr, ~~~~~~~~~~~~~~~~~~~~~~~~ min (last, ~~~~~~~~~~ CHAR_CODE_LIMIT - 1)); ~~~~~~~~~~~~~~~~~~~~~~ int kk; ~~~~~~~ /* Same as above but for the last character using ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ our leading byte. */ ~~~~~~~~~~~~~~~~~~~~ for (kk = 1; kk < slen; kk++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (strr[kk] != LAST_TRAILING_BYTE) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ lastlead = *strr - 1; ~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ } ~ lastlead = *strr; ~~~~~~~~~~~~~~~~~ } ~ /* Now, FIRSTLEAD and LASTLEAD are set to the beginning and ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ end, inclusive, of a range of lead bytes that cannot be ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ in the fastmap. Essentially, we want to set all the other ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bytes to be in the fastmap. Here we handle those after ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the previous range and before this one. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (jj = smallest_prev; jj < firstlead; jj++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[jj] = 1; ~~~~~~~~~~~~~~~~ smallest_prev = lastlead + 1; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* Also set lead bytes after the end of the final range. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (j = smallest_prev; j < 0x100; j++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[j] = 1; ~~~~~~~~~~~~~~~ /* If it's not a possible first byte, it can't be in the fastmap. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ In UTF-8, lead bytes are not contiguous with ASCII, so a ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ range spanning the ASCII/non-ASCII boundary will put ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ extraneous bytes in the range [0x80 - 0xBF] in the fastmap. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (j = 0x80; j < 0x100; j++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!ibyte_first_byte_p (j)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[j] = 0; ~~~~~~~~~~~~~~~ #endif /* UNICODE_INTERNAL */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ break; ~~~~~~ #endif /* MULE */ ~~~~~~~~~~~~~~~~~ case anychar: ~~~~~~~~~~~~~ { ~ int fastmap_newline = fastmap['\n']; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* `.' matches anything ... */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef MULE ~~~~~~~~~~~ /* "anything" only includes bytes that can be the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ first byte of a character. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (j = 0; j < 0x100; j++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (ibyte_first_byte_p (j)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[j] = 1; ~~~~~~~~~~~~~~~ #else ~~~~~ for (j = 0; j < (1 << BYTEWIDTH); j++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[j] = 1; ~~~~~~~~~~~~~~~ #endif ~~~~~~ /* ... except perhaps newline. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!(bufp->syntax & RE_DOT_NEWLINE)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap['\n'] = fastmap_newline; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Return if we have already set `can_be_null'; if we have, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ then the fastmap is irrelevant. Something's wrong here. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ else if (bufp->can_be_null) ~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto done; ~~~~~~~~~~ /* Otherwise, have to check alternative paths. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ } ~ #ifndef emacs ~~~~~~~~~~~~~ case wordchar: ~~~~~~~~~~~~~~ for (j = 0; j < (1 << BYTEWIDTH); j++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (SYNTAX (ignored, j) == Sword) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[j] = 1; ~~~~~~~~~~~~~~~ break; ~~~~~~ case notwordchar: ~~~~~~~~~~~~~~~~~ for (j = 0; j < (1 << BYTEWIDTH); j++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (SYNTAX (ignored, j) != Sword) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[j] = 1; ~~~~~~~~~~~~~~~ break; ~~~~~~ #else /* emacs */ ~~~~~~~~~~~~~~~~~ case wordchar: ~~~~~~~~~~~~~~ case notwordchar: ~~~~~~~~~~~~~~~~~ case wordbound: ~~~~~~~~~~~~~~~ case notwordbound: ~~~~~~~~~~~~~~~~~~ case wordbeg: ~~~~~~~~~~~~~ case wordend: ~~~~~~~~~~~~~ case notsyntaxspec: ~~~~~~~~~~~~~~~~~~~ case syntaxspec: ~~~~~~~~~~~~~~~~ /* This match depends on text properties. These end with ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ aborting optimizations. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->can_be_null = 1; ~~~~~~~~~~~~~~~~~~~~~~ goto done; ~~~~~~~~~~ #if 0 /* all of the following code is unused now that the `syntax-table' ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ property exists -- it's trickier to do this than just look in ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the buffer. &&#### but we could just use the syntax-cache stuff ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ instead; why don't we? --ben */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case wordchar: ~~~~~~~~~~~~~~ k = (int) Sword; ~~~~~~~~~~~~~~~~ goto matchsyntax; ~~~~~~~~~~~~~~~~~ case notwordchar: ~~~~~~~~~~~~~~~~~ k = (int) Sword; ~~~~~~~~~~~~~~~~ goto matchnotsyntax; ~~~~~~~~~~~~~~~~~~~~ case syntaxspec: ~~~~~~~~~~~~~~~~ k = *p++; ~~~~~~~~~ matchsyntax: ~~~~~~~~~~~~ #ifdef MULE ~~~~~~~~~~~ for (j = 0; j < 0x80; j++) ~~~~~~~~~~~~~~~~~~~~~~~~~~ if (SYNTAX ~~~~~~~~~~ (XCHAR_TABLE (BUFFER_MIRROR_SYNTAX_TABLE (lispbuf)), j) == ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (enum syntaxcode) k) ~~~~~~~~~~~~~~~~~~~~ fastmap[j] = 1; ~~~~~~~~~~~~~~~ /* @@#### To be correct, we need to set the fastmap for any ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ lead byte any of whose characters can have this syntax code. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ This is hard to calculate so we just punt for now. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (j = 0x80; j < 0x100; j++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (ibyte_first_byte_p (j)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[j] = 1; ~~~~~~~~~~~~~~~ #else /* not MULE */ ~~~~~~~~~~~~~~~~~~~~ for (j = 0; j < (1 << BYTEWIDTH); j++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (SYNTAX ~~~~~~~~~~ (XCHAR_TABLE (BUFFER_MIRROR_SYNTAX_TABLE (lispbuf)), j) == ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (enum syntaxcode) k) ~~~~~~~~~~~~~~~~~~~~ fastmap[j] = 1; ~~~~~~~~~~~~~~~ #endif /* MULE */ ~~~~~~~~~~~~~~~~~ break; ~~~~~~ case notsyntaxspec: ~~~~~~~~~~~~~~~~~~~ k = *p++; ~~~~~~~~~ matchnotsyntax: ~~~~~~~~~~~~~~~ #ifdef MULE ~~~~~~~~~~~ for (j = 0; j < 0x80; j++) ~~~~~~~~~~~~~~~~~~~~~~~~~~ if (SYNTAX ~~~~~~~~~~ (XCHAR_TABLE ~~~~~~~~~~~~ (BUFFER_MIRROR_SYNTAX_TABLE (lispbuf)), j) != ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (enum syntaxcode) k) ~~~~~~~~~~~~~~~~~~~~ fastmap[j] = 1; ~~~~~~~~~~~~~~~ /* @@#### To be correct, we need to set the fastmap for any ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ lead byte all of whose characters do not have this syntax code. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ This is hard to calculate so we just punt for now. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (j = 0x80; j < 0x100; j++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (ibyte_first_byte_p (j)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[j] = 1; ~~~~~~~~~~~~~~~ #else /* not MULE */ ~~~~~~~~~~~~~~~~~~~~ for (j = 0; j < (1 << BYTEWIDTH); j++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (SYNTAX ~~~~~~~~~~ (XCHAR_TABLE ~~~~~~~~~~~~ (BUFFER_MIRROR_SYNTAX_TABLE (lispbuf)), j) != ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (enum syntaxcode) k) ~~~~~~~~~~~~~~~~~~~~ fastmap[j] = 1; ~~~~~~~~~~~~~~~ #endif /* MULE */ ~~~~~~~~~~~~~~~~~ break; ~~~~~~ #endif /* 0 */ ~~~~~~~~~~~~~~ #ifdef MULE ~~~~~~~~~~~ /* 97/2/17 jhod category patch */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case categoryspec: ~~~~~~~~~~~~~~~~~~ case notcategoryspec: ~~~~~~~~~~~~~~~~~~~~~ bufp->can_be_null = 1; ~~~~~~~~~~~~~~~~~~~~~~ UNBIND_REGEX_MALLOC_CHECK (); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return 0; ~~~~~~~~~ /* end if category patch */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif /* MULE */ ~~~~~~~~~~~~~~~~~ /* All cases after this match the empty string. These end with ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `continue'. */ ~~~~~~~~~~~~~~~ case before_dot: ~~~~~~~~~~~~~~~~ case at_dot: ~~~~~~~~~~~~ case after_dot: ~~~~~~~~~~~~~~~ continue; ~~~~~~~~~ #endif /* emacs */ ~~~~~~~~~~~~~~~~~~ case no_op: ~~~~~~~~~~~ case begline: ~~~~~~~~~~~~~ case endline: ~~~~~~~~~~~~~ case begbuf: ~~~~~~~~~~~~ case endbuf: ~~~~~~~~~~~~ #ifndef emacs ~~~~~~~~~~~~~ case wordbound: ~~~~~~~~~~~~~~~ case notwordbound: ~~~~~~~~~~~~~~~~~~ case wordbeg: ~~~~~~~~~~~~~ case wordend: ~~~~~~~~~~~~~ #endif ~~~~~~ case push_dummy_failure: ~~~~~~~~~~~~~~~~~~~~~~~~ continue; ~~~~~~~~~ case jump_n: ~~~~~~~~~~~~ case pop_failure_jump: ~~~~~~~~~~~~~~~~~~~~~~ case maybe_pop_jump: ~~~~~~~~~~~~~~~~~~~~ case jump: ~~~~~~~~~~ case jump_past_alt: ~~~~~~~~~~~~~~~~~~~ case dummy_failure_jump: ~~~~~~~~~~~~~~~~~~~~~~~~ EXTRACT_NUMBER_AND_INCR (j, p); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ p += j; ~~~~~~~ if (j > 0) ~~~~~~~~~~ continue; ~~~~~~~~~ /* Jump backward implies we just went through the body of a ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ loop and matched nothing. Opcode jumped to should be ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `on_failure_jump' or `succeed_n'. Just treat it like an ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ordinary jump. For a * loop, it has pushed its failure ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ point already; if so, discard that as redundant. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if ((re_opcode_t) *p != on_failure_jump ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ && (re_opcode_t) *p != succeed_n) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ continue; ~~~~~~~~~ p++; ~~~~ EXTRACT_NUMBER_AND_INCR (j, p); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ p += j; ~~~~~~~ /* If what's on the stack is where we are now, pop it. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!FAIL_STACK_EMPTY () ~~~~~~~~~~~~~~~~~~~~~~~~ && fail_stack.stack[fail_stack.avail - 1].pointer == p) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fail_stack.avail--; ~~~~~~~~~~~~~~~~~~~ continue; ~~~~~~~~~ case on_failure_jump: ~~~~~~~~~~~~~~~~~~~~~ case on_failure_keep_string_jump: ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ handle_on_failure_jump: ~~~~~~~~~~~~~~~~~~~~~~~ EXTRACT_NUMBER_AND_INCR (j, p); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* For some patterns, e.g., `(a?)?', `p+j' here points to the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ end of the pattern. We don't want to push such a point, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ since when we restore it above, entering the switch will ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ increment `p' past the end of the pattern. We don't need ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ to push such a point since we obviously won't find any more ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap entries beyond `pend'. Such a pattern can match ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the null string, though. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (p + j < pend) ~~~~~~~~~~~~~~~~~ { ~ if (!PUSH_PATTERN_OP (p + j, fail_stack)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ RESET_FAIL_STACK (); ~~~~~~~~~~~~~~~~~~~~ UNBIND_REGEX_MALLOC_CHECK (); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return -2; ~~~~~~~~~~ } ~ } ~ else ~~~~ bufp->can_be_null = 1; ~~~~~~~~~~~~~~~~~~~~~~ if (succeed_n_p) ~~~~~~~~~~~~~~~~ { ~ EXTRACT_NUMBER_AND_INCR (k, p); /* Skip the n. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ succeed_n_p = false; ~~~~~~~~~~~~~~~~~~~~ } ~ continue; ~~~~~~~~~ case succeed_n: ~~~~~~~~~~~~~~~ /* Get to the number of times to succeed. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ p += 2; ~~~~~~~ /* Increment p past the n for when k != 0. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ EXTRACT_NONNEGATIVE_AND_INCR (k, p); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (k == 0) ~~~~~~~~~~~ { ~ p -= 4; ~~~~~~~ succeed_n_p = true; /* Spaghetti code alert. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto handle_on_failure_jump; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ continue; ~~~~~~~~~ case set_number_at: ~~~~~~~~~~~~~~~~~~~ p += 4; ~~~~~~~ continue; ~~~~~~~~~ case start_memory: ~~~~~~~~~~~~~~~~~~ case stop_memory: ~~~~~~~~~~~~~~~~~ p += 4; ~~~~~~~ continue; ~~~~~~~~~ default: ~~~~~~~~ ABORT (); /* We have listed all the cases. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } /* switch *p++ */ ~~~~~~~~~~~~~~~~~~~ /* Getting here means we have found the possible starting ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ characters for one path of the pattern -- and that the empty ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ string does not match. We need not follow this path further. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Instead, look at the next alternative (remembered on the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ stack), or quit if no more. The test at the top of the loop ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ does these things. */ ~~~~~~~~~~~~~~~~~~~~~~ path_can_be_null = false; ~~~~~~~~~~~~~~~~~~~~~~~~~ p = pend; ~~~~~~~~~ } /* while p */ ~~~~~~~~~~~~~~~ /* Set `can_be_null' for the last path (also the first path, if the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ pattern is empty). */ ~~~~~~~~~~~~~~~~~~~~~~ bufp->can_be_null |= path_can_be_null; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ done: ~~~~~ RESET_FAIL_STACK (); ~~~~~~~~~~~~~~~~~~~~ UNBIND_REGEX_MALLOC_CHECK (); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return 0; ~~~~~~~~~ } /* re_compile_fastmap */ ~~~~~~~~~~~~~~~~~~~~~~~~~~ ~ /* Set REGS to hold NUM_REGS registers, storing them in STARTS and ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ENDS. Subsequent matches using PATTERN_BUFFER and REGS will use ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ this memory for recording register information. STARTS and ENDS ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ must be allocated using the malloc library routine, and must each ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ be at least NUM_REGS * sizeof (regoff_t) bytes long. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ If NUM_REGS == 0, then subsequent matches should allocate their own ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ register data. ~~~~~~~~~~~~~~ Unless this function is called, the first search or match using ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PATTERN_BUFFER will allocate its own register data, without ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ freeing the old data. */ ~~~~~~~~~~~~~~~~~~~~~~~~~ void ~~~~ re_set_registers (struct re_pattern_buffer *bufp, struct re_registers *regs, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int num_regs, regoff_t *starts, regoff_t *ends) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ if (num_regs) ~~~~~~~~~~~~~ { ~ bufp->regs_allocated = REGS_REALLOCATE; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regs->num_regs = num_regs; ~~~~~~~~~~~~~~~~~~~~~~~~~~ regs->start = starts; ~~~~~~~~~~~~~~~~~~~~~ regs->end = ends; ~~~~~~~~~~~~~~~~~ } ~ else ~~~~ { ~ bufp->regs_allocated = REGS_UNALLOCATED; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regs->num_regs = 0; ~~~~~~~~~~~~~~~~~~~ regs->start = regs->end = (regoff_t *) 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ } ~ ~ /* Searching routines. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Like re_search_2, below, but only one string is specified, and ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ doesn't let you say where to stop matching. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int ~~~ re_search (struct re_pattern_buffer *bufp, const char *string, int size, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int startpos, int range, struct re_registers *regs ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RE_LISP_CONTEXT_ARGS_DECL) ~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ return re_search_2 (bufp, NULL, 0, string, size, startpos, range, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regs, size RE_LISP_CONTEXT_ARGS); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* Using the compiled pattern in BUFP->buffer, first tries to match the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ virtual concatenation of STRING1 and STRING2, starting first at index ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ STARTPOS, then at STARTPOS + 1, and so on. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ STRING1 and STRING2 have length SIZE1 and SIZE2, respectively. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RANGE is how far to scan while trying to match. RANGE = 0 means try ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ only at STARTPOS; in general, the last start tried is STARTPOS + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RANGE. ~~~~~~ All sizes and positions refer to bytes (not chars); under Mule, the code ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ knows about the format of the text and will only check at positions ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ where a character starts. ~~~~~~~~~~~~~~~~~~~~~~~~~ With MULE, RANGE is a byte position, not a char position. The last ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ start tried is the character starting <= STARTPOS + RANGE. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ In REGS, return the indices of the virtual concatenation of STRING1 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ and STRING2 that matched the entire BUFP->buffer and its contained ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ subexpressions. ~~~~~~~~~~~~~~~ Do not consider matching one past the index STOP in the virtual ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ concatenation of STRING1 and STRING2. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ We return either the position in the strings at which the match was ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ found, -1 if no match, or -2 if error (such as failure ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ stack overflow). */ ~~~~~~~~~~~~~~~~~~~~ int ~~~ re_search_2 (struct re_pattern_buffer *bufp, const char *str1, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int size1, const char *str2, int size2, int startpos, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int range, struct re_registers *regs, int stop ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RE_LISP_CONTEXT_ARGS_DECL) ~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ int val; ~~~~~~~~ re_char *string1 = (re_char *) str1; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_char *string2 = (re_char *) str2; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ REGISTER char *fastmap = bufp->fastmap; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ REGISTER RE_TRANSLATE_TYPE translate = bufp->translate; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int total_size = size1 + size2; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int endpos = startpos + range; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef REGEX_BEGLINE_CHECK ~~~~~~~~~~~~~~~~~~~~~~~~~~ int anchored_at_begline = 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ re_char *d; ~~~~~~~~~~~ #ifdef emacs ~~~~~~~~~~~~ Internal_Format fmt = buffer_or_other_internal_format (lispobj); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef REL_ALLOC ~~~~~~~~~~~~~~~~ const Ibyte *orig_buftext = ~~~~~~~~~~~~~~~~~~~~~~~~~~~ BUFFERP (lispobj) ? ~~~~~~~~~~~~~~~~~~~ BYTE_BUF_BYTE_ADDRESS (XBUFFER (lispobj), ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ BYTE_BUF_BEG (XBUFFER (lispobj))) : ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 0; ~~ #endif ~~~~~~ #ifdef ERROR_CHECK_MALLOC ~~~~~~~~~~~~~~~~~~~~~~~~~ int depth; ~~~~~~~~~~ #endif ~~~~~~ #endif /* emacs */ ~~~~~~~~~~~~~~~~~~ int forward_search_p; ~~~~~~~~~~~~~~~~~~~~~ /* Check for out-of-range STARTPOS. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (startpos < 0 || startpos > total_size) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return -1; ~~~~~~~~~~ /* Fix up RANGE if it might eventually take us outside ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the virtual concatenation of STRING1 and STRING2. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (endpos < 0) ~~~~~~~~~~~~~~~ range = 0 - startpos; ~~~~~~~~~~~~~~~~~~~~~ else if (endpos > total_size) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ range = total_size - startpos; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ forward_search_p = range > 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (void) (forward_search_p); /* This is only used with assertions, silence the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ compiler warning when they're turned off. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* If the search isn't to be a backwards one, don't waste time in a ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ search for a pattern that must be anchored. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (bufp->used > 0 && (re_opcode_t) bufp->buffer[0] == begbuf && range > 0) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ if (startpos > 0) ~~~~~~~~~~~~~~~~~ return -1; ~~~~~~~~~~ else ~~~~ { ~ d = ((const unsigned char *) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (startpos >= size1 ? string2 - size1 : string1) + startpos); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ range = itext_ichar_len_fmt (d, fmt); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ } ~ #ifdef emacs ~~~~~~~~~~~~ /* In a forward search for something that starts with \=. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ don't keep searching past point. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (bufp->used > 0 && (re_opcode_t) bufp->buffer[0] == at_dot && range > 0) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ if (!BUFFERP (lispobj)) ~~~~~~~~~~~~~~~~~~~~~~~ return -1; ~~~~~~~~~~ range = (BYTE_BUF_PT (XBUFFER (lispobj)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - BYTE_BUF_BEGV (XBUFFER (lispobj)) - startpos); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (range < 0) ~~~~~~~~~~~~~~ return -1; ~~~~~~~~~~ } ~ #endif /* emacs */ ~~~~~~~~~~~~~~~~~~ #ifdef ERROR_CHECK_MALLOC ~~~~~~~~~~~~~~~~~~~~~~~~~ /* Do this after the above return()s. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ depth = bind_regex_malloc_disallowed (1); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ /* Update the fastmap now if not correct already. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ BEGIN_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~~~ if (fastmap && !bufp->fastmap_accurate) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (re_compile_fastmap (bufp RE_LISP_SHORT_CONTEXT_ARGS) == -2) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ END_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~ UNBIND_REGEX_MALLOC_CHECK (); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return -2; ~~~~~~~~~~ } ~ END_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~ RE_SEARCH_RELOCATE_MOVEABLE_DATA_POINTERS (); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef REGEX_BEGLINE_CHECK ~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ long i = 0; ~~~~~~~~~~~ while (i < bufp->used) ~~~~~~~~~~~~~~~~~~~~~~ { ~ if (bufp->buffer[i] == start_memory || ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->buffer[i] == stop_memory) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ i += 4; ~~~~~~~ else ~~~~ break; ~~~~~~ } ~ anchored_at_begline = i < bufp->used && bufp->buffer[i] == begline; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ #endif ~~~~~~ #ifdef emacs ~~~~~~~~~~~~ BEGIN_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~~~ /* Update the mirror syntax table if it's used and dirty. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ SYNTAX_CODE (BUFFER_MIRROR_SYNTAX_TABLE (lispbuf), 'a'); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ scache = setup_syntax_cache (scache, lispobj, lispbuf, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ offset_to_bytexpos (lispobj, startpos), ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 1); ~~~ END_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~ RE_SEARCH_RELOCATE_MOVEABLE_DATA_POINTERS (); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ /* Loop through the string, looking for a place to start matching. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (;;) ~~~~~~~~ { ~ #ifdef REGEX_BEGLINE_CHECK ~~~~~~~~~~~~~~~~~~~~~~~~~~ /* If the regex is anchored at the beginning of a line (i.e. with a ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ^), then we can speed things up by skipping to the next ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ beginning-of-line. However, to determine "beginning of line" we ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ need to look at the previous char, so can't do this check if at ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ beginning of either string. (Well, we could if at the beginning of ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the second string, but it would require additional code, and this ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ is just an optimization.) */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (anchored_at_begline && startpos > 0 && startpos != size1) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ if (range > 0) ~~~~~~~~~~~~~~ { ~ /* whose stupid idea was it anyway to make this ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ function take two strings to match?? */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int lim = 0; ~~~~~~~~~~~~ re_char *orig_d; ~~~~~~~~~~~~~~~~ re_char *stop_d; ~~~~~~~~~~~~~~~~ /* Compute limit as below in fastmap code, so we are guaranteed ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ to remain within a single string. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (startpos < size1 && startpos + range >= size1) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ lim = range - (size1 - startpos); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ d = ((const unsigned char *) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (startpos >= size1 ? string2 - size1 : string1) + startpos); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ orig_d = d; ~~~~~~~~~~~ stop_d = d + range - lim; ~~~~~~~~~~~~~~~~~~~~~~~~~ /* We want to find the next location (including the current ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ one) where the previous char is a newline, so back up one ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ and search forward for a newline. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEC_IBYTEPTR_FMT (d, fmt); /* Ok, since startpos != size1. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Written out as an if-else to avoid testing `translate' ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ inside the loop. */ ~~~~~~~~~~~~~~~~~~~~ if (TRANSLATE_P (translate)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ while (d < stop_d && ~~~~~~~~~~~~~~~~~~~~ RE_TRANSLATE_1 (itext_ichar_fmt (d, fmt, lispobj)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ != '\n') ~~~~~~~~ INC_IBYTEPTR_FMT (d, fmt); ~~~~~~~~~~~~~~~~~~~~~~~~~~ else ~~~~ while (d < stop_d && ~~~~~~~~~~~~~~~~~~~~ itext_ichar_ascii_fmt (d, fmt, lispobj) != '\n') ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ INC_IBYTEPTR_FMT (d, fmt); ~~~~~~~~~~~~~~~~~~~~~~~~~~ /* If we were stopped by a newline, skip forward over it. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Otherwise we will get in an infloop when our start position ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ was at begline. */ ~~~~~~~~~~~~~~~~~~ if (d < stop_d) ~~~~~~~~~~~~~~~ INC_IBYTEPTR_FMT (d, fmt); ~~~~~~~~~~~~~~~~~~~~~~~~~~ range -= d - orig_d; ~~~~~~~~~~~~~~~~~~~~ startpos += d - orig_d; ~~~~~~~~~~~~~~~~~~~~~~~ assert (!forward_search_p || range >= 0); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ else if (range < 0) ~~~~~~~~~~~~~~~~~~~ { ~ /* We're lazy, like in the fastmap code below */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Ichar c; ~~~~~~~~ d = ((const unsigned char *) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (startpos >= size1 ? string2 - size1 : string1) + startpos); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEC_IBYTEPTR_FMT (d, fmt); ~~~~~~~~~~~~~~~~~~~~~~~~~~ c = itext_ichar_fmt (d, fmt, lispobj); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ c = RE_TRANSLATE (c); ~~~~~~~~~~~~~~~~~~~~~ if (c != '\n') ~~~~~~~~~~~~~~ goto advance; ~~~~~~~~~~~~~ } ~ } ~ #endif /* REGEX_BEGLINE_CHECK */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* If a fastmap is supplied, skip quickly over characters that ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ cannot be the start of a match. If the pattern can match the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ null string, however, we don't need to skip characters; we want ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the first null string. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~ if (fastmap && startpos < total_size && !bufp->can_be_null) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ /* For the moment, fastmap always works as if buffer ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ is in default format, so convert chars in the search strings ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ into default format as we go along, if necessary. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ &&#### fastmap needs rethinking for 8-bit-fixed so ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ it's faster. We need it to reflect the raw ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 8-bit-fixed values. That isn't so hard if we assume ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ that the top 96 bytes represent a single 1-byte ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ charset. For 16-bit/32-bit stuff it's probably not ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ worth it to make the fastmap represent the raw, due to ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ its nature -- we'd have to use the LSB for the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap, and that causes lots of problems with Mule ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ chars, where it essentially wipes out the usefulness ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ of the fastmap entirely. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (range > 0) /* Searching forwards. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ int lim = 0; ~~~~~~~~~~~~ int irange = range; ~~~~~~~~~~~~~~~~~~~ if (startpos < size1 && startpos + range >= size1) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ lim = range - (size1 - startpos); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ d = ((const unsigned char *) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (startpos >= size1 ? string2 - size1 : string1) + startpos); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Written out as an if-else to avoid testing `translate' ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ inside the loop. */ ~~~~~~~~~~~~~~~~~~~~ if (TRANSLATE_P (translate)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ while (range > lim) ~~~~~~~~~~~~~~~~~~~ { ~ re_char *old_d = d; ~~~~~~~~~~~~~~~~~~~ #ifdef MULE ~~~~~~~~~~~ Ibyte tempch[MAX_ICHAR_LEN]; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Ichar buf_ch = ~~~~~~~~~~~~~~ RE_TRANSLATE_1 (itext_ichar_fmt (d, fmt, lispobj)); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ set_itext_ichar (tempch, buf_ch); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (fastmap[*tempch]) ~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ #else ~~~~~ if (fastmap[(unsigned char) RE_TRANSLATE_1 (*d)]) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ #endif /* MULE */ ~~~~~~~~~~~~~~~~~ INC_IBYTEPTR_FMT (d, fmt); ~~~~~~~~~~~~~~~~~~~~~~~~~~ range -= (d - old_d); ~~~~~~~~~~~~~~~~~~~~~ assert (!forward_search_p || range >= 0); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ } ~ #ifdef MULE ~~~~~~~~~~~ else if (fmt != FORMAT_DEFAULT) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ while (range > lim) ~~~~~~~~~~~~~~~~~~~ { ~ re_char *old_d = d; ~~~~~~~~~~~~~~~~~~~ Ibyte tempch[MAX_ICHAR_LEN]; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Ichar buf_ch = itext_ichar_fmt (d, fmt, lispobj); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ set_itext_ichar (tempch, buf_ch); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (fastmap[*tempch]) ~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ INC_IBYTEPTR_FMT (d, fmt); ~~~~~~~~~~~~~~~~~~~~~~~~~~ range -= (d - old_d); ~~~~~~~~~~~~~~~~~~~~~ assert (!forward_search_p || range >= 0); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ } ~ #endif /* MULE */ ~~~~~~~~~~~~~~~~~ else ~~~~ { ~ while (range > lim && !fastmap[*d]) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ re_char *old_d = d; ~~~~~~~~~~~~~~~~~~~ INC_IBYTEPTR (d); ~~~~~~~~~~~~~~~~~ range -= (d - old_d); ~~~~~~~~~~~~~~~~~~~~~ assert (!forward_search_p || range >= 0); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ } ~ startpos += irange - range; ~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ else /* Searching backwards. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ /* #### It's not clear why we don't just write a loop, like ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for the moving-forward case. Perhaps the writer got lazy, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ since backward searches aren't so common. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ d = ((const unsigned char *) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (startpos >= size1 ? string2 - size1 : string1) + startpos); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef MULE ~~~~~~~~~~~ { ~ Ibyte tempch[MAX_ICHAR_LEN]; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Ichar buf_ch = ~~~~~~~~~~~~~~ RE_TRANSLATE (itext_ichar_fmt (d, fmt, lispobj)); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ set_itext_ichar (tempch, buf_ch); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!fastmap[*tempch]) ~~~~~~~~~~~~~~~~~~~~~~ goto advance; ~~~~~~~~~~~~~ } ~ #else ~~~~~ if (!fastmap[(unsigned char) RE_TRANSLATE (*d)]) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto advance; ~~~~~~~~~~~~~ #endif /* MULE */ ~~~~~~~~~~~~~~~~~ } ~ } ~ /* If can't match the null string, and that's all we have left, fail. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (range >= 0 && startpos == total_size && fastmap ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ && !bufp->can_be_null) ~~~~~~~~~~~~~~~~~~~~~~ { ~ UNBIND_REGEX_MALLOC_CHECK (); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return -1; ~~~~~~~~~~ } ~ #ifdef emacs /* XEmacs added, w/removal of immediate_quit */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!no_quit_in_re_search) ~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ BEGIN_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~~~ QUIT; ~~~~~ END_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~ RE_SEARCH_RELOCATE_MOVEABLE_DATA_POINTERS (); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ #endif ~~~~~~ BEGIN_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~~~ val = re_match_2_internal (bufp, string1, size1, string2, size2, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ startpos, regs, stop ~~~~~~~~~~~~~~~~~~~~ RE_LISP_CONTEXT_ARGS); ~~~~~~~~~~~~~~~~~~~~~~ #ifndef REGEX_MALLOC ~~~~~~~~~~~~~~~~~~~~ ALLOCA_GARBAGE_COLLECT (); ~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ END_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~ RE_SEARCH_RELOCATE_MOVEABLE_DATA_POINTERS (); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (val >= 0) ~~~~~~~~~~~~~ { ~ UNBIND_REGEX_MALLOC_CHECK (); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return startpos; ~~~~~~~~~~~~~~~~ } ~ if (val == -2) ~~~~~~~~~~~~~~ { ~ UNBIND_REGEX_MALLOC_CHECK (); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return -2; ~~~~~~~~~~ } ~ RE_SEARCH_RELOCATE_MOVEABLE_DATA_POINTERS (); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ advance: ~~~~~~~~ if (!range) ~~~~~~~~~~~ break; ~~~~~~ else if (range > 0) ~~~~~~~~~~~~~~~~~~~ { ~ Bytecount d_size; ~~~~~~~~~~~~~~~~~ d = ((const unsigned char *) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (startpos >= size1 ? string2 - size1 : string1) + startpos); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ d_size = itext_ichar_len_fmt (d, fmt); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ range -= d_size; ~~~~~~~~~~~~~~~~ assert (!forward_search_p || range >= 0); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ startpos += d_size; ~~~~~~~~~~~~~~~~~~~ } ~ else ~~~~ { ~ Bytecount d_size; ~~~~~~~~~~~~~~~~~ /* Note startpos > size1 not >=. If we are on the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ string1/string2 boundary, we want to backup into string1. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ d = ((const unsigned char *) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (startpos > size1 ? string2 - size1 : string1) + startpos); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEC_IBYTEPTR_FMT (d, fmt); ~~~~~~~~~~~~~~~~~~~~~~~~~~ d_size = itext_ichar_len_fmt (d, fmt); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ range += d_size; ~~~~~~~~~~~~~~~~ assert (!forward_search_p || range >= 0); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ startpos -= d_size; ~~~~~~~~~~~~~~~~~~~ } ~ } ~ UNBIND_REGEX_MALLOC_CHECK (); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return -1; ~~~~~~~~~~ } /* re_search_2 */ ~~~~~~~~~~~~~~~~~~~ ~ /* Declarations and macros for re_match_2. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* This converts PTR, a pointer into one of the search strings `string1' ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ and `string2' into an offset from the beginning of that string. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define POINTER_TO_OFFSET(ptr) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (FIRST_STRING_P (ptr) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~ ? ((regoff_t) ((ptr) - string1)) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ : ((regoff_t) ((ptr) - string2 + size1))) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Macros for dealing with the split strings in re_match_2. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define MATCHING_IN_FIRST_STRING (dend == end_match_1) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Call before fetching a character with *d. This switches over to ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ string2 if necessary. */ ~~~~~~~~~~~~~~~~~~~~~~~~~ #define REGEX_PREFETCH() \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ while (d == dend) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { \ ~~~~~~~~~~~ /* End of string2 => fail. */ \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (dend == end_match_2) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto fail; \ ~~~~~~~~~~~~~~~~~~ /* End of string1 => advance to string2. */ \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ d = string2; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~ dend = end_match_2; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* Test if at very beginning or at very end of the virtual concatenation ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ of `string1' and `string2'. If only one string, it's `string2'. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define AT_STRINGS_BEG(d) ((d) == (size1 ? string1 : string2) || !size2) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define AT_STRINGS_END(d) ((d) == end2) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* XEmacs change: ~~~~~~~~~~~~~~~~~ If the given position straddles the string gap, return the equivalent ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ position that is before or after the gap, respectively; otherwise, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return the same position. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define POS_BEFORE_GAP_UNSAFE(d) ((d) == string2 ? end1 : (d)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define POS_AFTER_GAP_UNSAFE(d) ((d) == end1 ? string2 : (d)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Test if CH is a word-constituent character. (XEmacs change) */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define WORDCHAR_P(ch) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (SYNTAX (BUFFER_MIRROR_SYNTAX_TABLE (lispbuf), ch) == Sword) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Free everything we malloc. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef MATCH_MAY_ALLOCATE ~~~~~~~~~~~~~~~~~~~~~~~~~ #define FREE_VAR(var,type) if (var) REGEX_FREE (var, type); var = NULL ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define FREE_VARIABLES() \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ do { \ ~~~~~~~~~~~~~~ UNBIND_REGEX_MALLOC_CHECK (); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ REGEX_FREE_STACK (fail_stack.stack); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ FREE_VAR (regstart, re_char **); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ FREE_VAR (regend, re_char **); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ FREE_VAR (old_regstart, re_char **); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ FREE_VAR (old_regend, re_char **); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ FREE_VAR (best_regstart, re_char **); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ FREE_VAR (best_regend, re_char **); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ FREE_VAR (reg_info, register_info_type *); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ FREE_VAR (reg_dummy, re_char **); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ FREE_VAR (reg_info_dummy, register_info_type *); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } while (0) ~~~~~~~~~~~ #else /* not MATCH_MAY_ALLOCATE */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define FREE_VARIABLES() \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ do { \ ~~~~~~~~~~~ UNBIND_REGEX_MALLOC_CHECK (); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } while (0) ~~~~~~~~~~~ #endif /* MATCH_MAY_ALLOCATE */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* These values must meet several constraints. They must not be valid ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ register values, which means we can use numbers larger than MAX_REGNUM. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ They must differ by 1, because of NUM_FAILURE_ITEMS above. And the value ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for the lowest register must be larger than the value for the highest ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ register, so we do not try to actually save any registers when none are ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ active. */ ~~~~~~~~~~~ #define NO_HIGHEST_ACTIVE_REG (MAX_REGNUM + 1) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define NO_LOWEST_ACTIVE_REG (NO_HIGHEST_ACTIVE_REG + 1) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ~ /* Matching routines. */ ~~~~~~~~~~~~~~~~~~~~~~~~~ #ifndef emacs /* XEmacs never uses this. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* re_match is like re_match_2 except it takes only a single string. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int ~~~ re_match (struct re_pattern_buffer *bufp, const char *string, int size, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int pos, struct re_registers *regs ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RE_LISP_CONTEXT_ARGS_DECL) ~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ int result = re_match_2_internal (bufp, NULL, 0, (re_char *) string, size, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ pos, regs, size ~~~~~~~~~~~~~~~ RE_LISP_CONTEXT_ARGS); ~~~~~~~~~~~~~~~~~~~~~~ ALLOCA_GARBAGE_COLLECT (); ~~~~~~~~~~~~~~~~~~~~~~~~~~ return result; ~~~~~~~~~~~~~~ } ~ #endif /* not emacs */ ~~~~~~~~~~~~~~~~~~~~~~ /* re_match_2 matches the compiled pattern in BUFP against the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (virtual) concatenation of STRING1 and STRING2 (of length SIZE1 and ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ SIZE2, respectively). We start matching at POS, and stop matching ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ at STOP. ~~~~~~~~ If REGS is non-null and the `no_sub' field of BUFP is nonzero, we ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ store offsets for the substring each group matched in REGS. See the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ documentation for exactly how many groups we fill. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ We return -1 if no match, -2 if an internal error (such as the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ failure stack overflowing). Otherwise, we return the length of the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ matched substring. */ ~~~~~~~~~~~~~~~~~~~~~~ int ~~~ re_match_2 (struct re_pattern_buffer *bufp, const char *string1, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int size1, const char *string2, int size2, int pos, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ struct re_registers *regs, int stop ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RE_LISP_CONTEXT_ARGS_DECL) ~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ int result; ~~~~~~~~~~~ #ifdef emacs ~~~~~~~~~~~~ /* Update the mirror syntax table if it's dirty now, this would otherwise ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ cause a malloc() in charset_mule in re_match_2_internal() when checking ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ characters' syntax. */ ~~~~~~~~~~~~~~~~~~~~~~ SYNTAX_CODE (BUFFER_MIRROR_SYNTAX_TABLE (lispbuf), 'a'); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ scache = setup_syntax_cache (scache, lispobj, lispbuf, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ offset_to_bytexpos (lispobj, pos), ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 1); ~~~ #endif ~~~~~~ result = re_match_2_internal (bufp, (re_char *) string1, size1, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (re_char *) string2, size2, ~~~~~~~~~~~~~~~~~~~~~~~~~~~ pos, regs, stop ~~~~~~~~~~~~~~~ RE_LISP_CONTEXT_ARGS); ~~~~~~~~~~~~~~~~~~~~~~ ALLOCA_GARBAGE_COLLECT (); ~~~~~~~~~~~~~~~~~~~~~~~~~~ return result; ~~~~~~~~~~~~~~ } ~ /* This is a separate function so that we can force an alloca cleanup ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ afterwards. */ ~~~~~~~~~~~~~~~ static int ~~~~~~~~~~ re_match_2_internal (struct re_pattern_buffer *bufp, re_char *string1, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int size1, re_char *string2, int size2, int pos, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ struct re_registers *regs, int stop ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RE_LISP_CONTEXT_ARGS_MULE_DECL) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ /* General temporaries. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~ int mcnt; ~~~~~~~~~ re_char *p1; ~~~~~~~~~~~~ int should_succeed; /* XEmacs change */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Just past the end of the corresponding string. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_char *end1, *end2; ~~~~~~~~~~~~~~~~~~~~~ /* Pointers into string1 and string2, just past the last characters in ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ each to consider matching. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_char *end_match_1, *end_match_2; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Where we are in the data, and the end of the current string. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_char *d, *dend; ~~~~~~~~~~~~~~~~~~ /* Where we are in the pattern, and the end of the pattern. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ unsigned char *p; ~~~~~~~~~~~~~~~~~ re_char *pstart; ~~~~~~~~~~~~~~~~ REGISTER re_char *pend; ~~~~~~~~~~~~~~~~~~~~~~~ /* Mark the opcode just after a start_memory, so we can test for an ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ empty subpattern when we get to the stop_memory. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_char *just_past_start_mem = 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* We use this to map every character in the string. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RE_TRANSLATE_TYPE translate = bufp->translate; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Failure point stack. Each place that can handle a failure further ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ down the line pushes a failure point on this stack. It consists of ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ restart, regend, and reg_info for all registers corresponding to ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the subexpressions we're currently inside, plus the number of such ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ registers, and, finally, two char *'s. The first char * is where ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ to resume scanning the pattern; the second one is where to resume ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ scanning the strings. If the latter is zero, the failure point is ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ a ``dummy''; if a failure happens and the failure point is a dummy, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ it gets discarded and the next one is tried. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef MATCH_MAY_ALLOCATE /* otherwise, this is global. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fail_stack_type fail_stack; ~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ #ifdef DEBUG ~~~~~~~~~~~~ static int failure_id; ~~~~~~~~~~~~~~~~~~~~~~ int nfailure_points_pushed = 0, nfailure_points_popped = 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ #ifdef REGEX_REL_ALLOC ~~~~~~~~~~~~~~~~~~~~~~ /* This holds the pointer to the failure stack, when ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ it is allocated relocatably. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fail_stack_elt_t *failure_stack_ptr; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ /* We fill all the registers internally, independent of what we ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return, for use in backreferences. The number here includes ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ an element for register zero. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regnum_t num_regs = bufp->re_ngroups + 1; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* The currently active registers. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regnum_t lowest_active_reg = NO_LOWEST_ACTIVE_REG; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regnum_t highest_active_reg = NO_HIGHEST_ACTIVE_REG; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Information on the contents of registers. These are pointers into ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the input strings; they record just what was matched (on this ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ attempt) by a subexpression part of the pattern, that is, the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regnum-th regstart pointer points to where in the pattern we began ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ matching and the regnum-th regend points to right after where we ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ stopped matching the regnum-th subexpression. (The zeroth register ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ keeps track of what the whole pattern matches.) */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_char **regstart, **regend; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ /* If a group that's operated upon by a repetition operator fails to ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ match anything, then the register for its start will need to be ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ restored because it will have been set to wherever in the string we ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ are when we last see its open-group operator. Similarly for a ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ register's end. */ ~~~~~~~~~~~~~~~~~~~ #ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_char **old_regstart, **old_regend; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ /* The is_active field of reg_info helps us keep track of which (possibly ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ nested) subexpressions we are currently in. The matched_something ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ field of reg_info[reg_num] helps us tell whether or not we have ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ matched any of the pattern so far this time through the reg_num-th ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ subexpression. These two fields get reset each time through any ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ loop their register is in. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef MATCH_MAY_ALLOCATE /* otherwise, this is global. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ register_info_type *reg_info; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ /* The following record the register info as found in the above ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ variables when we find a match better than any we've seen before. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ This happens as we backtrack through the failure points, which in ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ turn happens only if we have not yet matched the entire string. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int best_regs_set = false; ~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_char **best_regstart, **best_regend; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ /* Logically, this is `best_regend[0]'. But we don't want to have to ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ allocate space for that if we're not allocating space for anything ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ else (see below). Also, we never need info about register 0 for ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ any of the other register vectors, and it seems rather a kludge to ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ treat `best_regend' differently than the rest. So we keep track of ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the end of the best match so far in a separate variable. We ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ initialize this to NULL so that when we backtrack the first time ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ and need to test it, it's not garbage. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_char *match_end = NULL; ~~~~~~~~~~~~~~~~~~~~~~~~~~ /* This helps SET_REGS_MATCHED avoid doing redundant work. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int set_regs_matched_done = 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Used when we pop values we don't care about. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_char **reg_dummy; ~~~~~~~~~~~~~~~~~~~~ register_info_type *reg_info_dummy; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ #ifdef DEBUG ~~~~~~~~~~~~ /* Counts the total number of registers pushed. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int num_regs_pushed = 0; ~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ /* 1 if this match ends in the same string (string1 or string2) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ as the best previous match. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_bool same_str_p; ~~~~~~~~~~~~~~~~~~~ /* 1 if this match is the best seen so far. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_bool best_match_p; ~~~~~~~~~~~~~~~~~~~~~ #ifdef emacs ~~~~~~~~~~~~ Internal_Format fmt = buffer_or_other_internal_format (lispobj); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef REL_ALLOC ~~~~~~~~~~~~~~~~ const Ibyte *orig_buftext = ~~~~~~~~~~~~~~~~~~~~~~~~~~~ BUFFERP (lispobj) ? ~~~~~~~~~~~~~~~~~~~ BYTE_BUF_BYTE_ADDRESS (XBUFFER (lispobj), ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ BYTE_BUF_BEG (XBUFFER (lispobj))) : ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 0; ~~ #endif ~~~~~~ #ifdef ERROR_CHECK_MALLOC ~~~~~~~~~~~~~~~~~~~~~~~~~ int depth = bind_regex_malloc_disallowed (1); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ #endif /* emacs */ ~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT1 ("\n\nEntering re_match_2.\n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ BEGIN_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~~~ INIT_FAIL_STACK (); ~~~~~~~~~~~~~~~~~~~ p = (unsigned char *) ALLOCA (bufp->used); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ END_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~ /* re_match_2_internal() modifies the compiled pattern (see the succeed_n, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ jump_n, set_number_at opcodes), make it re-entrant by working on a ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ copy. This should also give better locality of reference. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ memcpy (p, bufp->buffer, bufp->used); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ pstart = (re_char *) p; ~~~~~~~~~~~~~~~~~~~~~~~ pend = pstart + bufp->used; ~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef MATCH_MAY_ALLOCATE ~~~~~~~~~~~~~~~~~~~~~~~~~ /* Do not bother to initialize all the register variables if there are ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ no groups in the pattern, as it takes a fair amount of time. If ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ there are groups, we include space for register 0 (the whole ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ pattern), even though we never use it, since it simplifies the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ array indexing. We should fix this. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (bufp->re_ngroups) ~~~~~~~~~~~~~~~~~~~~~ { ~ BEGIN_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~~~ regstart = REGEX_TALLOC (num_regs, re_char *); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regend = REGEX_TALLOC (num_regs, re_char *); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ old_regstart = REGEX_TALLOC (num_regs, re_char *); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ old_regend = REGEX_TALLOC (num_regs, re_char *); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ best_regstart = REGEX_TALLOC (num_regs, re_char *); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ best_regend = REGEX_TALLOC (num_regs, re_char *); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ reg_info = REGEX_TALLOC (num_regs, register_info_type); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ reg_dummy = REGEX_TALLOC (num_regs, re_char *); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ reg_info_dummy = REGEX_TALLOC (num_regs, register_info_type); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ END_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~ if (!(regstart && regend && old_regstart && old_regend && reg_info ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ && best_regstart && best_regend && reg_dummy && reg_info_dummy)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ FREE_VARIABLES (); ~~~~~~~~~~~~~~~~~~ return -2; ~~~~~~~~~~ } ~ } ~ else ~~~~ { ~ /* We must initialize all our variables to NULL, so that ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `FREE_VARIABLES' doesn't try to free them. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regstart = regend = old_regstart = old_regend = best_regstart ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ = best_regend = reg_dummy = NULL; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ reg_info = reg_info_dummy = (register_info_type *) NULL; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ #endif /* MATCH_MAY_ALLOCATE */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #if defined (emacs) && defined (REL_ALLOC) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ /* If the allocations above (or the call to setup_syntax_cache() in ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_match_2) caused a rel-alloc relocation, then fix up the data ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ pointers */ ~~~~~~~~~~~ Bytecount offset = offset_post_relocation (lispobj, orig_buftext); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (offset) ~~~~~~~~~~~ { ~ string1 += offset; ~~~~~~~~~~~~~~~~~~ string2 += offset; ~~~~~~~~~~~~~~~~~~ } ~ } ~ #endif /* defined (emacs) && defined (REL_ALLOC) */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* The starting position is bogus. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (pos < 0 || pos > size1 + size2) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ FREE_VARIABLES (); ~~~~~~~~~~~~~~~~~~ return -1; ~~~~~~~~~~ } ~ /* Initialize subexpression text positions to our sentinel to mark ones that ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ no start_memory/stop_memory has been seen for. Also initialize the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ register information struct. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (mcnt = 1; mcnt < num_regs; mcnt++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ regstart[mcnt] = regend[mcnt] = old_regstart[mcnt] = old_regend[mcnt] ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ = best_regstart[mcnt] = best_regend[mcnt] = REG_UNSET_VALUE; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ REG_MATCH_NULL_STRING_P (reg_info[mcnt]) = MATCH_NULL_UNSET_VALUE; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ IS_ACTIVE (reg_info[mcnt]) = 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ MATCHED_SOMETHING (reg_info[mcnt]) = 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ EVER_MATCHED_SOMETHING (reg_info[mcnt]) = 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* We move `string1' into `string2' if the latter's empty -- but not if ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `string1' is null. */ ~~~~~~~~~~~~~~~~~~~~~~ if (size2 == 0 && string1 != NULL) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ string2 = string1; ~~~~~~~~~~~~~~~~~~ size2 = size1; ~~~~~~~~~~~~~~ string1 = 0; ~~~~~~~~~~~~ size1 = 0; ~~~~~~~~~~ } ~ end1 = string1 + size1; ~~~~~~~~~~~~~~~~~~~~~~~ end2 = string2 + size2; ~~~~~~~~~~~~~~~~~~~~~~~ /* Compute where to stop matching, within the two strings. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (stop <= size1) ~~~~~~~~~~~~~~~~~~ { ~ end_match_1 = string1 + stop; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ end_match_2 = string2; ~~~~~~~~~~~~~~~~~~~~~~ } ~ else ~~~~ { ~ end_match_1 = end1; ~~~~~~~~~~~~~~~~~~~ end_match_2 = string2 + stop - size1; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* `p' scans through the pattern as `d' scans through the data. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `dend' is the end of the input string that `d' points within. `d' ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ is advanced into the following input string whenever necessary, but ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ this happens before fetching; therefore, at the beginning of the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ loop, `d' can be pointing at the end of a string, but it cannot ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ equal `string2'. */ ~~~~~~~~~~~~~~~~~~~~ if (size1 > 0 && pos <= size1) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ d = string1 + pos; ~~~~~~~~~~~~~~~~~~ dend = end_match_1; ~~~~~~~~~~~~~~~~~~~ } ~ else ~~~~ { ~ d = string2 + pos - size1; ~~~~~~~~~~~~~~~~~~~~~~~~~~ dend = end_match_2; ~~~~~~~~~~~~~~~~~~~ } ~ DEBUG_MATCH_PRINT1 ("The compiled pattern is: \n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT_COMPILED_PATTERN (bufp, p, pend); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT1 ("The string to match is: `"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT_DOUBLE_STRING (d, string1, size1, string2, size2); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT1 ("'\n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* This loops over pattern commands. It exits by returning from the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ function if the match is complete, or it drops through if the match ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fails at this starting point in the input data. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (;;) ~~~~~~~~ { ~ DEBUG_MATCH_PRINT2 ("\n0x%zx: ", (Bytecount) p); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef emacs /* XEmacs added, w/removal of immediate_quit */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!no_quit_in_re_search) ~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ BEGIN_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~~~ QUIT; ~~~~~ END_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~ RE_MATCH_RELOCATE_MOVEABLE_DATA_POINTERS (); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ #endif ~~~~~~ if (p == pend) ~~~~~~~~~~~~~~ { /* End of pattern means we might have succeeded. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT1 ("end of pattern ... "); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* If we haven't matched the entire string, and we want the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ longest match, try backtracking. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (d != end_match_2) ~~~~~~~~~~~~~~~~~~~~~ { ~ same_str_p = (FIRST_STRING_P (match_end) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ == MATCHING_IN_FIRST_STRING); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* AIX compiler got confused when this was combined ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ with the previous declaration. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (same_str_p) ~~~~~~~~~~~~~~~ best_match_p = d > match_end; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ else ~~~~ best_match_p = !MATCHING_IN_FIRST_STRING; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT1 ("backtracking.\n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!FAIL_STACK_EMPTY ()) ~~~~~~~~~~~~~~~~~~~~~~~~~ { /* More failure points to try. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* If exceeds best match so far, save it. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!best_regs_set || best_match_p) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ best_regs_set = true; ~~~~~~~~~~~~~~~~~~~~~ match_end = d; ~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT1 ("\nSAVING match as best so far.\n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (mcnt = 1; mcnt < num_regs; mcnt++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ best_regstart[mcnt] = regstart[mcnt]; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ best_regend[mcnt] = regend[mcnt]; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ } ~ goto fail; ~~~~~~~~~~ } ~ /* If no failure points, don't restore garbage. And if ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ last match is real best match, don't restore second ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ best one. */ ~~~~~~~~~~~~ else if (best_regs_set && !best_match_p) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ restore_best_regs: ~~~~~~~~~~~~~~~~~~ /* Restore best match. It may happen that `dend == ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ end_match_1' while the restored d is in string2. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ For example, the pattern `x.*y.*z' against the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ strings `x-' and `y-z-', if the two strings are ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ not consecutive in memory. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT1 ("Restoring best registers.\n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ d = match_end; ~~~~~~~~~~~~~~ dend = ((d >= string1 && d <= end1) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ? end_match_1 : end_match_2); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (mcnt = 1; mcnt < num_regs; mcnt++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ regstart[mcnt] = best_regstart[mcnt]; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regend[mcnt] = best_regend[mcnt]; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ } ~ } /* d != end_match_2 */ ~~~~~~~~~~~~~~~~~~~~~~~~ succeed_label: ~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT1 ("Accepting match.\n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* If caller wants register contents data back, do it. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ int num_nonshy_regs = bufp->re_nsub + 1; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (regs && !bufp->no_sub) ~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ /* Have the register data arrays been allocated? */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (bufp->regs_allocated == REGS_UNALLOCATED) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { /* No. So allocate them with malloc. We need one ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ extra element beyond `num_regs' for the `-1' marker ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ GNU code uses. */ ~~~~~~~~~~~~~~~~~~ regs->num_regs = MAX (RE_NREGS, num_nonshy_regs + 1); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ BEGIN_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~~~ regs->start = TALLOC (regs->num_regs, regoff_t); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regs->end = TALLOC (regs->num_regs, regoff_t); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ END_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~ RE_MATCH_RELOCATE_MOVEABLE_DATA_POINTERS (); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (regs->start == NULL || regs->end == NULL) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ FREE_VARIABLES (); ~~~~~~~~~~~~~~~~~~ return -2; ~~~~~~~~~~ } ~ bufp->regs_allocated = REGS_REALLOCATE; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ else if (bufp->regs_allocated == REGS_REALLOCATE) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { /* Yes. If we need more elements than were already ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ allocated, reallocate them. If we need fewer, just ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ leave it alone. */ ~~~~~~~~~~~~~~~~~~~ if (regs->num_regs < num_nonshy_regs + 1) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ regs->num_regs = num_nonshy_regs + 1; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ BEGIN_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~~~ RETALLOC (regs->start, regs->num_regs, regoff_t); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RETALLOC (regs->end, regs->num_regs, regoff_t); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ END_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~ RE_MATCH_RELOCATE_MOVEABLE_DATA_POINTERS (); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (regs->start == NULL || regs->end == NULL) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ FREE_VARIABLES (); ~~~~~~~~~~~~~~~~~~ return -2; ~~~~~~~~~~ } ~ } ~ } ~ else ~~~~ { ~ /* The braces fend off a "empty body in an else-statement" ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ warning under GCC when assert expands to nothing. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ assert (bufp->regs_allocated == REGS_FIXED); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* Convert the pointer data in `regstart' and `regend' to ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ indices. Register zero has to be set differently, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ since we haven't kept track of any info for it. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (regs->num_regs > 0) ~~~~~~~~~~~~~~~~~~~~~~~ { ~ regs->start[0] = pos; ~~~~~~~~~~~~~~~~~~~~~ regs->end[0] = (MATCHING_IN_FIRST_STRING ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ? ((regoff_t) (d - string1)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ : ((regoff_t) (d - string2 + size1))); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* Map over the NUM_NONSHY_REGS non-shy internal registers. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Copy each into the corresponding external register. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ MCNT indexes external registers. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (mcnt = 1; mcnt < MIN (num_nonshy_regs, regs->num_regs); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ mcnt++) ~~~~~~~ { ~ int internal_reg = bufp->external_to_internal_register[mcnt]; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if ((int)0xDEADBEEF == internal_reg ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || REG_UNSET (regstart[internal_reg]) || ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ REG_UNSET (regend[internal_reg])) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regs->start[mcnt] = regs->end[mcnt] = -1; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ else ~~~~ { ~ regs->start[mcnt] = ~~~~~~~~~~~~~~~~~~~ (regoff_t) POINTER_TO_OFFSET (regstart[internal_reg]); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regs->end[mcnt] = ~~~~~~~~~~~~~~~~~ (regoff_t) POINTER_TO_OFFSET (regend[internal_reg]); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ } ~ } /* regs && !bufp->no_sub */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* If we have regs and the regs structure has more elements than ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ were in the pattern, set the extra elements starting with ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ NUM_NONSHY_REGS to -1. If we (re)allocated the registers, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ this is the case, because we always allocate enough to have ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ at least one -1 at the end. ~~~~~~~~~~~~~~~~~~~~~~~~~~~ We do this even when no_sub is set because some applications ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (XEmacs) reuse register structures which may contain stale ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ information, and permit attempts to access those registers. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ It would be possible to require the caller to do this, but we'd ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ have to change the API for this function to reflect that, and ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ audit all callers. Note: as of 2003-04-17 callers in XEmacs ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ do clear the registers, but it's safer to leave this code in ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ because of reallocation. ~~~~~~~~~~~~~~~~~~~~~~~~ */ ~~ if (regs && regs->num_regs > 0) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (mcnt = num_nonshy_regs; mcnt < regs->num_regs; mcnt++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regs->start[mcnt] = regs->end[mcnt] = -1; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ DEBUG_MATCH_PRINT4 ("%u failure points pushed, %u popped (%u remain).\n", ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ nfailure_points_pushed, nfailure_points_popped, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ nfailure_points_pushed - nfailure_points_popped); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT2 ("%u registers pushed.\n", num_regs_pushed); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ mcnt = d - pos - (MATCHING_IN_FIRST_STRING ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ? string1 ~~~~~~~~~ : string2 - size1); ~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT2 ("Returning %d from re_match_2.\n", mcnt); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ FREE_VARIABLES (); ~~~~~~~~~~~~~~~~~~ return mcnt; ~~~~~~~~~~~~ } ~ /* Otherwise match next pattern command. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ switch ((re_opcode_t) *p++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ /* Ignore these. Used to ignore the n of succeed_n's which ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ currently have n == 0. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~ case no_op: ~~~~~~~~~~~ DEBUG_MATCH_PRINT1 ("EXECUTING no_op.\n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case succeed: ~~~~~~~~~~~~~ DEBUG_MATCH_PRINT1 ("EXECUTING succeed.\n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto succeed_label; ~~~~~~~~~~~~~~~~~~~ /* Match exactly a string of length n in the pattern. The ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ following byte in the pattern defines n, and the n bytes after ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ that make up the string to match. (Under Mule, this will be in ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the default internal format.) */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case exactn: ~~~~~~~~~~~~ mcnt = *p++; ~~~~~~~~~~~~ DEBUG_MATCH_PRINT2 ("EXECUTING exactn %d.\n", mcnt); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* This is written out as an if-else so we don't waste time ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ testing `translate' inside the loop. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (TRANSLATE_P (translate)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ do ~~ { ~ #ifdef MULE ~~~~~~~~~~~ Bytecount pat_len; ~~~~~~~~~~~~~~~~~~ REGEX_PREFETCH (); ~~~~~~~~~~~~~~~~~~ if (RE_TRANSLATE_1 (itext_ichar_fmt (d, fmt, lispobj)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ != itext_ichar (p)) ~~~~~~~~~~~~~~~~~~~ goto fail; ~~~~~~~~~~ pat_len = itext_ichar_len (p); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ p += pat_len; ~~~~~~~~~~~~~ INC_IBYTEPTR_FMT (d, fmt); ~~~~~~~~~~~~~~~~~~~~~~~~~~ mcnt -= pat_len; ~~~~~~~~~~~~~~~~ #else /* not MULE */ ~~~~~~~~~~~~~~~~~~~~ REGEX_PREFETCH (); ~~~~~~~~~~~~~~~~~~ if ((unsigned char) RE_TRANSLATE_1 (*d++) != *p++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto fail; ~~~~~~~~~~ mcnt--; ~~~~~~~ #endif ~~~~~~ } ~ while (mcnt > 0); ~~~~~~~~~~~~~~~~~ } ~ else ~~~~ { ~ #ifdef MULE ~~~~~~~~~~~ /* If buffer format is default, then we can shortcut and just ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ compare the text directly, byte by byte. Otherwise, we ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ need to go character by character. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (fmt != FORMAT_DEFAULT) ~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ do ~~ { ~ Bytecount pat_len; ~~~~~~~~~~~~~~~~~~ REGEX_PREFETCH (); ~~~~~~~~~~~~~~~~~~ if (itext_ichar_fmt (d, fmt, lispobj) != ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ itext_ichar (p)) ~~~~~~~~~~~~~~~~ goto fail; ~~~~~~~~~~ pat_len = itext_ichar_len (p); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ p += pat_len; ~~~~~~~~~~~~~ INC_IBYTEPTR_FMT (d, fmt); ~~~~~~~~~~~~~~~~~~~~~~~~~~ mcnt -= pat_len; ~~~~~~~~~~~~~~~~ } ~ while (mcnt > 0); ~~~~~~~~~~~~~~~~~ } ~ else ~~~~ #endif ~~~~~~ { ~ do ~~ { ~ REGEX_PREFETCH (); ~~~~~~~~~~~~~~~~~~ if (*d++ != *p++) goto fail; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ mcnt--; ~~~~~~~ } ~ while (mcnt > 0); ~~~~~~~~~~~~~~~~~ } ~ } ~ SET_REGS_MATCHED (); ~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ /* Match any character except possibly a newline or a null. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case anychar: ~~~~~~~~~~~~~ DEBUG_MATCH_PRINT1 ("EXECUTING anychar.\n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ REGEX_PREFETCH (); ~~~~~~~~~~~~~~~~~~ if ((!(bufp->syntax & RE_DOT_NEWLINE) && ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RE_TRANSLATE (itext_ichar_fmt (d, fmt, lispobj)) == '\n') ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || (bufp->syntax & RE_DOT_NOT_NULL && ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RE_TRANSLATE (itext_ichar_fmt (d, fmt, lispobj)) == ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ '\000')) ~~~~~~~~ goto fail; ~~~~~~~~~~ SET_REGS_MATCHED (); ~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT2 (" Matched `%c'.\n", *d); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ INC_IBYTEPTR_FMT (d, fmt); /* XEmacs change */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case charset: ~~~~~~~~~~~~~ case charset_not: ~~~~~~~~~~~~~~~~~ { ~ REGISTER Ichar c; ~~~~~~~~~~~~~~~~~ re_bool not_p = (re_opcode_t) *(p - 1) == charset_not; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT2 ("EXECUTING charset%s.\n", not_p ? "_not" : ""); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ REGEX_PREFETCH (); ~~~~~~~~~~~~~~~~~~ c = itext_ichar_fmt (d, fmt, lispobj); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ c = RE_TRANSLATE (c); /* The character to match. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Cast to `unsigned int' instead of `unsigned char' in case the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bit list is a full 32 bytes long. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if ((unsigned int)c < (unsigned int) (*p * BYTEWIDTH) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ && p[1 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH))) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ not_p = !not_p; ~~~~~~~~~~~~~~~ p += 1 + *p; ~~~~~~~~~~~~ if (!not_p) goto fail; ~~~~~~~~~~~~~~~~~~~~~~ SET_REGS_MATCHED (); ~~~~~~~~~~~~~~~~~~~~ INC_IBYTEPTR_FMT (d, fmt); /* XEmacs change */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ } ~ #ifdef MULE ~~~~~~~~~~~ case charset_mule: ~~~~~~~~~~~~~~~~~~ case charset_mule_not: ~~~~~~~~~~~~~~~~~~~~~~ { ~ REGISTER Ichar c; ~~~~~~~~~~~~~~~~~ re_bool not_p = (re_opcode_t) *(p - 1) == charset_mule_not; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Bitbyte class_bits = *p++; ~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT2 ("EXECUTING charset_mule%s.\n", ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ not_p ? "_not" : ""); ~~~~~~~~~~~~~~~~~~~~~ REGEX_PREFETCH (); ~~~~~~~~~~~~~~~~~~ c = itext_ichar_fmt (d, fmt, lispobj); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ c = RE_TRANSLATE (c); /* The character to match. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if ((class_bits && ~~~~~~~~~~~~~~~~~~ ((class_bits & BIT_WORD && ISWORD (c)) /* = ALNUM */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || (class_bits & BIT_ALPHA && ISALPHA (c)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || (class_bits & BIT_SPACE && ISSPACE (c)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || (class_bits & BIT_PUNCT && ISPUNCT (c)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || (TRANSLATE_P (translate) ? ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (class_bits & (BIT_UPPER | BIT_LOWER) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ && !NOCASEP (lispbuf, c)) ~~~~~~~~~~~~~~~~~~~~~~~~~ : ((class_bits & BIT_UPPER && ISUPPER (c)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || (class_bits & BIT_LOWER && ISLOWER (c)))))) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || EQ (Qt, unified_range_table_lookup ((void *) p, c, Qnil))) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ not_p = !not_p; ~~~~~~~~~~~~~~~ } ~ p += unified_range_table_bytes_used ((void *) p); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!not_p) goto fail; ~~~~~~~~~~~~~~~~~~~~~~ SET_REGS_MATCHED (); ~~~~~~~~~~~~~~~~~~~~ INC_IBYTEPTR_FMT (d, fmt); ~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ } ~ #endif /* MULE */ ~~~~~~~~~~~~~~~~~ /* The beginning of a group is represented by start_memory. The ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ arguments are the register number in the next two bytes, and the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ number of groups inner to this one in the two bytes thereafter. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ The text matched within the group is recorded (in the internal ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ registers data structure) under the register number. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case start_memory: ~~~~~~~~~~~~~~~~~~ { ~ regnum_t regno; ~~~~~~~~~~~~~~~ /* Find out if this group can match the empty string. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ p1 = p; /* To send to group_match_null_string_p. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ EXTRACT_NONNEGATIVE_AND_INCR (regno, p); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT3 ("EXECUTING start_memory %d (%d):\n", ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regno, extract_number (p)); ~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (REG_MATCH_NULL_STRING_P (reg_info[regno]) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ == MATCH_NULL_UNSET_VALUE) ~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ REG_MATCH_NULL_STRING_P (reg_info[regno]) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ = group_match_null_string_p (&p1, pend, reg_info); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ DEBUG_MATCH_PRINT2 (" group CAN%s match null string\n", ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ REG_MATCH_NULL_STRING_P (reg_info[regno]) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ? "NOT" : ""); ~~~~~~~~~~~~~~ /* Save the position in the string where we were the last time ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ we were at this open-group operator in case the group is ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ operated upon by a repetition operator, e.g., with `(a*)*b' ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ against `ab'; then we want to ignore where we are now in the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ string in case this attempt to match fails. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ old_regstart[regno] = REG_MATCH_NULL_STRING_P (reg_info[regno]) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ? REG_UNSET (regstart[regno]) ? d : regstart[regno] ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ : regstart[regno]; ~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT2 (" old_regstart: %zd\n", ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ POINTER_TO_OFFSET (old_regstart[regno])); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regstart[regno] = d; ~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT2 (" regstart: %zd\n", ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ POINTER_TO_OFFSET (regstart[regno])); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ IS_ACTIVE (reg_info[regno]) = 1; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ MATCHED_SOMETHING (reg_info[regno]) = 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Clear this whenever we change the register activity status. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ set_regs_matched_done = 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~ /* This is the new highest active register. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ highest_active_reg = regno; ~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* If nothing was active before, this is the new lowest active ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ register. */ ~~~~~~~~~~~~~ if (lowest_active_reg == NO_LOWEST_ACTIVE_REG) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ lowest_active_reg = regno; ~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Move past the inner group count. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ p += 2; ~~~~~~~ just_past_start_mem = p; ~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ } ~ /* The stop_memory opcode represents the end of a group. Its ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ arguments are the same as start_memory's: the register ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ number, and the number of inner groups. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case stop_memory: ~~~~~~~~~~~~~~~~~ { ~ regnum_t regno, inner_groups; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ EXTRACT_NONNEGATIVE_AND_INCR (regno, p); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ EXTRACT_NONNEGATIVE_AND_INCR (inner_groups, p); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT3 ("EXECUTING stop_memory %d (%d):\n", ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regno, inner_groups); ~~~~~~~~~~~~~~~~~~~~~ /* We need to save the string position the last time we were at ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ this close-group operator in case the group is operated ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ upon by a repetition operator, e.g., with `((a*)*(b*)*)*' ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ against `aba'; then we want to ignore where we are now in ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the string in case this attempt to match fails. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ old_regend[regno] = REG_MATCH_NULL_STRING_P (reg_info[regno]) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ? REG_UNSET (regend[regno]) ? d : regend[regno] ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ : regend[regno]; ~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT2 (" old_regend: %zd\n", ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ POINTER_TO_OFFSET (old_regend[regno])); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regend[regno] = d; ~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT2 (" regend: %zd\n", ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ POINTER_TO_OFFSET (regend[regno])); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* This register isn't active anymore. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ IS_ACTIVE (reg_info[regno]) = 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Clear this whenever we change the register activity status. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ set_regs_matched_done = 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~ /* If this was the only register active, nothing is active ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ anymore. */ ~~~~~~~~~~~~ if (lowest_active_reg == highest_active_reg) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ lowest_active_reg = NO_LOWEST_ACTIVE_REG; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ highest_active_reg = NO_HIGHEST_ACTIVE_REG; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ else ~~~~ { ~ /* We must scan for the new highest active register, since it ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ isn't necessarily one less than now: consider ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (a(b)c(d(e)f)g). When group 3 ends, after the f), the new ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ highest active register is 1. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regnum_t r = regno - 1; ~~~~~~~~~~~~~~~~~~~~~~~ while (r > 0 && !IS_ACTIVE (reg_info[r])) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ r--; ~~~~ /* If we end up at register zero, that means that we saved ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the registers as the result of an `on_failure_jump', not ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ a `start_memory', and we jumped to past the innermost ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `stop_memory'. For example, in ((.)*) we save registers ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 1 and 2 as a result of the *, but when we pop back to the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ second ), we are at the stop_memory 1. Thus, nothing is ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ active. */ ~~~~~~~~~~~ if (r == 0) ~~~~~~~~~~~ { ~ lowest_active_reg = NO_LOWEST_ACTIVE_REG; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ highest_active_reg = NO_HIGHEST_ACTIVE_REG; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ else ~~~~ { ~ highest_active_reg = r; ~~~~~~~~~~~~~~~~~~~~~~~ /* 98/9/21 jhod: We've also gotta set lowest_active_reg, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ don't we? */ ~~~~~~~~~~~~ r = 1; ~~~~~~ while (r < highest_active_reg && !IS_ACTIVE(reg_info[r])) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ r++; ~~~~ lowest_active_reg = r; ~~~~~~~~~~~~~~~~~~~~~~ } ~ } ~ /* If just failed to match something this time around with a ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ group that's operated on by a repetition operator, try to ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ force exit from the ``loop'', and restore the register ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ information for this group that we had before trying this ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ last match. */ ~~~~~~~~~~~~~~~ if ((!MATCHED_SOMETHING (reg_info[regno]) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || just_past_start_mem == p - 4) && p < pend) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ re_bool is_a_jump_n = false; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ p1 = p; ~~~~~~~ mcnt = 0; ~~~~~~~~~ switch ((re_opcode_t) *p1++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ case jump_n: ~~~~~~~~~~~~ is_a_jump_n = true; ~~~~~~~~~~~~~~~~~~~ case pop_failure_jump: ~~~~~~~~~~~~~~~~~~~~~~ case maybe_pop_jump: ~~~~~~~~~~~~~~~~~~~~ case jump: ~~~~~~~~~~ case dummy_failure_jump: ~~~~~~~~~~~~~~~~~~~~~~~~ EXTRACT_NUMBER_AND_INCR (mcnt, p1); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (is_a_jump_n) ~~~~~~~~~~~~~~~~ p1 += 2; ~~~~~~~~ break; ~~~~~~ default: ~~~~~~~~ /* do nothing */ ; ~~~~~~~~~~~~~~~~~~ } ~ p1 += mcnt; ~~~~~~~~~~~ /* If the next operation is a jump backwards in the pattern ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ to an on_failure_jump right before the start_memory ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ corresponding to this stop_memory, exit from the loop ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ by forcing a failure after pushing on the stack the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ on_failure_jump's jump in the pattern, and d. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (mcnt < 0 && (re_opcode_t) *p1 == on_failure_jump ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ && (re_opcode_t) p1[3] == start_memory && ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regno == extract_nonnegative (p1 + 4)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ /* If this group ever matched anything, then restore ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ what its registers were before trying this last ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ failed match, e.g., with `(a*)*b' against `ab' for ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regstart[1], and, e.g., with `((a*)*(b*)*)*' against ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `aba' for regend[3]. ~~~~~~~~~~~~~~~~~~~~ Also restore the registers for inner groups for, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ e.g., `((a*)(b*))*' against `aba' (register 3 would ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ otherwise get trashed). */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (EVER_MATCHED_SOMETHING (reg_info[regno])) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ int r; ~~~~~~ EVER_MATCHED_SOMETHING (reg_info[regno]) = 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Restore this and inner groups' (if any) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ registers. */ ~~~~~~~~~~~~~~ for (r = regno; r < regno + inner_groups; r++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ regstart[r] = old_regstart[r]; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* xx why this test? */ ~~~~~~~~~~~~~~~~~~~~~~~~ if (old_regend[r] >= regstart[r]) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regend[r] = old_regend[r]; ~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ } ~ p1++; ~~~~~ EXTRACT_NUMBER_AND_INCR (mcnt, p1); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PUSH_FAILURE_POINT (p1 + mcnt, d, -2); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto fail; ~~~~~~~~~~ } ~ } ~ /* We used to move past the register number and inner group count ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ here, when registers were just one byte; that's no longer ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ necessary with EXTRACT_NUMBER_AND_INCR(), above. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ } ~ /* \ has been turned into a `duplicate' command which is ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ followed by the numeric value of as the register number. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (Already passed through external-to-internal-register mapping, so ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ it refers to the actual group number, not the non-shy-only ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ numbering used in the external world.) */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case duplicate: ~~~~~~~~~~~~~~~ { ~ REGISTER re_char *d2, *dend2; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Get which register to match against. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regnum_t regno; ~~~~~~~~~~~~~~~ EXTRACT_NONNEGATIVE_AND_INCR (regno, p); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT2 ("EXECUTING duplicate %d.\n", regno); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Can't back reference a group which we've never matched. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (REG_UNSET (regstart[regno]) || REG_UNSET (regend[regno])) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto fail; ~~~~~~~~~~ /* Where in input to try to start matching. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ d2 = regstart[regno]; ~~~~~~~~~~~~~~~~~~~~~ /* Where to stop matching; if both the place to start and ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the place to stop matching are in the same string, then ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ set to the place to stop, otherwise, for now have to use ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the end of the first string. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ dend2 = ((FIRST_STRING_P (regstart[regno]) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ == FIRST_STRING_P (regend[regno])) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ? regend[regno] : end_match_1); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (;;) ~~~~~~~~ { ~ /* If necessary, advance to next segment in register ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ contents. */ ~~~~~~~~~~~~~ while (d2 == dend2) ~~~~~~~~~~~~~~~~~~~ { ~ if (dend2 == end_match_2) break; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (dend2 == regend[regno]) break; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* End of string1 => advance to string2. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ d2 = string2; ~~~~~~~~~~~~~ dend2 = regend[regno]; ~~~~~~~~~~~~~~~~~~~~~~ } ~ /* At end of register contents => success */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (d2 == dend2) break; ~~~~~~~~~~~~~~~~~~~~~~~ /* If necessary, advance to next segment in data. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ REGEX_PREFETCH (); ~~~~~~~~~~~~~~~~~~ /* How many characters left in this segment to match. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ mcnt = dend - d; ~~~~~~~~~~~~~~~~ /* Want how many consecutive characters we can match in ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ one shot, so, if necessary, adjust the count. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (mcnt > dend2 - d2) ~~~~~~~~~~~~~~~~~~~~~~ mcnt = dend2 - d2; ~~~~~~~~~~~~~~~~~~ /* Compare that many; failure if mismatch, else move ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ past them. */ ~~~~~~~~~~~~~~ if (TRANSLATE_P (translate) ~~~~~~~~~~~~~~~~~~~~~~~~~~~ ? bcmp_translate (d, d2, mcnt, translate ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef emacs ~~~~~~~~~~~~ , fmt, lispobj ~~~~~~~~~~~~~~ #endif ~~~~~~ ) ~ : memcmp (d, d2, mcnt)) ~~~~~~~~~~~~~~~~~~~~~~~ goto fail; ~~~~~~~~~~ d += mcnt, d2 += mcnt; ~~~~~~~~~~~~~~~~~~~~~~ /* Do this because we've match some characters. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ SET_REGS_MATCHED (); ~~~~~~~~~~~~~~~~~~~~ } ~ } ~ break; ~~~~~~ /* begline matches the empty string at the beginning of the string ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (unless `not_bol' is set in `bufp'), and, if ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `newline_anchor' is set, after newlines. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case begline: ~~~~~~~~~~~~~ DEBUG_MATCH_PRINT1 ("EXECUTING begline.\n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (AT_STRINGS_BEG (d)) ~~~~~~~~~~~~~~~~~~~~~~~ { ~ if (!bufp->not_bol) break; ~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ else ~~~~ { ~ re_char *d2 = d; ~~~~~~~~~~~~~~~~ DEC_IBYTEPTR (d2); ~~~~~~~~~~~~~~~~~~ if (itext_ichar_ascii_fmt (d2, fmt, lispobj) == '\n' && ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->newline_anchor) ~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ } ~ /* In all other cases, we fail. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto fail; ~~~~~~~~~~ /* endline is the dual of begline. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case endline: ~~~~~~~~~~~~~ DEBUG_MATCH_PRINT1 ("EXECUTING endline.\n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (AT_STRINGS_END (d)) ~~~~~~~~~~~~~~~~~~~~~~~ { ~ if (!bufp->not_eol) break; ~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* We have to ``prefetch'' the next character. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ else if ((d == end1 ? ~~~~~~~~~~~~~~~~~~~~~ itext_ichar_ascii_fmt (string2, fmt, lispobj) : ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ itext_ichar_ascii_fmt (d, fmt, lispobj)) == '\n' ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ && bufp->newline_anchor) ~~~~~~~~~~~~~~~~~~~~~~~~ { ~ break; ~~~~~~ } ~ goto fail; ~~~~~~~~~~ /* Match at the very beginning of the data. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case begbuf: ~~~~~~~~~~~~ DEBUG_MATCH_PRINT1 ("EXECUTING begbuf.\n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (AT_STRINGS_BEG (d)) ~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ goto fail; ~~~~~~~~~~ /* Match at the very end of the data. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case endbuf: ~~~~~~~~~~~~ DEBUG_MATCH_PRINT1 ("EXECUTING endbuf.\n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (AT_STRINGS_END (d)) ~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ goto fail; ~~~~~~~~~~ /* on_failure_keep_string_jump is used to optimize `.*\n'. It ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ pushes NULL as the value for the string on the stack. Then ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `pop_failure_point' will keep the current value for the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ string, instead of restoring it. To see why, consider ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ matching `foo\nbar' against `.*\n'. The .* matches the foo; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ then the . fails against the \n. But the next thing we want ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ to do is match the \n against the \n; if we restored the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ string value, we would be back at the foo. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Because this is used only in specific cases, we don't need to ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ check all the things that `on_failure_jump' does, to make ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ sure the right things get saved on the stack. Hence we don't ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ share its code. The only reason to push anything on the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ stack at all is that otherwise we would have to change ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `anychar's code to do something besides goto fail in this ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case; that seems worse than this. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case on_failure_keep_string_jump: ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT1 ("EXECUTING on_failure_keep_string_jump"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ EXTRACT_NUMBER_AND_INCR (mcnt, p); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT3 (" %d (to 0x%zx):\n", mcnt, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (Bytecount) (p + mcnt)); ~~~~~~~~~~~~~~~~~~~~~~~~ PUSH_FAILURE_POINT (p + mcnt, (unsigned char *) 0, -2); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ /* Uses of on_failure_jump: ~~~~~~~~~~~~~~~~~~~~~~~~~~~ Each alternative starts with an on_failure_jump that points ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ to the beginning of the next alternative. Each alternative ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ except the last ends with a jump that in effect jumps past ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the rest of the alternatives. (They really jump to the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ending jump of the following alternative, because tensioning ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ these jumps is a hassle.) ~~~~~~~~~~~~~~~~~~~~~~~~~ Repeats start with an on_failure_jump that points past both ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the repetition text and either the following jump or ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ pop_failure_jump back to this on_failure_jump. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case on_failure_jump: ~~~~~~~~~~~~~~~~~~~~~ on_failure: ~~~~~~~~~~~ DEBUG_MATCH_PRINT1 ("EXECUTING on_failure_jump"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ EXTRACT_NUMBER_AND_INCR (mcnt, p); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT3 (" %d (to 0x%zx)", mcnt, (Bytecount) (p + mcnt)); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* If this on_failure_jump comes right before a group (i.e., ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the original * applied to a group), save the information ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for that group and all inner ones, so that if we fail back ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ to this point, the group's information will be correct. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ For example, in \(a*\)*\1, we need the preceding group, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ and in \(\(a*\)b*\)\2, we need the inner group. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* We can't use `p' to check ahead because we push ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ a failure point to `p + mcnt' after we do this. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ p1 = p; ~~~~~~~ /* We need to skip no_op's before we look for the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ start_memory in case this on_failure_jump is happening as ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the result of a completed succeed_n, as in \(a\)\{1,3\}b\1 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ against aba. */ ~~~~~~~~~~~~~~~~ while (p1 < pend && (re_opcode_t) *p1 == no_op) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ p1++; ~~~~~ if (p1 < pend && (re_opcode_t) *p1 == start_memory) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ /* We have a new highest active register now. This will ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ get reset at the start_memory we are about to get to, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ but we will have saved all the registers relevant to ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ this repetition op, as described above. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ highest_active_reg = *(p1 + 1) + *(p1 + 2); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (lowest_active_reg == NO_LOWEST_ACTIVE_REG) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ lowest_active_reg = *(p1 + 1); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ DEBUG_MATCH_PRINT1 (":\n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~ PUSH_FAILURE_POINT (p + mcnt, d, -2); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regex.c:774:55: note: in definition of macro 'DEBUG_FAIL_PRINT2' if (debug_regexps & RE_DEBUG_FAILURE_POINT) printf (x1, x2) ^~ regex.c:6590:11: note: in expansion of macro 'PUSH_FAILURE_POINT' PUSH_FAILURE_POINT (p + mcnt, d, -2); ^~~~~~~~~~~~~~~~~~ regex.c:1877:26: warning: format '%zd' expects argument of type 'signed size_t', but argument 2 has type 'long int' [-Wformat=] DEBUG_FAIL_PRINT2 (" Before pop, next avail: %zd\n", \ ^ (Bytecount) fail_stack.avail); \ ~~~~~~~~~~~~~~~~~~~~~~~~ regex.c:774:55: note: in definition of macro 'DEBUG_FAIL_PRINT2' if (debug_regexps & RE_DEBUG_FAILURE_POINT) printf (x1, x2) ^~ regex.c:6767:13: note: in expansion of macro 'POP_FAILURE_POINT' POP_FAILURE_POINT (sdummy, pdummy, ^~~~~~~~~~~~~~~~~ regex.c:1879:26: warning: format '%zd' expects argument of type 'signed size_t', but argument 2 has type 'long int' [-Wformat=] DEBUG_FAIL_PRINT2 (" size: %zd\n", \ ^ (Bytecount) fail_stack.size); \ ~~~~~~~~~~~~~~~~~~~~~~~~ regex.c:774:55: note: in definition of macro 'DEBUG_FAIL_PRINT2' if (debug_regexps & RE_DEBUG_FAILURE_POINT) printf (x1, x2) ^~ regex.c:6767:13: note: in expansion of macro 'POP_FAILURE_POINT' POP_FAILURE_POINT (sdummy, pdummy, ^~~~~~~~~~~~~~~~~ regex.c:1901:26: warning: format '%zx' expects argument of type 'size_t', but argument 2 has type 'long int' [-Wformat=] DEBUG_FAIL_PRINT2 (" Popping string 0x%zx: `", (Bytecount) str); \ ^ ~~~~~~~~~~~~~~~~~~~ DEBUG_FAIL_PRINT_DOUBLE_STRING (str, string1, size1, \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ string2, size2); \ ~~~~~~~~~~~~~~~~~~~~ DEBUG_FAIL_PRINT1 ("'\n"); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_FAIL_PRINT2 (" Popping pattern 0x%zx: ", (Bytecount) pat); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_FAIL_PRINT_COMPILED_PATTERN (bufp, pat, pend); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_FAIL_PRINT2 (" Popping high active reg: %d\n", high_reg); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_FAIL_PRINT2 (" Popping low active reg: %d\n", low_reg); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } \ ~~~~~~~~~~~ \ ~ for (this_reg = high_reg; this_reg >= low_reg; this_reg--) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { \ ~~~~~~~~~~~ reg_info[this_reg].word = POP_FAILURE_ELT (); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regend[this_reg] = POP_FAILURE_RELOCATABLE (); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regstart[this_reg] = POP_FAILURE_RELOCATABLE (); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ \ ~ if (DEBUG_RUNTIME_FLAGS & RE_DEBUG_FAILURE_POINT) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { \ ~~~~~~~~~~ DEBUG_FAIL_PRINT2 (" Popping reg: %d\n", this_reg); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_FAIL_PRINT2 (" info: 0x%zx\n", \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ * (Bytecount *) ®_info[this_reg]); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_FAIL_PRINT2 (" end: 0x%zx\n", \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (Bytecount) regend[this_reg]); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_FAIL_PRINT2 (" start: 0x%zx\n", \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (Bytecount) regstart[this_reg]); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } \ ~~~~~~~~~~ } \ ~~~~~~~~~~~ \ ~ set_regs_matched_done = 0; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_STATEMENT (nfailure_points_popped++); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } while (0) /* POP_FAILURE_POINT */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ~ /* Structure for per-register (a.k.a. per-group) information. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Other register information, such as the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ starting and ending positions (which are addresses), and the list of ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ inner groups (which is a bits list) are maintained in separate ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ variables. ~~~~~~~~~~ We are making a (strictly speaking) nonportable assumption here: that ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the compiler will pack our bit fields into something that fits into ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the type of `word', i.e., is something that fits into one item on the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ failure stack. */ ~~~~~~~~~~~~~~~~~~ typedef union ~~~~~~~~~~~~~ { ~ fail_stack_elt_t word; ~~~~~~~~~~~~~~~~~~~~~~ struct ~~~~~~ { ~ /* This field is one if this group can match the empty string, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ zero if not. If not yet determined, `MATCH_NULL_UNSET_VALUE'. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define MATCH_NULL_UNSET_VALUE 3 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ unsigned int match_null_string_p : 2; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ unsigned int is_active : 1; ~~~~~~~~~~~~~~~~~~~~~~~~~~~ unsigned int matched_something : 1; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ unsigned int ever_matched_something : 1; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } bits; ~~~~~~~ } register_info_type; ~~~~~~~~~~~~~~~~~~~~~ #define REG_MATCH_NULL_STRING_P(R) ((R).bits.match_null_string_p) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define IS_ACTIVE(R) ((R).bits.is_active) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define MATCHED_SOMETHING(R) ((R).bits.matched_something) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define EVER_MATCHED_SOMETHING(R) ((R).bits.ever_matched_something) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Call this when have matched a real character; it sets `matched' flags ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for the subexpressions which we are currently inside. Also records ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ that those subexprs have matched. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define SET_REGS_MATCHED() \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ do \ ~~~~~~~~~~~~ { \ ~~~~~~~~~~~ if (!set_regs_matched_done) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { \ ~~~~~~~~~~ int r; \ ~~~~~~~~~~~~~~ set_regs_matched_done = 1; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (r = lowest_active_reg; r <= highest_active_reg; r++) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { \ ~~~~~~~~~~ MATCHED_SOMETHING (reg_info[r]) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ = EVER_MATCHED_SOMETHING (reg_info[r]) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ = 1; \ ~~~~~~~~~~~~ } \ ~~~~~~~~~~ } \ ~~~~~~~~~~ } \ ~~~~~~~~~~~ while (0) ~~~~~~~~~ ~ /* Subroutine declarations and macros for regex_compile. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Fetch the next character in the uncompiled pattern---translating it ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if necessary. */ ~~~~~~~~~~~~~~~~~ #define PATFETCH(c) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~ do { \ ~~~~~~~~~~~~~~ PATFETCH_RAW (c); \ ~~~~~~~~~~~~~~~~~~~~~~~~~ c = RE_TRANSLATE (c); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } while (0) ~~~~~~~~~~~ /* Fetch the next character in the uncompiled pattern, with no ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ translation. */ ~~~~~~~~~~~~~~~~ #define PATFETCH_RAW(c) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ do {if (p == pend) return REG_EEND; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ assert (p < pend); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~ c = itext_ichar (p); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ INC_IBYTEPTR (p); \ ~~~~~~~~~~~~~~~~~~~~~~~~~ } while (0) ~~~~~~~~~~~ /* Go backwards one character in the pattern. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define PATUNFETCH DEC_IBYTEPTR (p) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* If `translate' is non-null, return translate[D], else just D. We ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ cast the subscript to translate because some data is declared as ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `char *', to avoid warnings when a string constant is passed. But ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ when we use a character as a subscript we must make it unsigned. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define RE_TRANSLATE(d) \ ~~~~~~~~~~~~~~~~~~~~~~~~~ (TRANSLATE_P (translate) ? RE_TRANSLATE_1 (d) : (d)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Macros for outputting the compiled pattern into `buffer'. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* If the buffer isn't allocated when it comes in, use this. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define INIT_BUF_SIZE 32 ~~~~~~~~~~~~~~~~~~~~~~~~~ /* Make sure we have at least N more bytes of space in buffer. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define GET_BUFFER_SPACE(n) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ while (buf_end - bufp->buffer + (n) > (ptrdiff_t) bufp->allocated) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ EXTEND_BUFFER () ~~~~~~~~~~~~~~~~ /* Make sure we have one more byte of buffer space and then add C to it. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define BUF_PUSH(c) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~ do { \ ~~~~~~~~~~~~~~ GET_BUFFER_SPACE (1); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ *buf_end++ = (unsigned char) (c); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } while (0) ~~~~~~~~~~~ /* Ensure we have two more bytes of buffer space and then append C1 and C2. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define BUF_PUSH_2(c1, c2) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ do { \ ~~~~~~~~~~~~~~ GET_BUFFER_SPACE (2); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ *buf_end++ = (unsigned char) (c1); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ *buf_end++ = (unsigned char) (c2); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } while (0) ~~~~~~~~~~~ /* As with BUF_PUSH_2, except for three bytes. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define BUF_PUSH_3(c1, c2, c3) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ do { \ ~~~~~~~~~~~~~~ GET_BUFFER_SPACE (3); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ *buf_end++ = (unsigned char) (c1); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ *buf_end++ = (unsigned char) (c2); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ *buf_end++ = (unsigned char) (c3); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } while (0) ~~~~~~~~~~~ /* Store a jump with opcode OP at LOC to location TO. We store a ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ relative address offset by the three bytes the jump itself occupies. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define STORE_JUMP(op, loc, to) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ store_op1 (op, loc, (to) - (loc) - 3) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Likewise, for a two-argument jump. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define STORE_JUMP2(op, loc, to, arg) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ store_op2 (op, loc, (to) - (loc) - 3, arg) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Like `STORE_JUMP', but for inserting. Assume `buf_end' is the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ buffer end. */ ~~~~~~~~~~~~~~~ #define INSERT_JUMP(op, loc, to) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ insert_op1 (op, loc, (to) - (loc) - 3, buf_end) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Like `STORE_JUMP2', but for inserting. Assume `buf_end' is the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ buffer end. */ ~~~~~~~~~~~~~~~ #define INSERT_JUMP2(op, loc, to, arg) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ insert_op2 (op, loc, (to) - (loc) - 3, arg, buf_end) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Extend the buffer by twice its current size via realloc and ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ reset the pointers that pointed into the old block to point to the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ correct places in the new one. If extending the buffer results in it ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ being larger than RE_MAX_BUF_SIZE, then flag memory exhausted. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define EXTEND_BUFFER() \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ do { \ ~~~~~~~~~~~~~~~ re_char *old_buffer = bufp->buffer; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (bufp->allocated == RE_MAX_BUF_SIZE) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return REG_ESIZE; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->allocated <<= 1; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (bufp->allocated > RE_MAX_BUF_SIZE) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->allocated = RE_MAX_BUF_SIZE; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->buffer = \ ~~~~~~~~~~~~~~~~~~~~~~~ (unsigned char *) xrealloc (bufp->buffer, bufp->allocated); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (bufp->buffer == NULL) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return REG_ESPACE; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~ /* If the buffer moved, move all the pointers into it. */ \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (old_buffer != bufp->buffer) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { \ ~~~~~~~~~~~~ buf_end = (buf_end - old_buffer) + bufp->buffer; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ begalt = (begalt - old_buffer) + bufp->buffer; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (fixup_alt_jump) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~ fixup_alt_jump = (fixup_alt_jump - old_buffer) + bufp->buffer; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (laststart) \ ~~~~~~~~~~~~~~~~~~~~~~~ laststart = (laststart - old_buffer) + bufp->buffer; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (pending_exact) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~ pending_exact = (pending_exact - old_buffer) + bufp->buffer; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } \ ~~~~~~~~~~~~ } while (0) ~~~~~~~~~~~ #define INIT_REG_TRANSLATE_SIZE 5 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Macros for the compile stack. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Since offsets can go either forwards or backwards, this type needs to be ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ able to hold values from -(RE_MAX_BUF_SIZE - 1) to RE_MAX_BUF_SIZE - 1. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ typedef int pattern_offset_t; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ typedef struct ~~~~~~~~~~~~~~ { ~ pattern_offset_t begalt_offset; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ pattern_offset_t fixup_alt_jump; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ pattern_offset_t inner_group_offset; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ pattern_offset_t laststart_offset; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regnum_t regnum; ~~~~~~~~~~~~~~~~ } compile_stack_elt_t; ~~~~~~~~~~~~~~~~~~~~~~ typedef struct ~~~~~~~~~~~~~~ { ~ compile_stack_elt_t *stack; ~~~~~~~~~~~~~~~~~~~~~~~~~~~ int size; ~~~~~~~~~ int avail; /* Offset of next open position. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } compile_stack_type; ~~~~~~~~~~~~~~~~~~~~~ #define INIT_COMPILE_STACK_SIZE 32 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define COMPILE_STACK_EMPTY (compile_stack.avail == 0) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define COMPILE_STACK_FULL (compile_stack.avail == compile_stack.size) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* The next available element. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define COMPILE_STACK_TOP (compile_stack.stack[compile_stack.avail]) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Set the bit for character C in a bit vector. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define SET_LIST_BIT(c) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (buf_end[((unsigned char) (c)) / BYTEWIDTH] \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ |= 1 << (((unsigned char) c) % BYTEWIDTH)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef MULE ~~~~~~~~~~~ /* Set the "bit" for character C in a range table. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define SET_RANGETAB_BIT(c) put_range_table (rtab, c, c, Qt) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ #ifdef emacs ~~~~~~~~~~~~ /* Parse the longest number we can, but don't produce a bignum, that can't ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ correspond to anything we're interested in and would needlessly complicate ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ code. Also avoid the silent overflow issues of the non-emacs code below. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ If the string at P is not exhausted, leave P pointing at the next ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (probable-)non-digit byte encountered. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define GET_UNSIGNED_NUMBER(num) do \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Ibyte *_gus_numend = NULL; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Lisp_Object _gus_numno; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* most-positive-fixnum on 32 bit XEmacs is 10 decimal digits, \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ nine will keep us in fixnum territory no matter our \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ architecture */ \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Bytecount limit = min (pend - p, 9); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ \ ~ /* Require that any digits are ASCII. We already require that \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the user type ASCII in order to type {,(,|, etc, and there is \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the potential for security holes in the future if we allow \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ non-ASCII digits to specify groups in regexps and other \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ code that parses regexps is not aware of this. */ \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ _gus_numno = parse_integer (p, &_gus_numend, limit, 10, 1, \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Vdigit_fixnum_ascii); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (FIXNUMP (_gus_numno) && XREALFIXNUM (_gus_numno) >= 0) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ num = XREALFIXNUM (_gus_numno); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ p = _gus_numend; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } while (0) ~~~~~~~~~~~ #else ~~~~~ /* Get the next unsigned number in the uncompiled pattern. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define GET_UNSIGNED_NUMBER(num) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { if (p != pend) \ ~~~~~~~~~~~~~~~~~~~~~~~~ { \ ~~~~~~~~~~~ int _gun_do_unfetch = 1; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PATFETCH (c); \ ~~~~~~~~~~~~~~~~~~~~~~ while (ISDIGIT (c)) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~ { \ ~~~~~~~~~~~ if (num < 0) \ ~~~~~~~~~~~~~~~~~~~~ num = 0; \ ~~~~~~~~~~~~~~~~ num = num * 10 + c - '0'; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (p == pend) \ ~~~~~~~~~~~~~~~~~~~~~~ { \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ _gun_do_unfetch = 0; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; \ ~~~~~~~~~~~~~~~ } \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PATFETCH (c); \ ~~~~~~~~~~~~~~~~~~~~ } \ ~~~~~~~~~~~ if (_gun_do_unfetch) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Make sure P points to the next non-digit character. */ \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PATUNFETCH; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ #endif ~~~~~~ /* Map a string to the char class it names (if any). BEG points to the string ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ to be parsed and LIMIT is the length, in bytes, of that string. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ XEmacs; this only handles the NAME part of the [:NAME:] specification of a ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ character class name. The GNU emacs version of this function attempts to ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ handle the string from [: onwards, and is called re_wctype_parse. Our ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ approach means the function doesn't need to be called with every character ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ class encountered. ~~~~~~~~~~~~~~~~~~ LENGTH would be a Bytecount if this function didn't need to be compiled ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ also for executables that don't include lisp.h ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Return RECC_ERROR if STRP doesn't match a known character class. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_wctype_t ~~~~~~~~~~~ re_wctype (const unsigned char *beg, int limit) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ /* Sort tests in the length=five case by frequency the classes to minimize ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ number of times we fail the comparison. The frequencies of character class ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ names used in Emacs sources as of 2016-07-27: ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ $ find \( -name \*.c -o -name \*.el \) -exec grep -h '\[:[a-z]*:]' {} + | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ sed 's/]/]\n/g' |grep -o '\[:[a-z]*:]' |sort |uniq -c |sort -nr ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 213 [:alnum:] ~~~~~~~~~~~~~ 104 [:alpha:] ~~~~~~~~~~~~~ 62 [:space:] ~~~~~~~~~~~~ 39 [:digit:] ~~~~~~~~~~~~ 36 [:blank:] ~~~~~~~~~~~~ 26 [:word:] ~~~~~~~~~~~ 26 [:upper:] ~~~~~~~~~~~~ 21 [:lower:] ~~~~~~~~~~~~ 10 [:xdigit:] ~~~~~~~~~~~~~ 10 [:punct:] ~~~~~~~~~~~~ 10 [:ascii:] ~~~~~~~~~~~~ 4 [:nonascii:] ~~~~~~~~~~~~~~ 4 [:graph:] ~~~~~~~~~~~ 2 [:print:] ~~~~~~~~~~~ 2 [:cntrl:] ~~~~~~~~~~~ 1 [:ff:] ~~~~~~~~ If you update this list, consider also updating chain of or'ed conditions ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ in execute_charset function. XEmacs; our equivalent is the condition ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ checking class_bits in the charset_mule and charset_mule_not opcodes. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ ~~ switch (limit) { ~~~~~~~~~~~~~~~~ case 4: ~~~~~~~ if (!memcmp (beg, "word", 4)) return RECC_WORD; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case 5: ~~~~~~~ if (!memcmp (beg, "alnum", 5)) return RECC_ALNUM; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!memcmp (beg, "alpha", 5)) return RECC_ALPHA; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!memcmp (beg, "space", 5)) return RECC_SPACE; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!memcmp (beg, "digit", 5)) return RECC_DIGIT; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!memcmp (beg, "blank", 5)) return RECC_BLANK; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!memcmp (beg, "upper", 5)) return RECC_UPPER; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!memcmp (beg, "lower", 5)) return RECC_LOWER; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!memcmp (beg, "punct", 5)) return RECC_PUNCT; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!memcmp (beg, "ascii", 5)) return RECC_ASCII; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!memcmp (beg, "graph", 5)) return RECC_GRAPH; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!memcmp (beg, "print", 5)) return RECC_PRINT; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!memcmp (beg, "cntrl", 5)) return RECC_CNTRL; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case 6: ~~~~~~~ if (!memcmp (beg, "xdigit", 6)) return RECC_XDIGIT; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case 7: ~~~~~~~ if (!memcmp (beg, "unibyte", 7)) return RECC_UNIBYTE; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case 8: ~~~~~~~ if (!memcmp (beg, "nonascii", 8)) return RECC_NONASCII; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case 9: ~~~~~~~ if (!memcmp (beg, "multibyte", 9)) return RECC_MULTIBYTE; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ } ~ return RECC_ERROR; ~~~~~~~~~~~~~~~~~~ } ~ /* True if CH is in the char class CC. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int ~~~ re_iswctype (int ch, re_wctype_t cc ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RE_ISWCTYPE_ARG_DECL) ~~~~~~~~~~~~~~~~~~~~~ { ~ switch (cc) ~~~~~~~~~~~ { ~ case RECC_ALNUM: return ISALNUM (ch) != 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_ALPHA: return ISALPHA (ch) != 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_BLANK: return ISBLANK (ch) != 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_CNTRL: return ISCNTRL (ch) != 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_DIGIT: return ISDIGIT (ch) != 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_GRAPH: return ISGRAPH (ch) != 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_PRINT: return ISPRINT (ch) != 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_PUNCT: return ISPUNCT (ch) != 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_SPACE: return ISSPACE (ch) != 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef emacs ~~~~~~~~~~~~ case RECC_UPPER: ~~~~~~~~~~~~~~~~ return NILP (lispbuf->case_fold_search) ? ISUPPER (ch) != 0 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ : !NOCASEP (lispbuf, ch); ~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_LOWER: ~~~~~~~~~~~~~~~~ return NILP (lispbuf->case_fold_search) ? ISLOWER (ch) != 0 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ : !NOCASEP (lispbuf, ch); ~~~~~~~~~~~~~~~~~~~~~~~~~ #else ~~~~~ case RECC_UPPER: return ISUPPER (ch) != 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_LOWER: return ISLOWER (ch) != 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ case RECC_XDIGIT: return ISXDIGIT (ch) != 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_ASCII: return ISASCII (ch) != 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_NONASCII: case RECC_MULTIBYTE: return !ISASCII (ch); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_UNIBYTE: return ISUNIBYTE (ch) != 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_WORD: return ISWORD (ch) != 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_ERROR: return false; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ assert (0); ~~~~~~~~~~~ return -1; ~~~~~~~~~~ } ~ #ifdef MULE ~~~~~~~~~~~ static re_bool ~~~~~~~~~~~~~~ re_wctype_can_match_non_ascii (re_wctype_t cc) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ switch (cc) ~~~~~~~~~~~ { ~ case RECC_ASCII: ~~~~~~~~~~~~~~~~ case RECC_UNIBYTE: ~~~~~~~~~~~~~~~~~~ case RECC_CNTRL: ~~~~~~~~~~~~~~~~ case RECC_DIGIT: ~~~~~~~~~~~~~~~~ case RECC_XDIGIT: ~~~~~~~~~~~~~~~~~ case RECC_BLANK: ~~~~~~~~~~~~~~~~ return false; ~~~~~~~~~~~~~ default: ~~~~~~~~ return true; ~~~~~~~~~~~~ } ~ } ~ #endif /* MULE */ ~~~~~~~~~~~~~~~~~ #ifdef emacs ~~~~~~~~~~~~ /* Return a bit-pattern to use in the range-table bits to match multibyte ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ chars of class CC. */ ~~~~~~~~~~~~~~~~~~~~~~ static unsigned char ~~~~~~~~~~~~~~~~~~~~ re_wctype_to_bit (re_wctype_t cc) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ switch (cc) ~~~~~~~~~~~ { ~ case RECC_PRINT: case RECC_GRAPH: ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_ALPHA: return BIT_ALPHA; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_ALNUM: case RECC_WORD: return BIT_WORD; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_LOWER: return BIT_LOWER; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_UPPER: return BIT_UPPER; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_PUNCT: return BIT_PUNCT; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_SPACE: return BIT_SPACE; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_MULTIBYTE: case RECC_NONASCII: ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_ASCII: case RECC_DIGIT: case RECC_XDIGIT: case RECC_CNTRL: ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_BLANK: case RECC_UNIBYTE: case RECC_ERROR: return 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ default: ~~~~~~~~ ABORT (); ~~~~~~~~~ return 0; ~~~~~~~~~ } ~ } ~ #endif /* emacs */ ~~~~~~~~~~~~~~~~~~ ~ static void store_op1 (re_opcode_t op, unsigned char *loc, int arg); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ static void store_op2 (re_opcode_t op, unsigned char *loc, int arg1, int arg2); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ static void insert_op1 (re_opcode_t op, unsigned char *loc, int arg, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ unsigned char *end); ~~~~~~~~~~~~~~~~~~~~ static void insert_op2 (re_opcode_t op, unsigned char *loc, int arg1, int arg2, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ unsigned char *end); ~~~~~~~~~~~~~~~~~~~~ static re_bool at_begline_loc_p (re_char *pattern, re_char *p, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ reg_syntax_t syntax); ~~~~~~~~~~~~~~~~~~~~~ static re_bool at_endline_loc_p (re_char *p, re_char *pend, int syntax); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ static re_bool group_in_compile_stack (compile_stack_type compile_stack, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regnum_t regnum); ~~~~~~~~~~~~~~~~~ static reg_errcode_t compile_range (re_char **p_ptr, re_char *pend, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RE_TRANSLATE_TYPE translate, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ reg_syntax_t syntax, ~~~~~~~~~~~~~~~~~~~~ unsigned char *b); ~~~~~~~~~~~~~~~~~~ #ifdef MULE ~~~~~~~~~~~ static reg_errcode_t compile_extended_range (re_char **p_ptr, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_char *pend, ~~~~~~~~~~~~~~ RE_TRANSLATE_TYPE translate, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ reg_syntax_t syntax, ~~~~~~~~~~~~~~~~~~~~ Lisp_Object rtab); ~~~~~~~~~~~~~~~~~~ #endif /* MULE */ ~~~~~~~~~~~~~~~~~ #ifdef emacs ~~~~~~~~~~~~ reg_errcode_t compile_char_class (re_wctype_t cc, Lisp_Object rtab, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Bitbyte *flags_out); ~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ static re_bool group_match_null_string_p (re_char **p, re_char *end, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ register_info_type *reg_info); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ static re_bool alt_match_null_string_p (re_char *p, re_char *end, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ register_info_type *reg_info); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ static re_bool common_op_match_null_string_p (re_char **p, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_char *end, ~~~~~~~~~~~~~ register_info_type *reg_info); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ static int bcmp_translate (re_char *s1, re_char *s2, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ REGISTER int len, RE_TRANSLATE_TYPE translate ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef emacs ~~~~~~~~~~~~ , Internal_Format fmt, Lisp_Object lispobj ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ ); ~~ static int re_match_2_internal (struct re_pattern_buffer *bufp, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_char *string1, int size1, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_char *string2, int size2, int pos, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ struct re_registers *regs, int stop ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RE_LISP_CONTEXT_ARGS_DECL); ~~~~~~~~~~~~~~~~~~~~~~~~~~~ ~ #ifndef MATCH_MAY_ALLOCATE ~~~~~~~~~~~~~~~~~~~~~~~~~~ /* If we cannot allocate large objects within re_match_2_internal, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ we make the fail stack and register vectors global. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ The fail stack, we grow to the maximum size when a regexp ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ is compiled. ~~~~~~~~~~~~ The register vectors, we adjust in size each time we ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ compile a regexp, according to the number of registers it needs. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ static fail_stack_type fail_stack; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Size with which the following vectors are currently allocated. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ That is so we can make them bigger as needed, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ but never make them smaller. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ static int regs_allocated_size; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ static re_char ** regstart, ** regend; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ static re_char ** old_regstart, ** old_regend; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ static re_char **best_regstart, **best_regend; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ static register_info_type *reg_info; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ static re_char **reg_dummy; ~~~~~~~~~~~~~~~~~~~~~~~~~~~ static register_info_type *reg_info_dummy; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Make the register vectors big enough for NUM_REGS registers, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ but don't make them smaller. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ static ~~~~~~ regex_grow_registers (int num_regs) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ if (num_regs > regs_allocated_size) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ RETALLOC (regstart, num_regs, re_char *); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RETALLOC (regend, num_regs, re_char *); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RETALLOC (old_regstart, num_regs, re_char *); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RETALLOC (old_regend, num_regs, re_char *); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RETALLOC (best_regstart, num_regs, re_char *); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RETALLOC (best_regend, num_regs, re_char *); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RETALLOC (reg_info, num_regs, register_info_type); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RETALLOC (reg_dummy, num_regs, re_char *); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RETALLOC (reg_info_dummy, num_regs, register_info_type); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regs_allocated_size = num_regs; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ } ~ #endif /* not MATCH_MAY_ALLOCATE */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ~ /* `regex_compile' compiles PATTERN (of length SIZE) according to SYNTAX. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Returns one of error codes defined in `regex.h', or zero for success. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Assumes the `allocated' (and perhaps `buffer') and `translate' ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fields are set in BUFP on entry. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ If it succeeds, results are put in BUFP (if it returns an error, the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ contents of BUFP are undefined): ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `buffer' is the compiled pattern; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `syntax' is set to SYNTAX; ~~~~~~~~~~~~~~~~~~~~~~~~~~ `used' is set to the length of the compiled pattern; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `fastmap_accurate' is zero; ~~~~~~~~~~~~~~~~~~~~~~~~~~~ `re_ngroups' is the number of groups/subexpressions (including shy ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ groups) in PATTERN; ~~~~~~~~~~~~~~~~~~~ `re_nsub' is the number of non-shy groups in PATTERN; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `not_bol' and `not_eol' are zero; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ The `fastmap' and `newline_anchor' fields are neither ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ examined nor set. */ ~~~~~~~~~~~~~~~~~~~~~ /* Return, freeing storage we allocated. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define FREE_STACK_RETURN(value) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ do \ ~~~~~~~~~~ { \ ~~~~~~~~~ xfree (compile_stack.stack); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return value; \ ~~~~~~~~~~~~~~~~~~~~ } while (0) ~~~~~~~~~~~ static reg_errcode_t ~~~~~~~~~~~~~~~~~~~~ regex_compile (re_char *pattern, int size, reg_syntax_t syntax, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ struct re_pattern_buffer *bufp) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ /* We fetch characters from PATTERN here. We declare these as int ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (or possibly long) so that chars above 127 can be used as ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ array indices. The macros that fetch a character from the pattern ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ make sure to coerce to unsigned char before assigning, so we won't ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ get bitten by negative numbers here. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* XEmacs change: used to be unsigned char. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ REGISTER EMACS_INT c, c1; ~~~~~~~~~~~~~~~~~~~~~~~~~ /* A random temporary spot in PATTERN. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_char *p1; ~~~~~~~~~~~~ /* Points to the end of the buffer, where we should append. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ REGISTER unsigned char *buf_end; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Keeps track of unclosed groups. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ compile_stack_type compile_stack; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Points to the current (ending) position in the pattern. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_char *p = pattern; ~~~~~~~~~~~~~~~~~~~~~ re_char *pend = pattern + size; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* How to translate the characters in the pattern. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RE_TRANSLATE_TYPE translate = bufp->translate; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Address of the count-byte of the most recently inserted `exactn' ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ command. This makes it possible to tell if a new exact-match ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ character can be added to that command or if the character requires ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ a new `exactn' command. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~ unsigned char *pending_exact = 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Address of start of the most recently finished expression. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ This tells, e.g., postfix * where to find the start of its ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ operand. Reset at the beginning of groups and alternatives. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ unsigned char *laststart = 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Address of beginning of regexp, or inside of last group. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ unsigned char *begalt; ~~~~~~~~~~~~~~~~~~~~~~ /* Place in the uncompiled pattern (i.e., the {) to ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ which to go back if the interval is invalid. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_char *beg_interval; ~~~~~~~~~~~~~~~~~~~~~~ /* Address of the place where a forward jump should go to the end of ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the containing expression. Each alternative of an `or' -- except the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ last -- ends with a forward jump of this sort. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ unsigned char *fixup_alt_jump = 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Counts open-groups as they are encountered. Remembered for the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ matching close-group on the compile stack, so the same register ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ number is put in the stop_memory as the start_memory. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regnum_t regnum = 0; ~~~~~~~~~~~~~~~~~~~~ #ifdef DEBUG ~~~~~~~~~~~~ if (debug_regexps & RE_DEBUG_COMPILATION) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ int debug_count; ~~~~~~~~~~~~~~~~ DEBUG_PRINT1 ("\nCompiling pattern: "); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (debug_count = 0; debug_count < size; debug_count++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ putchar (pattern[debug_count]); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ putchar ('\n'); ~~~~~~~~~~~~~~~ } ~ #endif /* DEBUG */ ~~~~~~~~~~~~~~~~~~ /* Initialize the compile stack. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ compile_stack.stack = TALLOC (INIT_COMPILE_STACK_SIZE, compile_stack_elt_t); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (compile_stack.stack == NULL) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return REG_ESPACE; ~~~~~~~~~~~~~~~~~~ compile_stack.size = INIT_COMPILE_STACK_SIZE; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ compile_stack.avail = 0; ~~~~~~~~~~~~~~~~~~~~~~~~ /* Initialize the pattern buffer. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->syntax = syntax; ~~~~~~~~~~~~~~~~~~~~~~ bufp->fastmap_accurate = 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->not_bol = bufp->not_eol = 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Set `used' to zero, so that if we return an error, the pattern ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ printer (for debugging) will think there's no pattern. We reset it ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ at the end. */ ~~~~~~~~~~~~~~~ bufp->used = 0; ~~~~~~~~~~~~~~~ /* Always count groups, whether or not bufp->no_sub is set. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->re_nsub = 0; ~~~~~~~~~~~~~~~~~~ bufp->re_ngroups = 0; ~~~~~~~~~~~~~~~~~~~~~ bufp->warned_about_incompatible_back_references = 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (bufp->external_to_internal_register == 0) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ bufp->external_to_internal_register_size = INIT_REG_TRANSLATE_SIZE; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RETALLOC (bufp->external_to_internal_register, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->external_to_internal_register_size, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int); ~~~~~ } ~ { ~ int i; ~~~~~~ bufp->external_to_internal_register[0] = 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (i = 1; i < bufp->external_to_internal_register_size; i++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->external_to_internal_register[i] = (int) 0xDEADBEEF; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ #if !defined (emacs) && !defined (SYNTAX_TABLE) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Initialize the syntax table. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ init_syntax_once (); ~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ if (bufp->allocated == 0) ~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ if (bufp->buffer) ~~~~~~~~~~~~~~~~~ { /* If zero allocated, but buffer is non-null, try to realloc ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ enough space. This loses if buffer's address is bogus, but ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ that is the user's responsibility. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RETALLOC (bufp->buffer, INIT_BUF_SIZE, unsigned char); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ else ~~~~ { /* Caller did not allocate a buffer. Do it for them. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->buffer = TALLOC (INIT_BUF_SIZE, unsigned char); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ if (!bufp->buffer) FREE_STACK_RETURN (REG_ESPACE); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->allocated = INIT_BUF_SIZE; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ begalt = buf_end = bufp->buffer; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Loop through the uncompiled pattern until we're at the end. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ while (p != pend) ~~~~~~~~~~~~~~~~~ { ~ PATFETCH (c); ~~~~~~~~~~~~~ switch (c) ~~~~~~~~~~ { ~ case '^': ~~~~~~~~~ { ~ if ( /* If at start of pattern, it's an operator. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ p == pattern + 1 ~~~~~~~~~~~~~~~~ /* If context independent, it's an operator. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || syntax & RE_CONTEXT_INDEP_ANCHORS ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Otherwise, depends on what's come before. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || at_begline_loc_p (pattern, p, syntax)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ BUF_PUSH (begline); ~~~~~~~~~~~~~~~~~~~ else ~~~~ goto normal_char; ~~~~~~~~~~~~~~~~~ } ~ break; ~~~~~~ case '$': ~~~~~~~~~ { ~ if ( /* If at end of pattern, it's an operator. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ p == pend ~~~~~~~~~ /* If context independent, it's an operator. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || syntax & RE_CONTEXT_INDEP_ANCHORS ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Otherwise, depends on what's next. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || at_endline_loc_p (p, pend, syntax)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ BUF_PUSH (endline); ~~~~~~~~~~~~~~~~~~~ else ~~~~ goto normal_char; ~~~~~~~~~~~~~~~~~ } ~ break; ~~~~~~ case '+': ~~~~~~~~~ case '?': ~~~~~~~~~ if ((syntax & RE_BK_PLUS_QM) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || (syntax & RE_LIMITED_OPS)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto normal_char; ~~~~~~~~~~~~~~~~~ handle_plus: ~~~~~~~~~~~~ case '*': ~~~~~~~~~ /* If there is no previous pattern... */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!laststart) ~~~~~~~~~~~~~~~ { ~ if (syntax & RE_CONTEXT_INVALID_OPS) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ FREE_STACK_RETURN (REG_BADRPT); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ else if (!(syntax & RE_CONTEXT_INDEP_OPS)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto normal_char; ~~~~~~~~~~~~~~~~~ } ~ { ~ /* true means zero/many matches are allowed. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_bool zero_times_ok = c != '+'; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_bool many_times_ok = c != '?'; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* true means match shortest string possible. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_bool minimal = false; ~~~~~~~~~~~~~~~~~~~~~~~~ /* If there is a sequence of repetition chars, collapse it ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ down to just one (the right one). We can't combine ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ interval operators with these because of, e.g., `a{2}*', ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ which should only match an even number of `a's. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ while (p != pend) ~~~~~~~~~~~~~~~~~ { ~ PATFETCH (c); ~~~~~~~~~~~~~ if (c == '*' || (!(syntax & RE_BK_PLUS_QM) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ && (c == '+' || c == '?'))) ~~~~~~~~~~~~~~~~~~~~~~~~~~~ ; ~ else if (syntax & RE_BK_PLUS_QM && c == '\\') ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ if (p == pend) FREE_STACK_RETURN (REG_EESCAPE); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PATFETCH (c1); ~~~~~~~~~~~~~~ if (!(c1 == '+' || c1 == '?')) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ PATUNFETCH; ~~~~~~~~~~~ PATUNFETCH; ~~~~~~~~~~~ break; ~~~~~~ } ~ c = c1; ~~~~~~~ } ~ else ~~~~ { ~ PATUNFETCH; ~~~~~~~~~~~ break; ~~~~~~ } ~ /* If we get here, we found another repeat character. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!(syntax & RE_NO_MINIMAL_MATCHING)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ /* "*?" and "+?" and "??" are okay (and mean match ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ minimally), but other sequences (such as "*??" and ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ "+++") are rejected (reserved for future use). */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (minimal || c != '?') ~~~~~~~~~~~~~~~~~~~~~~~~ FREE_STACK_RETURN (REG_BADRPT); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ minimal = true; ~~~~~~~~~~~~~~~ } ~ else ~~~~ { ~ zero_times_ok |= c != '+'; ~~~~~~~~~~~~~~~~~~~~~~~~~~ many_times_ok |= c != '?'; ~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ } ~ /* Star, etc. applied to an empty pattern is equivalent ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ to an empty pattern. */ ~~~~~~~~~~~~~~~~~~~~~~~~ if (!laststart) ~~~~~~~~~~~~~~~ break; ~~~~~~ /* Now we know whether zero matches is allowed ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ and whether two or more matches is allowed ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ and whether we want minimal or maximal matching. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (minimal) ~~~~~~~~~~~~ { ~ if (!many_times_ok) ~~~~~~~~~~~~~~~~~~~ { ~ /* "a??" becomes: ~~~~~~~~~~~~~~~~~ 0: /on_failure_jump to 6 ~~~~~~~~~~~~~~~~~~~~~~~~ 3: /jump to 9 ~~~~~~~~~~~~~ 6: /exactn/1/A ~~~~~~~~~~~~~~ 9: end of pattern. ~~~~~~~~~~~~~~~~~~ */ ~~ GET_BUFFER_SPACE (6); ~~~~~~~~~~~~~~~~~~~~~ INSERT_JUMP (jump, laststart, buf_end + 3); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ buf_end += 3; ~~~~~~~~~~~~~ INSERT_JUMP (on_failure_jump, laststart, laststart + 6); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ buf_end += 3; ~~~~~~~~~~~~~ } ~ else if (zero_times_ok) ~~~~~~~~~~~~~~~~~~~~~~~ { ~ /* "a*?" becomes: ~~~~~~~~~~~~~~~~~ 0: /jump to 6 ~~~~~~~~~~~~~ 3: /exactn/1/A ~~~~~~~~~~~~~~ 6: /on_failure_jump to 3 ~~~~~~~~~~~~~~~~~~~~~~~~ 9: end of pattern. ~~~~~~~~~~~~~~~~~~ */ ~~ GET_BUFFER_SPACE (6); ~~~~~~~~~~~~~~~~~~~~~ INSERT_JUMP (jump, laststart, buf_end + 3); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ buf_end += 3; ~~~~~~~~~~~~~ STORE_JUMP (on_failure_jump, buf_end, laststart + 3); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ buf_end += 3; ~~~~~~~~~~~~~ } ~ else ~~~~ { ~ /* "a+?" becomes: ~~~~~~~~~~~~~~~~~ 0: /exactn/1/A ~~~~~~~~~~~~~~ 3: /on_failure_jump to 0 ~~~~~~~~~~~~~~~~~~~~~~~~ 6: end of pattern. ~~~~~~~~~~~~~~~~~~ */ ~~ GET_BUFFER_SPACE (3); ~~~~~~~~~~~~~~~~~~~~~ STORE_JUMP (on_failure_jump, buf_end, laststart); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ buf_end += 3; ~~~~~~~~~~~~~ } ~ } ~ else ~~~~ { ~ /* Are we optimizing this jump? */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_bool keep_string_p = false; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (many_times_ok) ~~~~~~~~~~~~~~~~~~ { /* More than one repetition is allowed, so put in ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ at the end a backward relative jump from ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `buf_end' to before the next jump we're going ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ to put in below (which jumps from laststart to ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ after this jump). ~~~~~~~~~~~~~~~~~ But if we are at the `*' in the exact sequence `.*\n', ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ insert an unconditional jump backwards to the ., ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ instead of the beginning of the loop. This way we only ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ push a failure point once, instead of every time ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ through the loop. */ ~~~~~~~~~~~~~~~~~~~~~ assert (p - 1 > pattern); ~~~~~~~~~~~~~~~~~~~~~~~~~ /* Allocate the space for the jump. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ GET_BUFFER_SPACE (3); ~~~~~~~~~~~~~~~~~~~~~ /* We know we are not at the first character of the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ pattern, because laststart was nonzero. And we've ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ already incremented `p', by the way, to be the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ character after the `*'. Do we have to do something ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ analogous here for null bytes, because of ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RE_DOT_NOT_NULL? */ ~~~~~~~~~~~~~~~~~~~ if (*(p - 2) == '.' ~~~~~~~~~~~~~~~~~~~ && zero_times_ok ~~~~~~~~~~~~~~~~ && p < pend && *p == '\n' ~~~~~~~~~~~~~~~~~~~~~~~~~ && !(syntax & RE_DOT_NEWLINE)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { /* We have .*\n. */ ~~~~~~~~~~~~~~~~~~~~~~ STORE_JUMP (jump, buf_end, laststart); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ keep_string_p = true; ~~~~~~~~~~~~~~~~~~~~~ } ~ else ~~~~ /* Anything else. */ ~~~~~~~~~~~~~~~~~~~~~ STORE_JUMP (maybe_pop_jump, buf_end, laststart - 3); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* We've added more stuff to the buffer. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ buf_end += 3; ~~~~~~~~~~~~~ } ~ /* On failure, jump from laststart to buf_end + 3, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ which will be the end of the buffer after this jump ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ is inserted. */ ~~~~~~~~~~~~~~~~ GET_BUFFER_SPACE (3); ~~~~~~~~~~~~~~~~~~~~~ INSERT_JUMP (keep_string_p ? on_failure_keep_string_jump ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ : on_failure_jump, ~~~~~~~~~~~~~~~~~~ laststart, buf_end + 3); ~~~~~~~~~~~~~~~~~~~~~~~~ buf_end += 3; ~~~~~~~~~~~~~ if (!zero_times_ok) ~~~~~~~~~~~~~~~~~~~ { ~ /* At least one repetition is required, so insert a ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `dummy_failure_jump' before the initial ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `on_failure_jump' instruction of the loop. This ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ effects a skip over that instruction the first time ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ we hit that loop. */ ~~~~~~~~~~~~~~~~~~~~~ GET_BUFFER_SPACE (3); ~~~~~~~~~~~~~~~~~~~~~ INSERT_JUMP (dummy_failure_jump, laststart, laststart + 6); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ buf_end += 3; ~~~~~~~~~~~~~ } ~ } ~ pending_exact = 0; ~~~~~~~~~~~~~~~~~~ } ~ break; ~~~~~~ case '.': ~~~~~~~~~ laststart = buf_end; ~~~~~~~~~~~~~~~~~~~~ BUF_PUSH (anychar); ~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ #ifdef MULE ~~~~~~~~~~~ #define MAYBE_START_OVER_WITH_EXTENDED(ch) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (ch >= 0x80) do \ ~~~~~~~~~~~~~~~~~~~~~~ { \ ~~~~~~~ goto start_over_with_extended; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } while (0) ~~~~~~~~~~~ #else ~~~~~ #define MAYBE_START_OVER_WITH_EXTENDED(ch) (void)(ch) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ case '[': ~~~~~~~~~ { ~ /* XEmacs change: this whole section */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_bool had_char_class = false; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (p == pend) FREE_STACK_RETURN (REG_EBRACK); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Ensure that we have enough space to push a charset: the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ opcode, the length count, and the bitset; 34 bytes in all. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ GET_BUFFER_SPACE (34); ~~~~~~~~~~~~~~~~~~~~~~ laststart = buf_end; ~~~~~~~~~~~~~~~~~~~~ /* We test `*p == '^' twice, instead of using an if ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ statement, so we only need one BUF_PUSH. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ BUF_PUSH (*p == '^' ? charset_not : charset); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (*p == '^') ~~~~~~~~~~~~~~ p++; ~~~~ /* Remember the first position in the bracket expression. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ p1 = p; ~~~~~~~ /* Push the number of bytes in the bitmap. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ BUF_PUSH ((1 << BYTEWIDTH) / BYTEWIDTH); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Clear the whole map. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~ memset (buf_end, 0, (1 << BYTEWIDTH) / BYTEWIDTH); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* charset_not matches newline according to a syntax bit. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if ((re_opcode_t) buf_end[-2] == charset_not ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ && (syntax & RE_HAT_LISTS_NOT_NEWLINE)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ SET_LIST_BIT ('\n'); ~~~~~~~~~~~~~~~~~~~~ /* Read in characters and ranges, setting map bits. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (;;) ~~~~~~~~ { ~ if (p == pend) FREE_STACK_RETURN (REG_EBRACK); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PATFETCH (c); ~~~~~~~~~~~~~ /* Frumble-bumble, we may have found some extended chars. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Need to start over, process everything using the general ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ extended-char mechanism, and need to use charset_mule and ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ charset_mule_not instead of charset and charset_not. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ MAYBE_START_OVER_WITH_EXTENDED (c); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* \ might escape characters inside [...] and [^...]. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if ((syntax & RE_BACKSLASH_ESCAPE_IN_LISTS) && c == '\\') ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ if (p == pend) FREE_STACK_RETURN (REG_EESCAPE); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PATFETCH (c1); ~~~~~~~~~~~~~~ MAYBE_START_OVER_WITH_EXTENDED (c1); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ SET_LIST_BIT (c1); ~~~~~~~~~~~~~~~~~~ continue; ~~~~~~~~~ } ~ /* Could be the end of the bracket expression. If it's ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ not (i.e., when the bracket expression is `[]' so ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ far), the ']' character bit gets set way below. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (c == ']' && p != p1 + 1) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ /* Look ahead to see if it's a range when the last thing ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ was a character class. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~ if (had_char_class && c == '-' && *p != ']') ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ FREE_STACK_RETURN (REG_ERANGE); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Look ahead to see if it's a range when the last thing ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ was a character: if this is a hyphen not at the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ beginning or the end of a list, then it's the range ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ operator. */ ~~~~~~~~~~~~~ if (c == '-' ~~~~~~~~~~~~ && !(p - 2 >= pattern && p[-2] == '[') ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ && !(p - 3 >= pattern && p[-3] == '[' && p[-2] == '^') ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ && *p != ']') ~~~~~~~~~~~~~ { ~ reg_errcode_t ret; ~~~~~~~~~~~~~~~~~~ MAYBE_START_OVER_WITH_EXTENDED (*(unsigned char *)p); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ret = compile_range (&p, pend, translate, syntax, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ buf_end); ~~~~~~~~~ if (ret != REG_NOERROR) FREE_STACK_RETURN (ret); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ else if (p[0] == '-' && p[1] != ']') ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { /* This handles ranges made up of characters only. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ reg_errcode_t ret; ~~~~~~~~~~~~~~~~~~ /* Move past the `-'. */ ~~~~~~~~~~~~~~~~~~~~~~~~~ PATFETCH (c1); ~~~~~~~~~~~~~~ MAYBE_START_OVER_WITH_EXTENDED (*(unsigned char *)p); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ret = compile_range (&p, pend, translate, syntax, buf_end); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (ret != REG_NOERROR) FREE_STACK_RETURN (ret); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* See if we're at the beginning of a possible character ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ class. */ ~~~~~~~~~~ else if (syntax & RE_CHAR_CLASSES && c == '[' && *p == ':') ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ re_char *str = p + 1; ~~~~~~~~~~~~~~~~~~~~~ PATFETCH (c); ~~~~~~~~~~~~~ c1 = 0; ~~~~~~~ /* If pattern is `[[:'. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (p == pend) FREE_STACK_RETURN (REG_EBRACK); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (;;) ~~~~~~~~ { ~ PATFETCH (c); ~~~~~~~~~~~~~ if ((c == ':' && *p == ']') || p == pend) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ break; ~~~~~~ } ~ c1++; ~~~~~ } ~ /* If isn't a word bracketed by `[:' and `:]': ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ undo the ending character, the letters, and leave ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the leading `:' and `[' (but set bits for them). */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (c == ':' && *p == ']') ~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ re_wctype_t cc = re_wctype (str, c1); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int ch; ~~~~~~~ if (cc == RECC_ERROR) ~~~~~~~~~~~~~~~~~~~~~ FREE_STACK_RETURN (REG_ECTYPE); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Throw away the ] at the end of the character ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ class. */ ~~~~~~~~~~ PATFETCH (c); ~~~~~~~~~~~~~ if (p == pend) FREE_STACK_RETURN (REG_EBRACK); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef MULE ~~~~~~~~~~~ if (re_wctype_can_match_non_ascii (cc)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ goto start_over_with_extended; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ #endif /* MULE */ ~~~~~~~~~~~~~~~~~ for (ch = 0; ch < (1 << BYTEWIDTH); ++ch) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ if (re_iswctype (ch, cc ~~~~~~~~~~~~~~~~~~~~~~~ RE_ISWCTYPE_ARG (current_buffer))) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ SET_LIST_BIT (ch); ~~~~~~~~~~~~~~~~~~ } ~ } ~ had_char_class = true; ~~~~~~~~~~~~~~~~~~~~~~ } ~ else ~~~~ { ~ c1++; ~~~~~ while (c1--) ~~~~~~~~~~~~ PATUNFETCH; ~~~~~~~~~~~ SET_LIST_BIT ('['); ~~~~~~~~~~~~~~~~~~~ SET_LIST_BIT (':'); ~~~~~~~~~~~~~~~~~~~ had_char_class = false; ~~~~~~~~~~~~~~~~~~~~~~~ } ~ } ~ else ~~~~ { ~ had_char_class = false; ~~~~~~~~~~~~~~~~~~~~~~~ SET_LIST_BIT (c); ~~~~~~~~~~~~~~~~~ } ~ } ~ /* Discard any (non)matching list bytes that are all 0 at the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ end of the map. Decrease the map-length byte too. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ while ((int) buf_end[-1] > 0 && buf_end[buf_end[-1] - 1] == 0) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ buf_end[-1]--; ~~~~~~~~~~~~~~ buf_end += buf_end[-1]; ~~~~~~~~~~~~~~~~~~~~~~~ } ~ break; ~~~~~~ #ifdef MULE ~~~~~~~~~~~ start_over_with_extended: ~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ REGISTER Lisp_Object rtab = Qnil; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Bitbyte flags = 0; ~~~~~~~~~~~~~~~~~~ int bytes_needed = sizeof (flags); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_bool had_char_class = false; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* There are extended chars here, which means we need to use the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ unified range-table format. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (buf_end[-2] == charset) ~~~~~~~~~~~~~~~~~~~~~~~~~~~ buf_end[-2] = charset_mule; ~~~~~~~~~~~~~~~~~~~~~~~~~~~ else ~~~~ buf_end[-2] = charset_mule_not; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ buf_end--; ~~~~~~~~~~ p = p1; /* go back to the beginning of the charset, after ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ a possible ^. */ ~~~~~~~~~~~~~~~~ rtab = Vthe_lisp_rangetab; ~~~~~~~~~~~~~~~~~~~~~~~~~~ Fclear_range_table (rtab); ~~~~~~~~~~~~~~~~~~~~~~~~~~ /* charset_not matches newline according to a syntax bit. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if ((re_opcode_t) buf_end[-1] == charset_mule_not ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ && (syntax & RE_HAT_LISTS_NOT_NEWLINE)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ SET_RANGETAB_BIT ('\n'); ~~~~~~~~~~~~~~~~~~~~~~~~ /* Read in characters and ranges, setting map bits. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (;;) ~~~~~~~~ { ~ if (p == pend) FREE_STACK_RETURN (REG_EBRACK); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PATFETCH (c); ~~~~~~~~~~~~~ /* \ might escape characters inside [...] and [^...]. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if ((syntax & RE_BACKSLASH_ESCAPE_IN_LISTS) && c == '\\') ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ if (p == pend) FREE_STACK_RETURN (REG_EESCAPE); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PATFETCH (c1); ~~~~~~~~~~~~~~ SET_RANGETAB_BIT (c1); ~~~~~~~~~~~~~~~~~~~~~~ continue; ~~~~~~~~~ } ~ /* Could be the end of the bracket expression. If it's ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ not (i.e., when the bracket expression is `[]' so ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ far), the ']' character bit gets set way below. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (c == ']' && p != p1 + 1) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ /* Look ahead to see if it's a range when the last thing ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ was a character class. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~ if (had_char_class && c == '-' && *p != ']') ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ FREE_STACK_RETURN (REG_ERANGE); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Look ahead to see if it's a range when the last thing ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ was a character: if this is a hyphen not at the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ beginning or the end of a list, then it's the range ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ operator. */ ~~~~~~~~~~~~~ if (c == '-' ~~~~~~~~~~~~ && !(p - 2 >= pattern && p[-2] == '[') ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ && !(p - 3 >= pattern && p[-3] == '[' && p[-2] == '^') ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ && *p != ']') ~~~~~~~~~~~~~ { ~ reg_errcode_t ret; ~~~~~~~~~~~~~~~~~~ ret = compile_extended_range (&p, pend, translate, syntax, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ rtab); ~~~~~~ if (ret != REG_NOERROR) FREE_STACK_RETURN (ret); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ else if (p[0] == '-' && p[1] != ']') ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { /* This handles ranges made up of characters only. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ reg_errcode_t ret; ~~~~~~~~~~~~~~~~~~ /* Move past the `-'. */ ~~~~~~~~~~~~~~~~~~~~~~~~~ PATFETCH (c1); ~~~~~~~~~~~~~~ ret = compile_extended_range (&p, pend, translate, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ syntax, rtab); ~~~~~~~~~~~~~~ if (ret != REG_NOERROR) FREE_STACK_RETURN (ret); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* See if we're at the beginning of a possible character ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ class. */ ~~~~~~~~~~ else if (syntax & RE_CHAR_CLASSES && c == '[' && *p == ':') ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ re_char *str = p + 1; ~~~~~~~~~~~~~~~~~~~~~ PATFETCH (c); ~~~~~~~~~~~~~ c1 = 0; ~~~~~~~ /* If pattern is `[[:'. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (p == pend) FREE_STACK_RETURN (REG_EBRACK); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (;;) ~~~~~~~~ { ~ PATFETCH (c); ~~~~~~~~~~~~~ if ((c == ':' && *p == ']') || p == pend) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ break; ~~~~~~ } ~ c1++; ~~~~~ } ~ /* If isn't a word bracketed by `[:' and `:]': ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ undo the ending character, the letters, and leave ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the leading `:' and `[' (but set bits for them). */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (c == ':' && *p == ']') ~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ re_wctype_t cc = re_wctype (str, c1); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ reg_errcode_t ret = REG_NOERROR; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (cc == RECC_ERROR) ~~~~~~~~~~~~~~~~~~~~~ FREE_STACK_RETURN (REG_ECTYPE); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Throw away the ] at the end of the character ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ class. */ ~~~~~~~~~~ PATFETCH (c); ~~~~~~~~~~~~~ if (p == pend) FREE_STACK_RETURN (REG_EBRACK); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ret = compile_char_class (cc, rtab, &flags); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (ret != REG_NOERROR) FREE_STACK_RETURN (ret); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ had_char_class = true; ~~~~~~~~~~~~~~~~~~~~~~ } ~ else ~~~~ { ~ c1++; ~~~~~ while (c1--) ~~~~~~~~~~~~ PATUNFETCH; ~~~~~~~~~~~ SET_RANGETAB_BIT ('['); ~~~~~~~~~~~~~~~~~~~~~~~ SET_RANGETAB_BIT (':'); ~~~~~~~~~~~~~~~~~~~~~~~ had_char_class = false; ~~~~~~~~~~~~~~~~~~~~~~~ } ~ } ~ else ~~~~ { ~ had_char_class = false; ~~~~~~~~~~~~~~~~~~~~~~~ SET_RANGETAB_BIT (c); ~~~~~~~~~~~~~~~~~~~~~ } ~ } ~ bytes_needed += unified_range_table_bytes_needed (rtab); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ GET_BUFFER_SPACE (bytes_needed); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ *buf_end++ = flags; ~~~~~~~~~~~~~~~~~~~ unified_range_table_copy_data (rtab, buf_end); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ buf_end += unified_range_table_bytes_used (buf_end); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ } ~ #endif /* MULE */ ~~~~~~~~~~~~~~~~~ case '(': ~~~~~~~~~ if (syntax & RE_NO_BK_PARENS) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto handle_open; ~~~~~~~~~~~~~~~~~ else ~~~~ goto normal_char; ~~~~~~~~~~~~~~~~~ case ')': ~~~~~~~~~ if (syntax & RE_NO_BK_PARENS) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto handle_close; ~~~~~~~~~~~~~~~~~~ else ~~~~ goto normal_char; ~~~~~~~~~~~~~~~~~ case '\n': ~~~~~~~~~~ if (syntax & RE_NEWLINE_ALT) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto handle_alt; ~~~~~~~~~~~~~~~~ else ~~~~ goto normal_char; ~~~~~~~~~~~~~~~~~ case '|': ~~~~~~~~~ if (syntax & RE_NO_BK_VBAR) ~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto handle_alt; ~~~~~~~~~~~~~~~~ else ~~~~ goto normal_char; ~~~~~~~~~~~~~~~~~ case '{': ~~~~~~~~~ if (syntax & RE_INTERVALS && syntax & RE_NO_BK_BRACES) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto handle_interval; ~~~~~~~~~~~~~~~~~~~~~ else ~~~~ goto normal_char; ~~~~~~~~~~~~~~~~~ case '\\': ~~~~~~~~~~ if (p == pend) FREE_STACK_RETURN (REG_EESCAPE); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Do not translate the character after the \, so that we can ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ distinguish, e.g., \B from \b, even if we normally would ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ translate, e.g., B to b. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PATFETCH_RAW (c); ~~~~~~~~~~~~~~~~~ switch (c) ~~~~~~~~~~ { ~ case '(': ~~~~~~~~~ if (syntax & RE_NO_BK_PARENS) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto normal_backslash; ~~~~~~~~~~~~~~~~~~~~~~ handle_open: ~~~~~~~~~~~~ { ~ regnum_t r = 0; ~~~~~~~~~~~~~~~ re_bool shy = 0, named_nonshy = 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!(syntax & RE_NO_SHY_GROUPS) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ && p != pend && itext_ichar_eql (p, '?')) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ INC_IBYTEPTR (p); /* Gobble up the '?'. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PATFETCH (c); /* Fetch the next character, which may be a ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ digit. */ ~~~~~~~~~ switch (c) ~~~~~~~~~~ { ~ case ':': /* shy groups */ ~~~~~~~~~~~~~~~~~~~~~~~~~~ shy = 1; ~~~~~~~~ break; ~~~~~~ case '1': case '2': case '3': case '4': ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case '5': case '6': case '7': case '8': case '9': ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PATUNFETCH; ~~~~~~~~~~~ GET_UNSIGNED_NUMBER (r); ~~~~~~~~~~~~~~~~~~~~~~~~ if (itext_ichar_eql (p, ':')) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ named_nonshy = 1; ~~~~~~~~~~~~~~~~~ INC_IBYTEPTR (p); /* Gobble up the ':'. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ /* Otherwise, fall through and error. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* An explicitly specified regnum must start with ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ non-0. */ ~~~~~~~~~ case '0': ~~~~~~~~~ default: ~~~~~~~~ FREE_STACK_RETURN (REG_BADPAT); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ } ~ ++regnum; ~~~~~~~~~ bufp->re_ngroups++; ~~~~~~~~~~~~~~~~~~~ if (bufp->re_ngroups > MAX_REGNUM) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ FREE_STACK_RETURN (REG_ESUBREG); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ if (!shy) ~~~~~~~~~ { ~ if (named_nonshy) ~~~~~~~~~~~~~~~~~ { ~ if (r < bufp->external_to_internal_register_size) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ if (group_in_compile_stack ~~~~~~~~~~~~~~~~~~~~~~~~~~ (compile_stack, ~~~~~~~~~~~~~~~ bufp->external_to_internal_register[r])) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ /* GNU errors in this context, which is ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ inconsistent; it otherwise has no problem ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ with named non-shy groups overriding ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ previously-assigned group numbers. I choose ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ to error here for consistency with GNU for ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ those writing code that should target ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ both. */ ~~~~~~~~ FREE_STACK_RETURN (REG_ESUBREG); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ } ~ if (r > bufp->re_nsub) ~~~~~~~~~~~~~~~~~~~~~~ { ~ bufp->re_nsub = r; ~~~~~~~~~~~~~~~~~~ } ~ } ~ else ~~~~ { ~ r = ++(bufp->re_nsub); ~~~~~~~~~~~~~~~~~~~~~~ } ~ while (bufp->external_to_internal_register_size <= ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->re_nsub) ~~~~~~~~~~~~~~ { ~ int i; ~~~~~~ int old_size = ~~~~~~~~~~~~~~ bufp->external_to_internal_register_size; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->external_to_internal_register_size ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ += max (old_size + 5, bufp->re_nsub + 5); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RETALLOC (bufp->external_to_internal_register, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->external_to_internal_register_size, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int); ~~~~~ for (i = old_size; ~~~~~~~~~~~~~~~~~~ i < bufp->external_to_internal_register_size; i++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->external_to_internal_register[i] = ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (int) 0xDEADBEEF; ~~~~~~~~~~~~~~~~~ } ~ /* This is explicitly [r] rather than [bufp->re_nsub] for ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the case that the named nonshy group references an ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ unused register number less than bufp->re_nsub. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->external_to_internal_register[r] = ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->re_ngroups; ~~~~~~~~~~~~~~~~~ } ~ if (COMPILE_STACK_FULL) ~~~~~~~~~~~~~~~~~~~~~~~ { ~ RETALLOC (compile_stack.stack, compile_stack.size << 1, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ compile_stack_elt_t); ~~~~~~~~~~~~~~~~~~~~~ if (compile_stack.stack == NULL) return REG_ESPACE; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ compile_stack.size <<= 1; ~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* These are the values to restore when we hit end of this ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ group. They are all relative offsets, so that if the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ whole pattern moves because of realloc, they will still ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ be valid. */ ~~~~~~~~~~~~~ COMPILE_STACK_TOP.begalt_offset = begalt - bufp->buffer; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ COMPILE_STACK_TOP.fixup_alt_jump ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ = fixup_alt_jump ? fixup_alt_jump - bufp->buffer + 1 : 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ COMPILE_STACK_TOP.laststart_offset = buf_end - bufp->buffer; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ COMPILE_STACK_TOP.regnum = bufp->re_ngroups; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ COMPILE_STACK_TOP.inner_group_offset ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ = buf_end - bufp->buffer + 3; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* We will eventually replace the 0 with the number of ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ groups inner to this one, using inner_group_offset, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ above. */ ~~~~~~~~~ GET_BUFFER_SPACE (5); ~~~~~~~~~~~~~~~~~~~~~ store_op2 (start_memory, buf_end, bufp->re_ngroups, 0); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ buf_end += 5; ~~~~~~~~~~~~~ compile_stack.avail++; ~~~~~~~~~~~~~~~~~~~~~~ fixup_alt_jump = 0; ~~~~~~~~~~~~~~~~~~~ laststart = 0; ~~~~~~~~~~~~~~ begalt = buf_end; ~~~~~~~~~~~~~~~~~ pending_exact = 0; ~~~~~~~~~~~~~~~~~~ } ~ break; ~~~~~~ case ')': ~~~~~~~~~ if (syntax & RE_NO_BK_PARENS) goto normal_backslash; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (COMPILE_STACK_EMPTY) ~~~~~~~~~~~~~~~~~~~~~~~~ { ~ if (syntax & RE_UNMATCHED_RIGHT_PAREN_ORD) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto normal_backslash; ~~~~~~~~~~~~~~~~~~~~~~ else ~~~~ FREE_STACK_RETURN (REG_ERPAREN); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ handle_close: ~~~~~~~~~~~~~ if (fixup_alt_jump) ~~~~~~~~~~~~~~~~~~~ { /* Push a dummy failure point at the end of the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ alternative for a possible future ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `pop_failure_jump' to pop. See comments at ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `push_dummy_failure' in `re_match_2'. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ BUF_PUSH (push_dummy_failure); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* We allocated space for this jump when we assigned ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ to `fixup_alt_jump', in the `handle_alt' case below. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ STORE_JUMP (jump_past_alt, fixup_alt_jump, buf_end - 1); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* See similar code for backslashed left paren above. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (COMPILE_STACK_EMPTY) ~~~~~~~~~~~~~~~~~~~~~~~~ { ~ if (syntax & RE_UNMATCHED_RIGHT_PAREN_ORD) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto normal_char; ~~~~~~~~~~~~~~~~~ else ~~~~ FREE_STACK_RETURN (REG_ERPAREN); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* Since we just checked for an empty stack above, this ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ``can't happen''. */ ~~~~~~~~~~~~~~~~~~~~~ assert (compile_stack.avail != 0); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ /* We don't just want to restore into `regnum', because ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ later groups should continue to be numbered higher, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ as in `(ab)c(de)' -- the second group is #2. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regnum_t this_group_regnum; ~~~~~~~~~~~~~~~~~~~~~~~~~~~ unsigned char *inner_group_loc; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ compile_stack.avail--; ~~~~~~~~~~~~~~~~~~~~~~ begalt = bufp->buffer + COMPILE_STACK_TOP.begalt_offset; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fixup_alt_jump ~~~~~~~~~~~~~~ = COMPILE_STACK_TOP.fixup_alt_jump ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ? bufp->buffer + COMPILE_STACK_TOP.fixup_alt_jump - 1 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ : 0; ~~~~ laststart = bufp->buffer + COMPILE_STACK_TOP.laststart_offset; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ this_group_regnum = COMPILE_STACK_TOP.regnum; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ pending_exact = 0; ~~~~~~~~~~~~~~~~~~ /* We're at the end of the group, so now we know how many ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ groups were inside this one. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ inner_group_loc ~~~~~~~~~~~~~~~ = bufp->buffer + COMPILE_STACK_TOP.inner_group_offset; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ STORE_NUMBER (inner_group_loc, regnum - this_group_regnum); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ GET_BUFFER_SPACE (5); ~~~~~~~~~~~~~~~~~~~~~ store_op2 (stop_memory, buf_end, this_group_regnum, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regnum - this_group_regnum); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ buf_end += 5; ~~~~~~~~~~~~~ } ~ break; ~~~~~~ case '|': /* `\|'. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~ if (syntax & RE_LIMITED_OPS || syntax & RE_NO_BK_VBAR) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto normal_backslash; ~~~~~~~~~~~~~~~~~~~~~~ handle_alt: ~~~~~~~~~~~ if (syntax & RE_LIMITED_OPS) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto normal_char; ~~~~~~~~~~~~~~~~~ /* Insert before the previous alternative a jump which ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ jumps to this alternative if the former fails. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ GET_BUFFER_SPACE (3); ~~~~~~~~~~~~~~~~~~~~~ INSERT_JUMP (on_failure_jump, begalt, buf_end + 6); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ pending_exact = 0; ~~~~~~~~~~~~~~~~~~ buf_end += 3; ~~~~~~~~~~~~~ /* The alternative before this one has a jump after it ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ which gets executed if it gets matched. Adjust that ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ jump so it will jump to this alternative's analogous ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ jump (put in below, which in turn will jump to the next ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (if any) alternative's such jump, etc.). The last such ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ jump jumps to the correct final destination. A picture: ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ _____ _____ ~~~~~~~~~~~ | | | | ~~~~~~~~~~~ | v | v ~~~~~~~~~~~ a | b | c ~~~~~~~~~~~ If we are at `b', then fixup_alt_jump right now points to a ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ three-byte space after `a'. We'll put in the jump, set ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fixup_alt_jump to right after `b', and leave behind three ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bytes which we'll fill in when we get to after `c'. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (fixup_alt_jump) ~~~~~~~~~~~~~~~~~~~ STORE_JUMP (jump_past_alt, fixup_alt_jump, buf_end); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Mark and leave space for a jump after this alternative, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ to be filled in later either by next alternative or ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ when know we're at the end of a series of alternatives. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fixup_alt_jump = buf_end; ~~~~~~~~~~~~~~~~~~~~~~~~~ GET_BUFFER_SPACE (3); ~~~~~~~~~~~~~~~~~~~~~ buf_end += 3; ~~~~~~~~~~~~~ laststart = 0; ~~~~~~~~~~~~~~ begalt = buf_end; ~~~~~~~~~~~~~~~~~ break; ~~~~~~ case '{': ~~~~~~~~~ /* If \{ is a literal. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!(syntax & RE_INTERVALS) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* If we're at `\{' and it's not the open-interval ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ operator. */ ~~~~~~~~~~~~~ || ((syntax & RE_INTERVALS) && (syntax & RE_NO_BK_BRACES)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || (p - 2 == pattern && p == pend)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto normal_backslash; ~~~~~~~~~~~~~~~~~~~~~~ #define BAD_INTERVAL(errnum) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ do \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (syntax & RE_NO_BK_BRACES) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto unfetch_interval; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ else \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ FREE_STACK_RETURN (errnum); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } while (0) ~~~~~~~~~~~ handle_interval: ~~~~~~~~~~~~~~~~ { ~ /* If got here, then the syntax allows intervals. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* At least (most) this many matches must be made. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int lower_bound = 0, upper_bound = -1; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ beg_interval = p - 1; ~~~~~~~~~~~~~~~~~~~~~ if (p == pend || itext_ichar_eql (p, '+')) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ BAD_INTERVAL (REG_EBRACE); ~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ GET_UNSIGNED_NUMBER (lower_bound); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (p == pend) BAD_INTERVAL (REG_EBRACE); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PATFETCH (c); ~~~~~~~~~~~~~ if (c == ',') ~~~~~~~~~~~~~ { ~ if (p == pend || itext_ichar_eql (p, '+')) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ BAD_INTERVAL (REG_EBRACE); ~~~~~~~~~~~~~~~~~~~~~~~~~~ GET_UNSIGNED_NUMBER (upper_bound); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (upper_bound < 0) upper_bound = RE_DUP_MAX; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (p == pend) BAD_INTERVAL (REG_EBRACE); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PATFETCH (c); ~~~~~~~~~~~~~ } ~ else ~~~~ { ~ /* Interval such as `{1}' => match exactly once. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ upper_bound = lower_bound; ~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ if (lower_bound > upper_bound) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ BAD_INTERVAL (REG_EBRACE); ~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ if (upper_bound > RE_DUP_MAX) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ BAD_INTERVAL (REG_ESIZEBR); ~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ if (!(syntax & RE_NO_BK_BRACES)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ if (c != '\\') ~~~~~~~~~~~~~~ FREE_STACK_RETURN (REG_BADBR); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (p == pend) ~~~~~~~~~~~~~~ FREE_STACK_RETURN (REG_EESCAPE); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PATFETCH (c); ~~~~~~~~~~~~~ } ~ if (c != '}') ~~~~~~~~~~~~~ { ~ BAD_INTERVAL (REG_EBRACE); ~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* We just parsed a valid interval. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* It's invalid to have no preceding RE. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!laststart) ~~~~~~~~~~~~~~~ { ~ if (syntax & RE_CONTEXT_INVALID_OPS) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ FREE_STACK_RETURN (REG_BADRPT); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ else if (syntax & RE_CONTEXT_INDEP_OPS) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ laststart = buf_end; ~~~~~~~~~~~~~~~~~~~~ else ~~~~ goto unfetch_interval; ~~~~~~~~~~~~~~~~~~~~~~ } ~ /* If the upper bound is zero, don't want to succeed at ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ all; jump from `laststart' to `b + 3', which will be ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the end of the buffer after we insert the jump. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (upper_bound == 0) ~~~~~~~~~~~~~~~~~~~~~ { ~ GET_BUFFER_SPACE (3); ~~~~~~~~~~~~~~~~~~~~~ INSERT_JUMP (jump, laststart, buf_end + 3); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ buf_end += 3; ~~~~~~~~~~~~~ } ~ /* Otherwise, we have a nontrivial interval. When ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ we're all done, the pattern will look like: ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ set_number_at ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ set_number_at ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ succeed_n ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ~~~~~~~~~~~~~~ jump_n ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (The upper bound and `jump_n' are omitted if ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `upper_bound' is 1, though.) */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ else ~~~~ { /* If the upper bound is > 1, we need to insert ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ more at the end of the loop. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int nbytes = 10 + (upper_bound > 1) * 10; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ GET_BUFFER_SPACE (nbytes); ~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Initialize lower bound of the `succeed_n', even ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ though it will be set during matching by its ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ attendant `set_number_at' (inserted next), ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ because `re_compile_fastmap' needs to know. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Jump to the `jump_n' we might insert below. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ INSERT_JUMP2 (succeed_n, laststart, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ buf_end + 5 + (upper_bound > 1) * 5, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ lower_bound); ~~~~~~~~~~~~~ buf_end += 5; ~~~~~~~~~~~~~ /* Code to initialize the lower bound. Insert ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ before the `succeed_n'. The `5' is the last two ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bytes of this `set_number_at', plus 3 bytes of ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the following `succeed_n'. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ insert_op2 (set_number_at, laststart, 5, lower_bound, buf_end); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ buf_end += 5; ~~~~~~~~~~~~~ if (upper_bound > 1) ~~~~~~~~~~~~~~~~~~~~ { /* More than one repetition is allowed, so ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ append a backward jump to the `succeed_n' ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ that starts this interval. ~~~~~~~~~~~~~~~~~~~~~~~~~~ When we've reached this during matching, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ we'll have matched the interval once, so ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ jump back only `upper_bound - 1' times. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ STORE_JUMP2 (jump_n, buf_end, laststart + 5, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ upper_bound - 1); ~~~~~~~~~~~~~~~~~ buf_end += 5; ~~~~~~~~~~~~~ /* The location we want to set is the second ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ parameter of the `jump_n'; that is `b-2' as ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ an absolute address. `laststart' will be ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the `set_number_at' we're about to insert; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `laststart+3' the number to set, the source ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for the relative address. But we are ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ inserting into the middle of the pattern -- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ so everything is getting moved up by 5. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Conclusion: (b - 2) - (laststart + 3) + 5, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ i.e., b - laststart. ~~~~~~~~~~~~~~~~~~~~ We insert this at the beginning of the loop ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ so that if we fail during matching, we'll ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ reinitialize the bounds. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ insert_op2 (set_number_at, laststart, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ buf_end - laststart, ~~~~~~~~~~~~~~~~~~~~ upper_bound - 1, buf_end); ~~~~~~~~~~~~~~~~~~~~~~~~~~ buf_end += 5; ~~~~~~~~~~~~~ } ~ } ~ pending_exact = 0; ~~~~~~~~~~~~~~~~~~ beg_interval = NULL; ~~~~~~~~~~~~~~~~~~~~ } ~ break; ~~~~~~ #undef BAD_INTERVAL ~~~~~~~~~~~~~~~~~~~ unfetch_interval: ~~~~~~~~~~~~~~~~~ /* If an invalid interval, match the characters as literals. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ assert (beg_interval); ~~~~~~~~~~~~~~~~~~~~~~ p = beg_interval; ~~~~~~~~~~~~~~~~~ beg_interval = NULL; ~~~~~~~~~~~~~~~~~~~~ /* normal_char and normal_backslash need `c'. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PATFETCH (c); ~~~~~~~~~~~~~ if (!(syntax & RE_NO_BK_BRACES)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ if (p > pattern && p[-1] == '\\') ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto normal_backslash; ~~~~~~~~~~~~~~~~~~~~~~ } ~ goto normal_char; ~~~~~~~~~~~~~~~~~ #ifdef emacs ~~~~~~~~~~~~ /* There is no way to specify the before_dot and after_dot ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ operators. rms says this is ok. --karl */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case '=': ~~~~~~~~~ BUF_PUSH (at_dot); ~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case 's': ~~~~~~~~~ laststart = buf_end; ~~~~~~~~~~~~~~~~~~~~ PATFETCH (c); ~~~~~~~~~~~~~ /* XEmacs addition */ ~~~~~~~~~~~~~~~~~~~~~ if (c >= 0x80 || syntax_spec_code[c] == 0377) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ FREE_STACK_RETURN (REG_ESYNTAX); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ BUF_PUSH_2 (syntaxspec, syntax_spec_code[c]); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case 'S': ~~~~~~~~~ laststart = buf_end; ~~~~~~~~~~~~~~~~~~~~ PATFETCH (c); ~~~~~~~~~~~~~ /* XEmacs addition */ ~~~~~~~~~~~~~~~~~~~~~ if (c >= 0x80 || syntax_spec_code[c] == 0377) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ FREE_STACK_RETURN (REG_ESYNTAX); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ BUF_PUSH_2 (notsyntaxspec, syntax_spec_code[c]); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ #ifdef MULE ~~~~~~~~~~~ /* 97.2.17 jhod merged in to XEmacs from mule-2.3 */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case 'c': ~~~~~~~~~ laststart = buf_end; ~~~~~~~~~~~~~~~~~~~~ PATFETCH_RAW (c); ~~~~~~~~~~~~~~~~~ if (c < 32 || c > 127) ~~~~~~~~~~~~~~~~~~~~~~ FREE_STACK_RETURN (REG_ECATEGORY); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ BUF_PUSH_2 (categoryspec, c); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case 'C': ~~~~~~~~~ laststart = buf_end; ~~~~~~~~~~~~~~~~~~~~ PATFETCH_RAW (c); ~~~~~~~~~~~~~~~~~ if (c < 32 || c > 127) ~~~~~~~~~~~~~~~~~~~~~~ FREE_STACK_RETURN (REG_ECATEGORY); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ BUF_PUSH_2 (notcategoryspec, c); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ /* end of category patch */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif /* MULE */ ~~~~~~~~~~~~~~~~~ #endif /* emacs */ ~~~~~~~~~~~~~~~~~~ case 'w': ~~~~~~~~~ laststart = buf_end; ~~~~~~~~~~~~~~~~~~~~ BUF_PUSH (wordchar); ~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case 'W': ~~~~~~~~~ laststart = buf_end; ~~~~~~~~~~~~~~~~~~~~ BUF_PUSH (notwordchar); ~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case '<': ~~~~~~~~~ BUF_PUSH (wordbeg); ~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case '>': ~~~~~~~~~ BUF_PUSH (wordend); ~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case 'b': ~~~~~~~~~ BUF_PUSH (wordbound); ~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case 'B': ~~~~~~~~~ BUF_PUSH (notwordbound); ~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case '`': ~~~~~~~~~ BUF_PUSH (begbuf); ~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case '\'': ~~~~~~~~~~ BUF_PUSH (endbuf); ~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case '1': case '2': case '3': case '4': case '5': ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case '6': case '7': case '8': case '9': ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ regnum_t reg = -1, regint; ~~~~~~~~~~~~~~~~~~~~~~~~~~ if (syntax & RE_NO_BK_REFS) ~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto normal_char; ~~~~~~~~~~~~~~~~~ PATUNFETCH; ~~~~~~~~~~~ GET_UNSIGNED_NUMBER (reg); ~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Progressively divide down the backreference until we find ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ one that corresponds to an existing register. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ while (reg > 10 && ~~~~~~~~~~~~~~~~~~ (syntax & RE_NO_MULTI_DIGIT_BK_REFS ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || reg > bufp->re_nsub ~~~~~~~~~~~~~~~~~~~~~~ || (bufp->external_to_internal_register[reg] ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ == (int) 0xDEADBEEF))) ~~~~~~~~~~~~~~~~~~~~~~ { ~ PATUNFETCH; ~~~~~~~~~~~ reg /= 10; ~~~~~~~~~~ } ~ if (reg > bufp->re_nsub ~~~~~~~~~~~~~~~~~~~~~~~ || (bufp->external_to_internal_register[reg] ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ == (int) 0xDEADBEEF)) ~~~~~~~~~~~~~~~~~~~~~ { ~ /* \N with one digit with a non-existing group has always ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ been a syntax error. ~~~~~~~~~~~~~~~~~~~~ GNU as of Fr 27 Mär 2020 16:24:07 GMT do not accept ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ multidigit backreferences; if they did there would be ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ an argument for this not being an error for those ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ backreferences that are less than some known named ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ backreference. As it is currently we should error, this ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ will give those writing code for XEmacs better ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ feedback. */ ~~~~~~~~~~~~ FREE_STACK_RETURN (REG_ESUBREG); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ regint = bufp->external_to_internal_register[reg]; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Can't back reference to a subexpression if inside of it. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (group_in_compile_stack (compile_stack, regint)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ /* Check REG, not REGINT. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ while (reg > 10) ~~~~~~~~~~~~~~~~ { ~ PATUNFETCH; ~~~~~~~~~~~ reg = reg / 10; ~~~~~~~~~~~~~~~ } ~ goto normal_char; ~~~~~~~~~~~~~~~~~ } ~ #ifdef emacs ~~~~~~~~~~~~ if (reg > 9 && ~~~~~~~~~~~~~~ bufp->warned_about_incompatible_back_references == 0) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ bufp->warned_about_incompatible_back_references = 1; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ warn_when_safe (intern ("regex"), Qinfo, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ "Back reference \\%d now has new " ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ "semantics in %s", reg, pattern); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ #endif ~~~~~~ laststart = buf_end; ~~~~~~~~~~~~~~~~~~~~ GET_BUFFER_SPACE (3); ~~~~~~~~~~~~~~~~~~~~~ store_op1 (duplicate, buf_end, regint); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ buf_end += 3; ~~~~~~~~~~~~~ } ~ break; ~~~~~~ case '+': ~~~~~~~~~ case '?': ~~~~~~~~~ if (syntax & RE_BK_PLUS_QM) ~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto handle_plus; ~~~~~~~~~~~~~~~~~ else ~~~~ goto normal_backslash; ~~~~~~~~~~~~~~~~~~~~~~ default: ~~~~~~~~ normal_backslash: ~~~~~~~~~~~~~~~~~ /* You might think it would be useful for \ to mean ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ not to translate; but if we don't translate it, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ it will never match anything. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ c = RE_TRANSLATE (c); ~~~~~~~~~~~~~~~~~~~~~ goto normal_char; ~~~~~~~~~~~~~~~~~ } ~ break; ~~~~~~ default: ~~~~~~~~ /* Expects the character in `c'. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* `p' points to the location after where `c' came from. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ normal_char: ~~~~~~~~~~~~ { ~ /* The following conditional synced to GNU Emacs 22.1. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* If no exactn currently being built. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!pending_exact ~~~~~~~~~~~~~~~~~~ /* If last exactn not at current position. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || pending_exact + *pending_exact + 1 != buf_end ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* We have only one byte following the exactn for the count. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || *pending_exact >= (1 << BYTEWIDTH) - MAX_ICHAR_LEN ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* If followed by a repetition operator. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ If the lookahead fails because of end of pattern, any ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ trailing backslash will get caught later. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || (p != pend && (*p == '*' || *p == '^')) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || ((syntax & RE_BK_PLUS_QM) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ? p + 1 < pend && *p == '\\' && (p[1] == '+' || p[1] == '?') ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ : p != pend && (*p == '+' || *p == '?')) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || ((syntax & RE_INTERVALS) ~~~~~~~~~~~~~~~~~~~~~~~~~~~ && ((syntax & RE_NO_BK_BRACES) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ? p != pend && *p == '{' ~~~~~~~~~~~~~~~~~~~~~~~~ : p + 1 < pend && (p[0] == '\\' && p[1] == '{')))) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ /* Start building a new exactn. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ laststart = buf_end; ~~~~~~~~~~~~~~~~~~~~ BUF_PUSH_2 (exactn, 0); ~~~~~~~~~~~~~~~~~~~~~~~ pending_exact = buf_end - 1; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ #ifndef MULE ~~~~~~~~~~~~ BUF_PUSH (c); ~~~~~~~~~~~~~ (*pending_exact)++; ~~~~~~~~~~~~~~~~~~~ #else ~~~~~ { ~ Bytecount bt_count; ~~~~~~~~~~~~~~~~~~~ Ibyte tmp_buf[MAX_ICHAR_LEN]; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int i; ~~~~~~ bt_count = set_itext_ichar (tmp_buf, c); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (i = 0; i < bt_count; i++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ BUF_PUSH (tmp_buf[i]); ~~~~~~~~~~~~~~~~~~~~~~ (*pending_exact)++; ~~~~~~~~~~~~~~~~~~~ } ~ } ~ #endif ~~~~~~ break; ~~~~~~ } ~ } /* switch (c) */ ~~~~~~~~~~~~~~~~~~ } /* while p != pend */ ~~~~~~~~~~~~~~~~~~~~~~~ /* Through the pattern now. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (fixup_alt_jump) ~~~~~~~~~~~~~~~~~~~ STORE_JUMP (jump_past_alt, fixup_alt_jump, buf_end); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!COMPILE_STACK_EMPTY) ~~~~~~~~~~~~~~~~~~~~~~~~~ FREE_STACK_RETURN (REG_EPAREN); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* If we don't want backtracking, force success ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the first time we reach the end of the compiled pattern. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (syntax & RE_NO_POSIX_BACKTRACKING) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ BUF_PUSH (succeed); ~~~~~~~~~~~~~~~~~~~ xfree (compile_stack.stack); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* We have succeeded; set the length of the buffer. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->used = buf_end - bufp->buffer; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef DEBUG ~~~~~~~~~~~~ if (debug_regexps & RE_DEBUG_COMPILATION) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ DEBUG_PRINT1 ("\nCompiled pattern: \n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ print_compiled_pattern (bufp); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ #endif /* DEBUG */ ~~~~~~~~~~~~~~~~~~ #ifndef MATCH_MAY_ALLOCATE ~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Initialize the failure stack to the largest possible stack. This ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ isn't necessary unless we're trying to avoid calling alloca in ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the search and match routines. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ int num_regs = bufp->re_ngroups + 1; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Since DOUBLE_FAIL_STACK refuses to double only if the current size ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ is strictly greater than re_max_failures, the largest possible stack ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ is 2 * re_max_failures failure points. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (fail_stack.size < (2 * re_max_failures * MAX_FAILURE_ITEMS)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ fail_stack.size = (2 * re_max_failures * MAX_FAILURE_ITEMS); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (! fail_stack.stack) ~~~~~~~~~~~~~~~~~~~~~~~ fail_stack.stack ~~~~~~~~~~~~~~~~ = (fail_stack_elt_t *) xmalloc (fail_stack.size ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ * sizeof (fail_stack_elt_t)); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ else ~~~~ fail_stack.stack ~~~~~~~~~~~~~~~~ = (fail_stack_elt_t *) xrealloc (fail_stack.stack, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (fail_stack.size ~~~~~~~~~~~~~~~~ * sizeof (fail_stack_elt_t))); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ regex_grow_registers (num_regs); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ #endif /* not MATCH_MAY_ALLOCATE */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return REG_NOERROR; ~~~~~~~~~~~~~~~~~~~ } /* regex_compile */ ~~~~~~~~~~~~~~~~~~~~~ ~ /* Subroutines for `regex_compile'. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Store OP at LOC followed by two-byte integer parameter ARG. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ static void ~~~~~~~~~~~ store_op1 (re_opcode_t op, unsigned char *loc, int arg) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ *loc = (unsigned char) op; ~~~~~~~~~~~~~~~~~~~~~~~~~~ STORE_NUMBER (loc + 1, arg); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* Like `store_op1', but for two two-byte parameters ARG1 and ARG2. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ static void ~~~~~~~~~~~ store_op2 (re_opcode_t op, unsigned char *loc, int arg1, int arg2) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ *loc = (unsigned char) op; ~~~~~~~~~~~~~~~~~~~~~~~~~~ STORE_NUMBER (loc + 1, arg1); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ STORE_NUMBER (loc + 3, arg2); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* Copy the bytes from LOC to END to open up three bytes of space at LOC ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for OP followed by two-byte integer parameter ARG. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ static void ~~~~~~~~~~~ insert_op1 (re_opcode_t op, unsigned char *loc, int arg, unsigned char *end) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ REGISTER unsigned char *pfrom = end; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ REGISTER unsigned char *pto = end + 3; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ while (pfrom != loc) ~~~~~~~~~~~~~~~~~~~~ *--pto = *--pfrom; ~~~~~~~~~~~~~~~~~~ store_op1 (op, loc, arg); ~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* Like `insert_op1', but for two two-byte parameters ARG1 and ARG2. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ static void ~~~~~~~~~~~ insert_op2 (re_opcode_t op, unsigned char *loc, int arg1, int arg2, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ unsigned char *end) ~~~~~~~~~~~~~~~~~~~ { ~ REGISTER unsigned char *pfrom = end; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ REGISTER unsigned char *pto = end + 5; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ while (pfrom != loc) ~~~~~~~~~~~~~~~~~~~~ *--pto = *--pfrom; ~~~~~~~~~~~~~~~~~~ store_op2 (op, loc, arg1, arg2); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* P points to just after a ^ in PATTERN. Return true if that ^ comes ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ after an alternative or a begin-subexpression. We assume there is at ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ least one character before the ^. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ static re_bool ~~~~~~~~~~~~~~ at_begline_loc_p (re_char *pattern, re_char *p, reg_syntax_t syntax) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ re_char *prev = p - 2; ~~~~~~~~~~~~~~~~~~~~~~ re_bool prev_prev_backslash = prev > pattern && prev[-1] == '\\'; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return ~~~~~~ /* After a subexpression? */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (*prev == '(' && (syntax & RE_NO_BK_PARENS || prev_prev_backslash)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* After an alternative? */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || (*prev == '|' && (syntax & RE_NO_BK_VBAR || prev_prev_backslash)); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* The dual of at_begline_loc_p. This one is for $. We assume there is ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ at least one character after the $, i.e., `P < PEND'. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ static re_bool ~~~~~~~~~~~~~~ at_endline_loc_p (re_char *p, re_char *pend, int syntax) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ re_char *next = p; ~~~~~~~~~~~~~~~~~~ re_bool next_backslash = *next == '\\'; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_char *next_next = p + 1 < pend ? p + 1 : 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return ~~~~~~ /* Before a subexpression? */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (syntax & RE_NO_BK_PARENS ? *next == ')' ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ : next_backslash && next_next && *next_next == ')') ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Before an alternative? */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || (syntax & RE_NO_BK_VBAR ? *next == '|' ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ : next_backslash && next_next && *next_next == '|'); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* Returns true if REGNUM is in one of COMPILE_STACK's elements and ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ false if it's not. */ ~~~~~~~~~~~~~~~~~~~~~~ static re_bool ~~~~~~~~~~~~~~ group_in_compile_stack (compile_stack_type compile_stack, regnum_t regnum) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ int this_element; ~~~~~~~~~~~~~~~~~ for (this_element = compile_stack.avail - 1; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ this_element >= 0; ~~~~~~~~~~~~~~~~~~ this_element--) ~~~~~~~~~~~~~~~ if (compile_stack.stack[this_element].regnum == regnum) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return true; ~~~~~~~~~~~~ return false; ~~~~~~~~~~~~~ } ~ /* Read the ending character of a range (in a bracket expression) from the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ uncompiled pattern *P_PTR (which ends at PEND). We assume the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ starting character is in `P[-2]'. (`P[-1]' is the character `-'.) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Then we set the translation of all bits between the starting and ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ending characters (inclusive) in the compiled pattern B. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Return an error code. ~~~~~~~~~~~~~~~~~~~~~ We use these short variable names so we can use the same macros as ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `regex_compile' itself. ~~~~~~~~~~~~~~~~~~~~~~~ Under Mule, this is only called when both chars of the range are ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ASCII. */ ~~~~~~~~~ static reg_errcode_t ~~~~~~~~~~~~~~~~~~~~ compile_range (re_char **p_ptr, re_char *pend, RE_TRANSLATE_TYPE translate, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ reg_syntax_t syntax, unsigned char *buf_end) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ Ichar this_char; ~~~~~~~~~~~~~~~~ re_char *p = *p_ptr; ~~~~~~~~~~~~~~~~~~~~ int range_start, range_end; ~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (p == pend) ~~~~~~~~~~~~~~ return REG_ERANGE; ~~~~~~~~~~~~~~~~~~ /* Even though the pattern is a signed `char *', we need to fetch ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ with unsigned char *'s; if the high bit of the pattern character ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ is set, the range endpoints will be negative if we fetch using a ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ signed char *. ~~~~~~~~~~~~~~ We also want to fetch the endpoints without translating them; the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ appropriate translation is done in the bit-setting loop below. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* The SVR4 compiler on the 3B2 had trouble with unsigned const char *. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ range_start = ((const unsigned char *) p)[-2]; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ range_end = ((const unsigned char *) p)[0]; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Have to increment the pointer into the pattern string, so the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ caller isn't still at the ending character. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (*p_ptr)++; ~~~~~~~~~~~ /* If the start is after the end, the range is empty. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (range_start > range_end) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return syntax & RE_NO_EMPTY_RANGES ? REG_ERANGE : REG_NOERROR; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Here we see why `this_char' has to be larger than an `unsigned ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ char' -- the range is inclusive, so if `range_end' == 0xff ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (assuming 8-bit characters), we would otherwise go into an infinite ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ loop, since all characters <= 0xff. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (this_char = range_start; this_char <= range_end; this_char++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ SET_LIST_BIT (RE_TRANSLATE (this_char)); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ return REG_NOERROR; ~~~~~~~~~~~~~~~~~~~ } ~ #ifdef MULE ~~~~~~~~~~~ static reg_errcode_t ~~~~~~~~~~~~~~~~~~~~ compile_extended_range (re_char **p_ptr, re_char *pend, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RE_TRANSLATE_TYPE translate, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ reg_syntax_t syntax, Lisp_Object rtab) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ Ichar this_char, range_start, range_end; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ const Ibyte *p; ~~~~~~~~~~~~~~~ if (*p_ptr == pend) ~~~~~~~~~~~~~~~~~~~ return REG_ERANGE; ~~~~~~~~~~~~~~~~~~ p = (const Ibyte *) *p_ptr; ~~~~~~~~~~~~~~~~~~~~~~~~~~~ range_end = itext_ichar (p); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ p--; /* back to '-' */ ~~~~~~~~~~~~~~~~~~~~~~ DEC_IBYTEPTR (p); /* back to start of range */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* We also want to fetch the endpoints without translating them; the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ appropriate translation is done in the bit-setting loop below. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ range_start = itext_ichar (p); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ INC_IBYTEPTR (*p_ptr); ~~~~~~~~~~~~~~~~~~~~~~ /* If the start is after the end, the range is empty. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (range_start > range_end) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return syntax & RE_NO_EMPTY_RANGES ? REG_ERANGE : REG_NOERROR; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifndef UNICODE_INTERNAL ~~~~~~~~~~~~~~~~~~~~~~~~ /* Can't have ranges spanning different charsets, except maybe for ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ranges entirely within the first 256 chars. (The intent of this is that ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the effect of such a range would be unpredictable, since there is no ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ well-defined ordering over charsets and the particular assignment of ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ charset ID's is arbitrary.) This does not apply to Unicode, with ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ well-defined character values. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if ((range_start >= 0x100 || range_end >= 0x100) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ && !EQ (old_mule_ichar_charset (range_start), ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ old_mule_ichar_charset (range_end))) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return REG_ERANGESPAN; ~~~~~~~~~~~~~~~~~~~~~~ #endif /* not UNICODE_INTERNAL */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (TRANSLATE_P (translate)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ /* #### This might be way inefficient if the range encompasses 10,000 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ chars or something. To be efficient, you'd have to do something like ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ this: ~~~~~ range_table a ~~~~~~~~~~~~~ range_table b; ~~~~~~~~~~~~~~ map_char_table (translation table, [range_start, range_end]) of ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ lambda (ch, translation): ~~~~~~~~~~~~~~~~~~~~~~~~~ put (ch, Qt) in a ~~~~~~~~~~~~~~~~~ put (translation, Qt) in b ~~~~~~~~~~~~~~~~~~~~~~~~~~ invert the range in a and truncate to [range_start, range_end] ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ put the union of a, b in rtab ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ This is to say, we want to map every character that has a translation ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ to its translation, and other characters to themselves. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ This assumes, as is reasonable in practice, that a translation ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ table maps individual characters to their translation, and does ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ not generally map multiple characters to the same translation. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ ~~ for (this_char = range_start; this_char <= range_end; this_char++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ SET_RANGETAB_BIT (RE_TRANSLATE (this_char)); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ } ~ else ~~~~ put_range_table (rtab, range_start, range_end, Qt); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return REG_NOERROR; ~~~~~~~~~~~~~~~~~~~ } ~ #endif /* MULE */ ~~~~~~~~~~~~~~~~~ #ifdef emacs ~~~~~~~~~~~~ reg_errcode_t ~~~~~~~~~~~~~ compile_char_class (re_wctype_t cc, Lisp_Object rtab, Bitbyte *flags_out) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ *flags_out |= re_wctype_to_bit (cc); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ switch (cc) ~~~~~~~~~~~ { ~ case RECC_ASCII: ~~~~~~~~~~~~~~~~ put_range_table (rtab, 0, 0x7f, Qt); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case RECC_XDIGIT: ~~~~~~~~~~~~~~~~~ put_range_table (rtab, 'a', 'f', Qt); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ put_range_table (rtab, 'A', 'f', Qt); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* fallthrough */ ~~~~~~~~~~~~~~~~~ case RECC_DIGIT: ~~~~~~~~~~~~~~~~ put_range_table (rtab, '0', '9', Qt); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case RECC_BLANK: ~~~~~~~~~~~~~~~~ put_range_table (rtab, ' ', ' ', Qt); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ put_range_table (rtab, '\t', '\t', Qt); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case RECC_PRINT: ~~~~~~~~~~~~~~~~ put_range_table (rtab, ' ', 0x7e, Qt); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ put_range_table (rtab, 0x80, CHAR_CODE_LIMIT, Qt); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case RECC_GRAPH: ~~~~~~~~~~~~~~~~ put_range_table (rtab, '!', 0x7e, Qt); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ put_range_table (rtab, 0x80, CHAR_CODE_LIMIT, Qt); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case RECC_NONASCII: ~~~~~~~~~~~~~~~~~~~ case RECC_MULTIBYTE: ~~~~~~~~~~~~~~~~~~~~ put_range_table (rtab, 0x80, CHAR_CODE_LIMIT, Qt); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case RECC_CNTRL: ~~~~~~~~~~~~~~~~ put_range_table (rtab, 0x00, 0x1f, Qt); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case RECC_UNIBYTE: ~~~~~~~~~~~~~~~~~~ /* Never true in XEmacs. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ /* The following all have their own bits in the class_bits argument to ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ charset_mule and charset_mule_not, they don't use the range table ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ information. */ ~~~~~~~~~~~~~~~ case RECC_ALPHA: ~~~~~~~~~~~~~~~~ case RECC_WORD: ~~~~~~~~~~~~~~~ case RECC_ALNUM: /* Equivalent to RECC_WORD */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_LOWER: ~~~~~~~~~~~~~~~~ case RECC_PUNCT: ~~~~~~~~~~~~~~~~ case RECC_SPACE: ~~~~~~~~~~~~~~~~ case RECC_UPPER: ~~~~~~~~~~~~~~~~ break; ~~~~~~ } ~ return REG_NOERROR; ~~~~~~~~~~~~~~~~~~~ } ~ #endif /* MULE */ ~~~~~~~~~~~~~~~~~ ~ /* re_compile_fastmap computes a ``fastmap'' for the compiled pattern in ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ BUFP. A fastmap records which of the (1 << BYTEWIDTH) possible ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ characters can start a string that matches the pattern. This fastmap ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ is used by re_search to skip quickly over impossible starting points. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ The caller must supply the address of a (1 << BYTEWIDTH)-byte data ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ area as BUFP->fastmap. ~~~~~~~~~~~~~~~~~~~~~~ We set the `fastmap', `fastmap_accurate', and `can_be_null' fields in ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the pattern buffer. ~~~~~~~~~~~~~~~~~~~ Returns 0 if we succeed, -2 if an internal error. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int ~~~ re_compile_fastmap (struct re_pattern_buffer *bufp ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RE_LISP_SHORT_CONTEXT_ARGS_DECL) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ int j, k; ~~~~~~~~~ #ifdef MATCH_MAY_ALLOCATE ~~~~~~~~~~~~~~~~~~~~~~~~~ fail_stack_type fail_stack; ~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ /* We don't push any register information onto the failure stack. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* &&#### this should be changed for 8-bit-fixed, for efficiency. see ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ comment marked with &&#### in re_search_2. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ REGISTER char *fastmap = bufp->fastmap; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ unsigned char *pattern = bufp->buffer; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ long size = bufp->used; ~~~~~~~~~~~~~~~~~~~~~~~ re_char *p = pattern; ~~~~~~~~~~~~~~~~~~~~~ REGISTER re_char *pend = pattern + size; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef REGEX_REL_ALLOC ~~~~~~~~~~~~~~~~~~~~~~ /* This holds the pointer to the failure stack, when ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ it is allocated relocatably. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fail_stack_elt_t *failure_stack_ptr; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ /* Assume that each path through the pattern can be null until ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ proven otherwise. We set this false at the bottom of switch ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ statement, to which we get only if a particular path doesn't ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ match the empty string. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_bool path_can_be_null = true; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* We aren't doing a `succeed_n' to begin with. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_bool succeed_n_p = false; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef ERROR_CHECK_MALLOC ~~~~~~~~~~~~~~~~~~~~~~~~~ /* The pattern comes from string data, not buffer data. We don't access ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ any buffer data, so we don't have to worry about malloc() (but the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ disallowed flag may have been set by a caller). */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int depth = bind_regex_malloc_disallowed (0); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ assert (fastmap != NULL && p != NULL); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ INIT_FAIL_STACK (); ~~~~~~~~~~~~~~~~~~~ memset (fastmap, 0, 1 << BYTEWIDTH); /* Assume nothing's valid. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->fastmap_accurate = 1; /* It will be when we're done. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->can_be_null = 0; ~~~~~~~~~~~~~~~~~~~~~~ while (1) ~~~~~~~~~ { ~ if (p == pend || *p == succeed) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ /* We have reached the (effective) end of pattern. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!FAIL_STACK_EMPTY ()) ~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ bufp->can_be_null |= path_can_be_null; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Reset for next path. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~ path_can_be_null = true; ~~~~~~~~~~~~~~~~~~~~~~~~ p = (unsigned char *) fail_stack.stack[--fail_stack.avail].pointer; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ continue; ~~~~~~~~~ } ~ else ~~~~ break; ~~~~~~ } ~ /* We should never be about to go beyond the end of the pattern. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ assert (p < pend); ~~~~~~~~~~~~~~~~~~ switch ((re_opcode_t) *p++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ /* I guess the idea here is to simply not bother with a fastmap ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if a backreference is used, since it's too hard to figure out ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the fastmap for the corresponding group. Setting ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `can_be_null' stops `re_search_2' from using the fastmap, so ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ that is all we do. */ ~~~~~~~~~~~~~~~~~~~~~~ case duplicate: ~~~~~~~~~~~~~~~ bufp->can_be_null = 1; ~~~~~~~~~~~~~~~~~~~~~~ goto done; ~~~~~~~~~~ /* Following are the cases which match a character. These end ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ with `break'. */ ~~~~~~~~~~~~~~~~~ case exactn: ~~~~~~~~~~~~ fastmap[p[1]] = 1; ~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case charset: ~~~~~~~~~~~~~ /* XEmacs: Under Mule, these bit vectors will ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ only contain values for characters below 0x80. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH))) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[j] = 1; ~~~~~~~~~~~~~~~ break; ~~~~~~ case charset_not: ~~~~~~~~~~~~~~~~~ /* Chars beyond end of map must be allowed. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef MULE ~~~~~~~~~~~ for (j = *p * BYTEWIDTH; j < 0x80; j++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[j] = 1; ~~~~~~~~~~~~~~~ /* And all extended characters must be allowed, too. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (j = 0x80; j < 0x100; j++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (ibyte_first_byte_p (j)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[j] = 1; ~~~~~~~~~~~~~~~ #else /* not MULE */ ~~~~~~~~~~~~~~~~~~~~ for (j = *p * BYTEWIDTH; j < (1 << BYTEWIDTH); j++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[j] = 1; ~~~~~~~~~~~~~~~ #endif /* MULE */ ~~~~~~~~~~~~~~~~~ for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!(p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH)))) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[j] = 1; ~~~~~~~~~~~~~~~ break; ~~~~~~ #ifdef MULE ~~~~~~~~~~~ case charset_mule: ~~~~~~~~~~~~~~~~~~ { ~ int nentries; ~~~~~~~~~~~~~ Bitbyte flags = *p++; ~~~~~~~~~~~~~~~~~~~~~ if (flags) ~~~~~~~~~~ { ~ /* We need to consult the syntax table, fastmap won't ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ work. */ ~~~~~~~~ bufp->can_be_null = 1; ~~~~~~~~~~~~~~~~~~~~~~ goto done; ~~~~~~~~~~ } ~ nentries = unified_range_table_nentries ((void *) p); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (j = 0; j < nentries; j++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ EMACS_INT first, last; ~~~~~~~~~~~~~~~~~~~~~~ Lisp_Object dummy_val; ~~~~~~~~~~~~~~~~~~~~~~ int jj; ~~~~~~~ Ibyte strr[MAX_ICHAR_LEN]; ~~~~~~~~~~~~~~~~~~~~~~~~~~ unified_range_table_get_range ((void *) p, j, &first, &last, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ &dummy_val); ~~~~~~~~~~~~ #ifndef UNICODE_INTERNAL ~~~~~~~~~~~~~~~~~~~~~~~~ for (jj = first; jj <= last && jj < 0x80; jj++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[jj] = 1; ~~~~~~~~~~~~~~~~ /* Ranges below 0x100 can span charsets, but there ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ are only two (Control-1 and Latin-1), and ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ either first or last has to be in them. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ set_itext_ichar (strr, first); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[*strr] = 1; ~~~~~~~~~~~~~~~~~~~ if (last < 0x100) ~~~~~~~~~~~~~~~~~ { ~ set_itext_ichar (strr, last); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[*strr] = 1; ~~~~~~~~~~~~~~~~~~~ } ~ else if (CHAR_CODE_LIMIT == last) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ /* This is RECC_MULTIBYTE or RECC_NONASCII; true for all ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ non-ASCII characters. */ ~~~~~~~~~~~~~~~~~~~~~~~~ jj = 0x80; ~~~~~~~~~~ while (jj < 0xA0) ~~~~~~~~~~~~~~~~~ { ~ fastmap[jj++] = 1; ~~~~~~~~~~~~~~~~~~ } ~ } ~ #else ~~~~~ /* Ranges can span charsets. We depend on the fact that ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ lead bytes are monotonically non-decreasing as ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ character values increase. @@#### This is a fairly ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ reasonable assumption in general (but DOES NOT WORK in ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ old Mule due to the ordering of private dimension-1 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ chars before official dimension-2 chars), and introduces ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ a dependency on the particular representation. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ Ibyte strrlast[MAX_ICHAR_LEN]; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ set_itext_ichar (strr, first); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ set_itext_ichar (strrlast, min (last, CHAR_CODE_LIMIT - 1)); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (jj = *strr; jj <= *strrlast; jj++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[jj] = 1; ~~~~~~~~~~~~~~~~ } ~ #endif /* not UNICODE_INTERNAL */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* If it's not a possible first byte, it can't be in the fastmap. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ In UTF-8, lead bytes are not contiguous with ASCII, so a ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ range spanning the ASCII/non-ASCII boundary will put ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ extraneous bytes in the range [0x80 - 0xBF] in the fastmap. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (j = 0x80; j < 0x100; j++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!ibyte_first_byte_p (j)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[j] = 0; ~~~~~~~~~~~~~~~ } ~ break; ~~~~~~ case charset_mule_not: ~~~~~~~~~~~~~~~~~~~~~~ { ~ int nentries; ~~~~~~~~~~~~~ int smallest_prev = 0; ~~~~~~~~~~~~~~~~~~~~~~ Bitbyte flags = *p++; ~~~~~~~~~~~~~~~~~~~~~ if (flags) ~~~~~~~~~~ { ~ /* We need to consult the syntax table, fastmap won't ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ work. */ ~~~~~~~~ bufp->can_be_null = 1; ~~~~~~~~~~~~~~~~~~~~~~ goto done; ~~~~~~~~~~ } ~ nentries = unified_range_table_nentries ((void *) p); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifndef UNICODE_INTERNAL ~~~~~~~~~~~~~~~~~~~~~~~~ for (j = 0; j < nentries; j++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ EMACS_INT first, last; ~~~~~~~~~~~~~~~~~~~~~~ Lisp_Object dummy_val; ~~~~~~~~~~~~~~~~~~~~~~ int jj; ~~~~~~~ unified_range_table_get_range ((void *) p, j, &first, &last, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ &dummy_val); ~~~~~~~~~~~~ for (jj = smallest_prev; jj < first && jj < 0x80; jj++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[jj] = 1; ~~~~~~~~~~~~~~~~ smallest_prev = last + 1; ~~~~~~~~~~~~~~~~~~~~~~~~~ if (smallest_prev >= 0x80) ~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ } ~ /* Also set lead bytes after the end */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (j = smallest_prev; j < 0x80; j++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[j] = 1; ~~~~~~~~~~~~~~~ /* Calculating which lead bytes are actually allowed ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ here is rather difficult, so we just punt and allow ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ all of them. ~~~~~~~~~~~~ */ ~~ for (j = 0x80; j < 0x100; j++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (ibyte_first_byte_p (j)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[j] = 1; ~~~~~~~~~~~~~~~ #else ~~~~~ for (j = 0; j < nentries; j++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ EMACS_INT first, last; ~~~~~~~~~~~~~~~~~~~~~~ /* This denotes a range of lead bytes that are not ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ in the fastmap. */ ~~~~~~~~~~~~~~~~~~ int firstlead, lastlead; ~~~~~~~~~~~~~~~~~~~~~~~~ Lisp_Object dummy_val; ~~~~~~~~~~~~~~~~~~~~~~ int jj; ~~~~~~~ unified_range_table_get_range ((void *) p, j, &first, &last, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ &dummy_val); ~~~~~~~~~~~~ /* With Unicode-internal, lead bytes that are entirely ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ within the range and not including the beginning or end ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ are definitely not in the fastmap. Leading bytes that ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ include the beginning or ending characters will be in ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the fastmap unless the beginning or ending characters ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ are the first or last character, respectively, that uses ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ this lead byte. ~~~~~~~~~~~~~~~ @@#### WARNING! In order to determine whether we are the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ first or last character using a lead byte we use and ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ embed in the code some knowledge of how UTF-8 works -- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ at least, the fact that the the first character using a ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ particular lead byte has the minimum-numbered trailing ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ byte in all its trailing bytes, and the last character ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ using a particular lead byte has the maximum-numbered ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ trailing byte in all its trailing bytes. We abstract ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ away the actual minimum/maximum trailing byte numbers, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ at least. We could perhaps do this more portably by ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ just looking at the representation of the character one ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ higher or lower and seeing if the lead byte changes, but ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ you'd run into the problem of invalid characters, e.g. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if you're at the edge of the range of surrogates or are ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the top-most allowed character. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ ~~ if (first < 0x80) ~~~~~~~~~~~~~~~~~ firstlead = first; ~~~~~~~~~~~~~~~~~~ else ~~~~ { ~ Ibyte strr[MAX_ICHAR_LEN]; ~~~~~~~~~~~~~~~~~~~~~~~~~~ Bytecount slen = set_itext_ichar (strr, first); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int kk; ~~~~~~~ /* Determine if we're the first character using our ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ leading byte. */ ~~~~~~~~~~~~~~~~ for (kk = 1; kk < slen; kk++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (strr[kk] != FIRST_TRAILING_BYTE) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ /* If not, this leading byte might occur, so ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ make sure it gets added to the fastmap. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ firstlead = *strr + 1; ~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ } ~ /* Otherwise, we're the first character using our ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ leading byte, and we don't need to add the leading ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ byte to the fastmap. (If our range doesn't ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ completely cover the leading byte, it will get added ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ anyway by the code handling the other end of the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ range.) */ ~~~~~~~~~~ firstlead = *strr; ~~~~~~~~~~~~~~~~~~ } ~ if (last < 0x80) ~~~~~~~~~~~~~~~~ lastlead = last; ~~~~~~~~~~~~~~~~ else ~~~~ { ~ Ibyte strr[MAX_ICHAR_LEN]; ~~~~~~~~~~~~~~~~~~~~~~~~~~ Bytecount slen ~~~~~~~~~~~~~~ = set_itext_ichar (strr, ~~~~~~~~~~~~~~~~~~~~~~~~ min (last, ~~~~~~~~~~ CHAR_CODE_LIMIT - 1)); ~~~~~~~~~~~~~~~~~~~~~~ int kk; ~~~~~~~ /* Same as above but for the last character using ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ our leading byte. */ ~~~~~~~~~~~~~~~~~~~~ for (kk = 1; kk < slen; kk++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (strr[kk] != LAST_TRAILING_BYTE) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ lastlead = *strr - 1; ~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ } ~ lastlead = *strr; ~~~~~~~~~~~~~~~~~ } ~ /* Now, FIRSTLEAD and LASTLEAD are set to the beginning and ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ end, inclusive, of a range of lead bytes that cannot be ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ in the fastmap. Essentially, we want to set all the other ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bytes to be in the fastmap. Here we handle those after ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the previous range and before this one. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (jj = smallest_prev; jj < firstlead; jj++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[jj] = 1; ~~~~~~~~~~~~~~~~ smallest_prev = lastlead + 1; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* Also set lead bytes after the end of the final range. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (j = smallest_prev; j < 0x100; j++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[j] = 1; ~~~~~~~~~~~~~~~ /* If it's not a possible first byte, it can't be in the fastmap. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ In UTF-8, lead bytes are not contiguous with ASCII, so a ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ range spanning the ASCII/non-ASCII boundary will put ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ extraneous bytes in the range [0x80 - 0xBF] in the fastmap. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (j = 0x80; j < 0x100; j++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!ibyte_first_byte_p (j)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[j] = 0; ~~~~~~~~~~~~~~~ #endif /* UNICODE_INTERNAL */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ break; ~~~~~~ #endif /* MULE */ ~~~~~~~~~~~~~~~~~ case anychar: ~~~~~~~~~~~~~ { ~ int fastmap_newline = fastmap['\n']; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* `.' matches anything ... */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef MULE ~~~~~~~~~~~ /* "anything" only includes bytes that can be the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ first byte of a character. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (j = 0; j < 0x100; j++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (ibyte_first_byte_p (j)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[j] = 1; ~~~~~~~~~~~~~~~ #else ~~~~~ for (j = 0; j < (1 << BYTEWIDTH); j++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[j] = 1; ~~~~~~~~~~~~~~~ #endif ~~~~~~ /* ... except perhaps newline. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!(bufp->syntax & RE_DOT_NEWLINE)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap['\n'] = fastmap_newline; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Return if we have already set `can_be_null'; if we have, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ then the fastmap is irrelevant. Something's wrong here. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ else if (bufp->can_be_null) ~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto done; ~~~~~~~~~~ /* Otherwise, have to check alternative paths. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ } ~ #ifndef emacs ~~~~~~~~~~~~~ case wordchar: ~~~~~~~~~~~~~~ for (j = 0; j < (1 << BYTEWIDTH); j++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (SYNTAX (ignored, j) == Sword) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[j] = 1; ~~~~~~~~~~~~~~~ break; ~~~~~~ case notwordchar: ~~~~~~~~~~~~~~~~~ for (j = 0; j < (1 << BYTEWIDTH); j++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (SYNTAX (ignored, j) != Sword) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[j] = 1; ~~~~~~~~~~~~~~~ break; ~~~~~~ #else /* emacs */ ~~~~~~~~~~~~~~~~~ case wordchar: ~~~~~~~~~~~~~~ case notwordchar: ~~~~~~~~~~~~~~~~~ case wordbound: ~~~~~~~~~~~~~~~ case notwordbound: ~~~~~~~~~~~~~~~~~~ case wordbeg: ~~~~~~~~~~~~~ case wordend: ~~~~~~~~~~~~~ case notsyntaxspec: ~~~~~~~~~~~~~~~~~~~ case syntaxspec: ~~~~~~~~~~~~~~~~ /* This match depends on text properties. These end with ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ aborting optimizations. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->can_be_null = 1; ~~~~~~~~~~~~~~~~~~~~~~ goto done; ~~~~~~~~~~ #if 0 /* all of the following code is unused now that the `syntax-table' ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ property exists -- it's trickier to do this than just look in ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the buffer. &&#### but we could just use the syntax-cache stuff ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ instead; why don't we? --ben */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case wordchar: ~~~~~~~~~~~~~~ k = (int) Sword; ~~~~~~~~~~~~~~~~ goto matchsyntax; ~~~~~~~~~~~~~~~~~ case notwordchar: ~~~~~~~~~~~~~~~~~ k = (int) Sword; ~~~~~~~~~~~~~~~~ goto matchnotsyntax; ~~~~~~~~~~~~~~~~~~~~ case syntaxspec: ~~~~~~~~~~~~~~~~ k = *p++; ~~~~~~~~~ matchsyntax: ~~~~~~~~~~~~ #ifdef MULE ~~~~~~~~~~~ for (j = 0; j < 0x80; j++) ~~~~~~~~~~~~~~~~~~~~~~~~~~ if (SYNTAX ~~~~~~~~~~ (XCHAR_TABLE (BUFFER_MIRROR_SYNTAX_TABLE (lispbuf)), j) == ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (enum syntaxcode) k) ~~~~~~~~~~~~~~~~~~~~ fastmap[j] = 1; ~~~~~~~~~~~~~~~ /* @@#### To be correct, we need to set the fastmap for any ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ lead byte any of whose characters can have this syntax code. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ This is hard to calculate so we just punt for now. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (j = 0x80; j < 0x100; j++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (ibyte_first_byte_p (j)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[j] = 1; ~~~~~~~~~~~~~~~ #else /* not MULE */ ~~~~~~~~~~~~~~~~~~~~ for (j = 0; j < (1 << BYTEWIDTH); j++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (SYNTAX ~~~~~~~~~~ (XCHAR_TABLE (BUFFER_MIRROR_SYNTAX_TABLE (lispbuf)), j) == ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (enum syntaxcode) k) ~~~~~~~~~~~~~~~~~~~~ fastmap[j] = 1; ~~~~~~~~~~~~~~~ #endif /* MULE */ ~~~~~~~~~~~~~~~~~ break; ~~~~~~ case notsyntaxspec: ~~~~~~~~~~~~~~~~~~~ k = *p++; ~~~~~~~~~ matchnotsyntax: ~~~~~~~~~~~~~~~ #ifdef MULE ~~~~~~~~~~~ for (j = 0; j < 0x80; j++) ~~~~~~~~~~~~~~~~~~~~~~~~~~ if (SYNTAX ~~~~~~~~~~ (XCHAR_TABLE ~~~~~~~~~~~~ (BUFFER_MIRROR_SYNTAX_TABLE (lispbuf)), j) != ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (enum syntaxcode) k) ~~~~~~~~~~~~~~~~~~~~ fastmap[j] = 1; ~~~~~~~~~~~~~~~ /* @@#### To be correct, we need to set the fastmap for any ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ lead byte all of whose characters do not have this syntax code. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ This is hard to calculate so we just punt for now. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (j = 0x80; j < 0x100; j++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (ibyte_first_byte_p (j)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[j] = 1; ~~~~~~~~~~~~~~~ #else /* not MULE */ ~~~~~~~~~~~~~~~~~~~~ for (j = 0; j < (1 << BYTEWIDTH); j++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (SYNTAX ~~~~~~~~~~ (XCHAR_TABLE ~~~~~~~~~~~~ (BUFFER_MIRROR_SYNTAX_TABLE (lispbuf)), j) != ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (enum syntaxcode) k) ~~~~~~~~~~~~~~~~~~~~ fastmap[j] = 1; ~~~~~~~~~~~~~~~ #endif /* MULE */ ~~~~~~~~~~~~~~~~~ break; ~~~~~~ #endif /* 0 */ ~~~~~~~~~~~~~~ #ifdef MULE ~~~~~~~~~~~ /* 97/2/17 jhod category patch */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case categoryspec: ~~~~~~~~~~~~~~~~~~ case notcategoryspec: ~~~~~~~~~~~~~~~~~~~~~ bufp->can_be_null = 1; ~~~~~~~~~~~~~~~~~~~~~~ UNBIND_REGEX_MALLOC_CHECK (); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return 0; ~~~~~~~~~ /* end if category patch */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif /* MULE */ ~~~~~~~~~~~~~~~~~ /* All cases after this match the empty string. These end with ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `continue'. */ ~~~~~~~~~~~~~~~ case before_dot: ~~~~~~~~~~~~~~~~ case at_dot: ~~~~~~~~~~~~ case after_dot: ~~~~~~~~~~~~~~~ continue; ~~~~~~~~~ #endif /* emacs */ ~~~~~~~~~~~~~~~~~~ case no_op: ~~~~~~~~~~~ case begline: ~~~~~~~~~~~~~ case endline: ~~~~~~~~~~~~~ case begbuf: ~~~~~~~~~~~~ case endbuf: ~~~~~~~~~~~~ #ifndef emacs ~~~~~~~~~~~~~ case wordbound: ~~~~~~~~~~~~~~~ case notwordbound: ~~~~~~~~~~~~~~~~~~ case wordbeg: ~~~~~~~~~~~~~ case wordend: ~~~~~~~~~~~~~ #endif ~~~~~~ case push_dummy_failure: ~~~~~~~~~~~~~~~~~~~~~~~~ continue; ~~~~~~~~~ case jump_n: ~~~~~~~~~~~~ case pop_failure_jump: ~~~~~~~~~~~~~~~~~~~~~~ case maybe_pop_jump: ~~~~~~~~~~~~~~~~~~~~ case jump: ~~~~~~~~~~ case jump_past_alt: ~~~~~~~~~~~~~~~~~~~ case dummy_failure_jump: ~~~~~~~~~~~~~~~~~~~~~~~~ EXTRACT_NUMBER_AND_INCR (j, p); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ p += j; ~~~~~~~ if (j > 0) ~~~~~~~~~~ continue; ~~~~~~~~~ /* Jump backward implies we just went through the body of a ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ loop and matched nothing. Opcode jumped to should be ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `on_failure_jump' or `succeed_n'. Just treat it like an ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ordinary jump. For a * loop, it has pushed its failure ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ point already; if so, discard that as redundant. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if ((re_opcode_t) *p != on_failure_jump ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ && (re_opcode_t) *p != succeed_n) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ continue; ~~~~~~~~~ p++; ~~~~ EXTRACT_NUMBER_AND_INCR (j, p); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ p += j; ~~~~~~~ /* If what's on the stack is where we are now, pop it. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!FAIL_STACK_EMPTY () ~~~~~~~~~~~~~~~~~~~~~~~~ && fail_stack.stack[fail_stack.avail - 1].pointer == p) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fail_stack.avail--; ~~~~~~~~~~~~~~~~~~~ continue; ~~~~~~~~~ case on_failure_jump: ~~~~~~~~~~~~~~~~~~~~~ case on_failure_keep_string_jump: ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ handle_on_failure_jump: ~~~~~~~~~~~~~~~~~~~~~~~ EXTRACT_NUMBER_AND_INCR (j, p); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* For some patterns, e.g., `(a?)?', `p+j' here points to the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ end of the pattern. We don't want to push such a point, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ since when we restore it above, entering the switch will ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ increment `p' past the end of the pattern. We don't need ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ to push such a point since we obviously won't find any more ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap entries beyond `pend'. Such a pattern can match ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the null string, though. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (p + j < pend) ~~~~~~~~~~~~~~~~~ { ~ if (!PUSH_PATTERN_OP (p + j, fail_stack)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ RESET_FAIL_STACK (); ~~~~~~~~~~~~~~~~~~~~ UNBIND_REGEX_MALLOC_CHECK (); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return -2; ~~~~~~~~~~ } ~ } ~ else ~~~~ bufp->can_be_null = 1; ~~~~~~~~~~~~~~~~~~~~~~ if (succeed_n_p) ~~~~~~~~~~~~~~~~ { ~ EXTRACT_NUMBER_AND_INCR (k, p); /* Skip the n. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ succeed_n_p = false; ~~~~~~~~~~~~~~~~~~~~ } ~ continue; ~~~~~~~~~ case succeed_n: ~~~~~~~~~~~~~~~ /* Get to the number of times to succeed. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ p += 2; ~~~~~~~ /* Increment p past the n for when k != 0. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ EXTRACT_NONNEGATIVE_AND_INCR (k, p); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (k == 0) ~~~~~~~~~~~ { ~ p -= 4; ~~~~~~~ succeed_n_p = true; /* Spaghetti code alert. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto handle_on_failure_jump; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ continue; ~~~~~~~~~ case set_number_at: ~~~~~~~~~~~~~~~~~~~ p += 4; ~~~~~~~ continue; ~~~~~~~~~ case start_memory: ~~~~~~~~~~~~~~~~~~ case stop_memory: ~~~~~~~~~~~~~~~~~ p += 4; ~~~~~~~ continue; ~~~~~~~~~ default: ~~~~~~~~ ABORT (); /* We have listed all the cases. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } /* switch *p++ */ ~~~~~~~~~~~~~~~~~~~ /* Getting here means we have found the possible starting ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ characters for one path of the pattern -- and that the empty ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ string does not match. We need not follow this path further. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Instead, look at the next alternative (remembered on the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ stack), or quit if no more. The test at the top of the loop ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ does these things. */ ~~~~~~~~~~~~~~~~~~~~~~ path_can_be_null = false; ~~~~~~~~~~~~~~~~~~~~~~~~~ p = pend; ~~~~~~~~~ } /* while p */ ~~~~~~~~~~~~~~~ /* Set `can_be_null' for the last path (also the first path, if the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ pattern is empty). */ ~~~~~~~~~~~~~~~~~~~~~~ bufp->can_be_null |= path_can_be_null; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ done: ~~~~~ RESET_FAIL_STACK (); ~~~~~~~~~~~~~~~~~~~~ UNBIND_REGEX_MALLOC_CHECK (); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return 0; ~~~~~~~~~ } /* re_compile_fastmap */ ~~~~~~~~~~~~~~~~~~~~~~~~~~ ~ /* Set REGS to hold NUM_REGS registers, storing them in STARTS and ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ENDS. Subsequent matches using PATTERN_BUFFER and REGS will use ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ this memory for recording register information. STARTS and ENDS ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ must be allocated using the malloc library routine, and must each ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ be at least NUM_REGS * sizeof (regoff_t) bytes long. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ If NUM_REGS == 0, then subsequent matches should allocate their own ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ register data. ~~~~~~~~~~~~~~ Unless this function is called, the first search or match using ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PATTERN_BUFFER will allocate its own register data, without ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ freeing the old data. */ ~~~~~~~~~~~~~~~~~~~~~~~~~ void ~~~~ re_set_registers (struct re_pattern_buffer *bufp, struct re_registers *regs, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int num_regs, regoff_t *starts, regoff_t *ends) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ if (num_regs) ~~~~~~~~~~~~~ { ~ bufp->regs_allocated = REGS_REALLOCATE; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regs->num_regs = num_regs; ~~~~~~~~~~~~~~~~~~~~~~~~~~ regs->start = starts; ~~~~~~~~~~~~~~~~~~~~~ regs->end = ends; ~~~~~~~~~~~~~~~~~ } ~ else ~~~~ { ~ bufp->regs_allocated = REGS_UNALLOCATED; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regs->num_regs = 0; ~~~~~~~~~~~~~~~~~~~ regs->start = regs->end = (regoff_t *) 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ } ~ ~ /* Searching routines. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Like re_search_2, below, but only one string is specified, and ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ doesn't let you say where to stop matching. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int ~~~ re_search (struct re_pattern_buffer *bufp, const char *string, int size, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int startpos, int range, struct re_registers *regs ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RE_LISP_CONTEXT_ARGS_DECL) ~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ return re_search_2 (bufp, NULL, 0, string, size, startpos, range, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regs, size RE_LISP_CONTEXT_ARGS); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* Using the compiled pattern in BUFP->buffer, first tries to match the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ virtual concatenation of STRING1 and STRING2, starting first at index ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ STARTPOS, then at STARTPOS + 1, and so on. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ STRING1 and STRING2 have length SIZE1 and SIZE2, respectively. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RANGE is how far to scan while trying to match. RANGE = 0 means try ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ only at STARTPOS; in general, the last start tried is STARTPOS + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RANGE. ~~~~~~ All sizes and positions refer to bytes (not chars); under Mule, the code ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ knows about the format of the text and will only check at positions ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ where a character starts. ~~~~~~~~~~~~~~~~~~~~~~~~~ With MULE, RANGE is a byte position, not a char position. The last ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ start tried is the character starting <= STARTPOS + RANGE. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ In REGS, return the indices of the virtual concatenation of STRING1 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ and STRING2 that matched the entire BUFP->buffer and its contained ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ subexpressions. ~~~~~~~~~~~~~~~ Do not consider matching one past the index STOP in the virtual ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ concatenation of STRING1 and STRING2. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ We return either the position in the strings at which the match was ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ found, -1 if no match, or -2 if error (such as failure ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ stack overflow). */ ~~~~~~~~~~~~~~~~~~~~ int ~~~ re_search_2 (struct re_pattern_buffer *bufp, const char *str1, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int size1, const char *str2, int size2, int startpos, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int range, struct re_registers *regs, int stop ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RE_LISP_CONTEXT_ARGS_DECL) ~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ int val; ~~~~~~~~ re_char *string1 = (re_char *) str1; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_char *string2 = (re_char *) str2; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ REGISTER char *fastmap = bufp->fastmap; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ REGISTER RE_TRANSLATE_TYPE translate = bufp->translate; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int total_size = size1 + size2; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int endpos = startpos + range; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef REGEX_BEGLINE_CHECK ~~~~~~~~~~~~~~~~~~~~~~~~~~ int anchored_at_begline = 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ re_char *d; ~~~~~~~~~~~ #ifdef emacs ~~~~~~~~~~~~ Internal_Format fmt = buffer_or_other_internal_format (lispobj); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef REL_ALLOC ~~~~~~~~~~~~~~~~ const Ibyte *orig_buftext = ~~~~~~~~~~~~~~~~~~~~~~~~~~~ BUFFERP (lispobj) ? ~~~~~~~~~~~~~~~~~~~ BYTE_BUF_BYTE_ADDRESS (XBUFFER (lispobj), ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ BYTE_BUF_BEG (XBUFFER (lispobj))) : ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 0; ~~ #endif ~~~~~~ #ifdef ERROR_CHECK_MALLOC ~~~~~~~~~~~~~~~~~~~~~~~~~ int depth; ~~~~~~~~~~ #endif ~~~~~~ #endif /* emacs */ ~~~~~~~~~~~~~~~~~~ int forward_search_p; ~~~~~~~~~~~~~~~~~~~~~ /* Check for out-of-range STARTPOS. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (startpos < 0 || startpos > total_size) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return -1; ~~~~~~~~~~ /* Fix up RANGE if it might eventually take us outside ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the virtual concatenation of STRING1 and STRING2. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (endpos < 0) ~~~~~~~~~~~~~~~ range = 0 - startpos; ~~~~~~~~~~~~~~~~~~~~~ else if (endpos > total_size) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ range = total_size - startpos; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ forward_search_p = range > 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (void) (forward_search_p); /* This is only used with assertions, silence the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ compiler warning when they're turned off. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* If the search isn't to be a backwards one, don't waste time in a ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ search for a pattern that must be anchored. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (bufp->used > 0 && (re_opcode_t) bufp->buffer[0] == begbuf && range > 0) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ if (startpos > 0) ~~~~~~~~~~~~~~~~~ return -1; ~~~~~~~~~~ else ~~~~ { ~ d = ((const unsigned char *) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (startpos >= size1 ? string2 - size1 : string1) + startpos); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ range = itext_ichar_len_fmt (d, fmt); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ } ~ #ifdef emacs ~~~~~~~~~~~~ /* In a forward search for something that starts with \=. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ don't keep searching past point. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (bufp->used > 0 && (re_opcode_t) bufp->buffer[0] == at_dot && range > 0) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ if (!BUFFERP (lispobj)) ~~~~~~~~~~~~~~~~~~~~~~~ return -1; ~~~~~~~~~~ range = (BYTE_BUF_PT (XBUFFER (lispobj)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - BYTE_BUF_BEGV (XBUFFER (lispobj)) - startpos); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (range < 0) ~~~~~~~~~~~~~~ return -1; ~~~~~~~~~~ } ~ #endif /* emacs */ ~~~~~~~~~~~~~~~~~~ #ifdef ERROR_CHECK_MALLOC ~~~~~~~~~~~~~~~~~~~~~~~~~ /* Do this after the above return()s. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ depth = bind_regex_malloc_disallowed (1); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ /* Update the fastmap now if not correct already. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ BEGIN_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~~~ if (fastmap && !bufp->fastmap_accurate) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (re_compile_fastmap (bufp RE_LISP_SHORT_CONTEXT_ARGS) == -2) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ END_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~ UNBIND_REGEX_MALLOC_CHECK (); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return -2; ~~~~~~~~~~ } ~ END_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~ RE_SEARCH_RELOCATE_MOVEABLE_DATA_POINTERS (); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef REGEX_BEGLINE_CHECK ~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ long i = 0; ~~~~~~~~~~~ while (i < bufp->used) ~~~~~~~~~~~~~~~~~~~~~~ { ~ if (bufp->buffer[i] == start_memory || ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->buffer[i] == stop_memory) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ i += 4; ~~~~~~~ else ~~~~ break; ~~~~~~ } ~ anchored_at_begline = i < bufp->used && bufp->buffer[i] == begline; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ #endif ~~~~~~ #ifdef emacs ~~~~~~~~~~~~ BEGIN_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~~~ /* Update the mirror syntax table if it's used and dirty. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ SYNTAX_CODE (BUFFER_MIRROR_SYNTAX_TABLE (lispbuf), 'a'); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ scache = setup_syntax_cache (scache, lispobj, lispbuf, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ offset_to_bytexpos (lispobj, startpos), ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 1); ~~~ END_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~ RE_SEARCH_RELOCATE_MOVEABLE_DATA_POINTERS (); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ /* Loop through the string, looking for a place to start matching. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (;;) ~~~~~~~~ { ~ #ifdef REGEX_BEGLINE_CHECK ~~~~~~~~~~~~~~~~~~~~~~~~~~ /* If the regex is anchored at the beginning of a line (i.e. with a ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ^), then we can speed things up by skipping to the next ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ beginning-of-line. However, to determine "beginning of line" we ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ need to look at the previous char, so can't do this check if at ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ beginning of either string. (Well, we could if at the beginning of ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the second string, but it would require additional code, and this ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ is just an optimization.) */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (anchored_at_begline && startpos > 0 && startpos != size1) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ if (range > 0) ~~~~~~~~~~~~~~ { ~ /* whose stupid idea was it anyway to make this ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ function take two strings to match?? */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int lim = 0; ~~~~~~~~~~~~ re_char *orig_d; ~~~~~~~~~~~~~~~~ re_char *stop_d; ~~~~~~~~~~~~~~~~ /* Compute limit as below in fastmap code, so we are guaranteed ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ to remain within a single string. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (startpos < size1 && startpos + range >= size1) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ lim = range - (size1 - startpos); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ d = ((const unsigned char *) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (startpos >= size1 ? string2 - size1 : string1) + startpos); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ orig_d = d; ~~~~~~~~~~~ stop_d = d + range - lim; ~~~~~~~~~~~~~~~~~~~~~~~~~ /* We want to find the next location (including the current ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ one) where the previous char is a newline, so back up one ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ and search forward for a newline. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEC_IBYTEPTR_FMT (d, fmt); /* Ok, since startpos != size1. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Written out as an if-else to avoid testing `translate' ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ inside the loop. */ ~~~~~~~~~~~~~~~~~~~~ if (TRANSLATE_P (translate)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ while (d < stop_d && ~~~~~~~~~~~~~~~~~~~~ RE_TRANSLATE_1 (itext_ichar_fmt (d, fmt, lispobj)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ != '\n') ~~~~~~~~ INC_IBYTEPTR_FMT (d, fmt); ~~~~~~~~~~~~~~~~~~~~~~~~~~ else ~~~~ while (d < stop_d && ~~~~~~~~~~~~~~~~~~~~ itext_ichar_ascii_fmt (d, fmt, lispobj) != '\n') ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ INC_IBYTEPTR_FMT (d, fmt); ~~~~~~~~~~~~~~~~~~~~~~~~~~ /* If we were stopped by a newline, skip forward over it. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Otherwise we will get in an infloop when our start position ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ was at begline. */ ~~~~~~~~~~~~~~~~~~ if (d < stop_d) ~~~~~~~~~~~~~~~ INC_IBYTEPTR_FMT (d, fmt); ~~~~~~~~~~~~~~~~~~~~~~~~~~ range -= d - orig_d; ~~~~~~~~~~~~~~~~~~~~ startpos += d - orig_d; ~~~~~~~~~~~~~~~~~~~~~~~ assert (!forward_search_p || range >= 0); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ else if (range < 0) ~~~~~~~~~~~~~~~~~~~ { ~ /* We're lazy, like in the fastmap code below */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Ichar c; ~~~~~~~~ d = ((const unsigned char *) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (startpos >= size1 ? string2 - size1 : string1) + startpos); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEC_IBYTEPTR_FMT (d, fmt); ~~~~~~~~~~~~~~~~~~~~~~~~~~ c = itext_ichar_fmt (d, fmt, lispobj); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ c = RE_TRANSLATE (c); ~~~~~~~~~~~~~~~~~~~~~ if (c != '\n') ~~~~~~~~~~~~~~ goto advance; ~~~~~~~~~~~~~ } ~ } ~ #endif /* REGEX_BEGLINE_CHECK */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* If a fastmap is supplied, skip quickly over characters that ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ cannot be the start of a match. If the pattern can match the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ null string, however, we don't need to skip characters; we want ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the first null string. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~ if (fastmap && startpos < total_size && !bufp->can_be_null) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ /* For the moment, fastmap always works as if buffer ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ is in default format, so convert chars in the search strings ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ into default format as we go along, if necessary. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ &&#### fastmap needs rethinking for 8-bit-fixed so ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ it's faster. We need it to reflect the raw ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 8-bit-fixed values. That isn't so hard if we assume ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ that the top 96 bytes represent a single 1-byte ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ charset. For 16-bit/32-bit stuff it's probably not ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ worth it to make the fastmap represent the raw, due to ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ its nature -- we'd have to use the LSB for the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap, and that causes lots of problems with Mule ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ chars, where it essentially wipes out the usefulness ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ of the fastmap entirely. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (range > 0) /* Searching forwards. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ int lim = 0; ~~~~~~~~~~~~ int irange = range; ~~~~~~~~~~~~~~~~~~~ if (startpos < size1 && startpos + range >= size1) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ lim = range - (size1 - startpos); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ d = ((const unsigned char *) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (startpos >= size1 ? string2 - size1 : string1) + startpos); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Written out as an if-else to avoid testing `translate' ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ inside the loop. */ ~~~~~~~~~~~~~~~~~~~~ if (TRANSLATE_P (translate)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ while (range > lim) ~~~~~~~~~~~~~~~~~~~ { ~ re_char *old_d = d; ~~~~~~~~~~~~~~~~~~~ #ifdef MULE ~~~~~~~~~~~ Ibyte tempch[MAX_ICHAR_LEN]; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Ichar buf_ch = ~~~~~~~~~~~~~~ RE_TRANSLATE_1 (itext_ichar_fmt (d, fmt, lispobj)); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ set_itext_ichar (tempch, buf_ch); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (fastmap[*tempch]) ~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ #else ~~~~~ if (fastmap[(unsigned char) RE_TRANSLATE_1 (*d)]) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ #endif /* MULE */ ~~~~~~~~~~~~~~~~~ INC_IBYTEPTR_FMT (d, fmt); ~~~~~~~~~~~~~~~~~~~~~~~~~~ range -= (d - old_d); ~~~~~~~~~~~~~~~~~~~~~ assert (!forward_search_p || range >= 0); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ } ~ #ifdef MULE ~~~~~~~~~~~ else if (fmt != FORMAT_DEFAULT) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ while (range > lim) ~~~~~~~~~~~~~~~~~~~ { ~ re_char *old_d = d; ~~~~~~~~~~~~~~~~~~~ Ibyte tempch[MAX_ICHAR_LEN]; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Ichar buf_ch = itext_ichar_fmt (d, fmt, lispobj); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ set_itext_ichar (tempch, buf_ch); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (fastmap[*tempch]) ~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ INC_IBYTEPTR_FMT (d, fmt); ~~~~~~~~~~~~~~~~~~~~~~~~~~ range -= (d - old_d); ~~~~~~~~~~~~~~~~~~~~~ assert (!forward_search_p || range >= 0); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ } ~ #endif /* MULE */ ~~~~~~~~~~~~~~~~~ else ~~~~ { ~ while (range > lim && !fastmap[*d]) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ re_char *old_d = d; ~~~~~~~~~~~~~~~~~~~ INC_IBYTEPTR (d); ~~~~~~~~~~~~~~~~~ range -= (d - old_d); ~~~~~~~~~~~~~~~~~~~~~ assert (!forward_search_p || range >= 0); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ } ~ startpos += irange - range; ~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ else /* Searching backwards. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ /* #### It's not clear why we don't just write a loop, like ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for the moving-forward case. Perhaps the writer got lazy, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ since backward searches aren't so common. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ d = ((const unsigned char *) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (startpos >= size1 ? string2 - size1 : string1) + startpos); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef MULE ~~~~~~~~~~~ { ~ Ibyte tempch[MAX_ICHAR_LEN]; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Ichar buf_ch = ~~~~~~~~~~~~~~ RE_TRANSLATE (itext_ichar_fmt (d, fmt, lispobj)); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ set_itext_ichar (tempch, buf_ch); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!fastmap[*tempch]) ~~~~~~~~~~~~~~~~~~~~~~ goto advance; ~~~~~~~~~~~~~ } ~ #else ~~~~~ if (!fastmap[(unsigned char) RE_TRANSLATE (*d)]) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto advance; ~~~~~~~~~~~~~ #endif /* MULE */ ~~~~~~~~~~~~~~~~~ } ~ } ~ /* If can't match the null string, and that's all we have left, fail. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (range >= 0 && startpos == total_size && fastmap ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ && !bufp->can_be_null) ~~~~~~~~~~~~~~~~~~~~~~ { ~ UNBIND_REGEX_MALLOC_CHECK (); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return -1; ~~~~~~~~~~ } ~ #ifdef emacs /* XEmacs added, w/removal of immediate_quit */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!no_quit_in_re_search) ~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ BEGIN_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~~~ QUIT; ~~~~~ END_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~ RE_SEARCH_RELOCATE_MOVEABLE_DATA_POINTERS (); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ #endif ~~~~~~ BEGIN_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~~~ val = re_match_2_internal (bufp, string1, size1, string2, size2, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ startpos, regs, stop ~~~~~~~~~~~~~~~~~~~~ RE_LISP_CONTEXT_ARGS); ~~~~~~~~~~~~~~~~~~~~~~ #ifndef REGEX_MALLOC ~~~~~~~~~~~~~~~~~~~~ ALLOCA_GARBAGE_COLLECT (); ~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ END_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~ RE_SEARCH_RELOCATE_MOVEABLE_DATA_POINTERS (); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (val >= 0) ~~~~~~~~~~~~~ { ~ UNBIND_REGEX_MALLOC_CHECK (); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return startpos; ~~~~~~~~~~~~~~~~ } ~ if (val == -2) ~~~~~~~~~~~~~~ { ~ UNBIND_REGEX_MALLOC_CHECK (); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return -2; ~~~~~~~~~~ } ~ RE_SEARCH_RELOCATE_MOVEABLE_DATA_POINTERS (); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ advance: ~~~~~~~~ if (!range) ~~~~~~~~~~~ break; ~~~~~~ else if (range > 0) ~~~~~~~~~~~~~~~~~~~ { ~ Bytecount d_size; ~~~~~~~~~~~~~~~~~ d = ((const unsigned char *) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (startpos >= size1 ? string2 - size1 : string1) + startpos); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ d_size = itext_ichar_len_fmt (d, fmt); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ range -= d_size; ~~~~~~~~~~~~~~~~ assert (!forward_search_p || range >= 0); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ startpos += d_size; ~~~~~~~~~~~~~~~~~~~ } ~ else ~~~~ { ~ Bytecount d_size; ~~~~~~~~~~~~~~~~~ /* Note startpos > size1 not >=. If we are on the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ string1/string2 boundary, we want to backup into string1. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ d = ((const unsigned char *) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (startpos > size1 ? string2 - size1 : string1) + startpos); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEC_IBYTEPTR_FMT (d, fmt); ~~~~~~~~~~~~~~~~~~~~~~~~~~ d_size = itext_ichar_len_fmt (d, fmt); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ range += d_size; ~~~~~~~~~~~~~~~~ assert (!forward_search_p || range >= 0); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ startpos -= d_size; ~~~~~~~~~~~~~~~~~~~ } ~ } ~ UNBIND_REGEX_MALLOC_CHECK (); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return -1; ~~~~~~~~~~ } /* re_search_2 */ ~~~~~~~~~~~~~~~~~~~ ~ /* Declarations and macros for re_match_2. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* This converts PTR, a pointer into one of the search strings `string1' ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ and `string2' into an offset from the beginning of that string. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define POINTER_TO_OFFSET(ptr) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (FIRST_STRING_P (ptr) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~ ? ((regoff_t) ((ptr) - string1)) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ : ((regoff_t) ((ptr) - string2 + size1))) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Macros for dealing with the split strings in re_match_2. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define MATCHING_IN_FIRST_STRING (dend == end_match_1) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Call before fetching a character with *d. This switches over to ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ string2 if necessary. */ ~~~~~~~~~~~~~~~~~~~~~~~~~ #define REGEX_PREFETCH() \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ while (d == dend) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { \ ~~~~~~~~~~~ /* End of string2 => fail. */ \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (dend == end_match_2) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto fail; \ ~~~~~~~~~~~~~~~~~~ /* End of string1 => advance to string2. */ \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ d = string2; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~ dend = end_match_2; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* Test if at very beginning or at very end of the virtual concatenation ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ of `string1' and `string2'. If only one string, it's `string2'. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define AT_STRINGS_BEG(d) ((d) == (size1 ? string1 : string2) || !size2) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define AT_STRINGS_END(d) ((d) == end2) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* XEmacs change: ~~~~~~~~~~~~~~~~~ If the given position straddles the string gap, return the equivalent ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ position that is before or after the gap, respectively; otherwise, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return the same position. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define POS_BEFORE_GAP_UNSAFE(d) ((d) == string2 ? end1 : (d)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define POS_AFTER_GAP_UNSAFE(d) ((d) == end1 ? string2 : (d)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Test if CH is a word-constituent character. (XEmacs change) */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define WORDCHAR_P(ch) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (SYNTAX (BUFFER_MIRROR_SYNTAX_TABLE (lispbuf), ch) == Sword) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Free everything we malloc. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef MATCH_MAY_ALLOCATE ~~~~~~~~~~~~~~~~~~~~~~~~~ #define FREE_VAR(var,type) if (var) REGEX_FREE (var, type); var = NULL ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define FREE_VARIABLES() \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ do { \ ~~~~~~~~~~~~~~ UNBIND_REGEX_MALLOC_CHECK (); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ REGEX_FREE_STACK (fail_stack.stack); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ FREE_VAR (regstart, re_char **); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ FREE_VAR (regend, re_char **); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ FREE_VAR (old_regstart, re_char **); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ FREE_VAR (old_regend, re_char **); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ FREE_VAR (best_regstart, re_char **); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ FREE_VAR (best_regend, re_char **); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ FREE_VAR (reg_info, register_info_type *); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ FREE_VAR (reg_dummy, re_char **); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ FREE_VAR (reg_info_dummy, register_info_type *); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } while (0) ~~~~~~~~~~~ #else /* not MATCH_MAY_ALLOCATE */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define FREE_VARIABLES() \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ do { \ ~~~~~~~~~~~ UNBIND_REGEX_MALLOC_CHECK (); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } while (0) ~~~~~~~~~~~ #endif /* MATCH_MAY_ALLOCATE */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* These values must meet several constraints. They must not be valid ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ register values, which means we can use numbers larger than MAX_REGNUM. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ They must differ by 1, because of NUM_FAILURE_ITEMS above. And the value ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for the lowest register must be larger than the value for the highest ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ register, so we do not try to actually save any registers when none are ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ active. */ ~~~~~~~~~~~ #define NO_HIGHEST_ACTIVE_REG (MAX_REGNUM + 1) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define NO_LOWEST_ACTIVE_REG (NO_HIGHEST_ACTIVE_REG + 1) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ~ /* Matching routines. */ ~~~~~~~~~~~~~~~~~~~~~~~~~ #ifndef emacs /* XEmacs never uses this. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* re_match is like re_match_2 except it takes only a single string. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int ~~~ re_match (struct re_pattern_buffer *bufp, const char *string, int size, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int pos, struct re_registers *regs ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RE_LISP_CONTEXT_ARGS_DECL) ~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ int result = re_match_2_internal (bufp, NULL, 0, (re_char *) string, size, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ pos, regs, size ~~~~~~~~~~~~~~~ RE_LISP_CONTEXT_ARGS); ~~~~~~~~~~~~~~~~~~~~~~ ALLOCA_GARBAGE_COLLECT (); ~~~~~~~~~~~~~~~~~~~~~~~~~~ return result; ~~~~~~~~~~~~~~ } ~ #endif /* not emacs */ ~~~~~~~~~~~~~~~~~~~~~~ /* re_match_2 matches the compiled pattern in BUFP against the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (virtual) concatenation of STRING1 and STRING2 (of length SIZE1 and ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ SIZE2, respectively). We start matching at POS, and stop matching ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ at STOP. ~~~~~~~~ If REGS is non-null and the `no_sub' field of BUFP is nonzero, we ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ store offsets for the substring each group matched in REGS. See the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ documentation for exactly how many groups we fill. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ We return -1 if no match, -2 if an internal error (such as the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ failure stack overflowing). Otherwise, we return the length of the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ matched substring. */ ~~~~~~~~~~~~~~~~~~~~~~ int ~~~ re_match_2 (struct re_pattern_buffer *bufp, const char *string1, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int size1, const char *string2, int size2, int pos, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ struct re_registers *regs, int stop ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RE_LISP_CONTEXT_ARGS_DECL) ~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ int result; ~~~~~~~~~~~ #ifdef emacs ~~~~~~~~~~~~ /* Update the mirror syntax table if it's dirty now, this would otherwise ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ cause a malloc() in charset_mule in re_match_2_internal() when checking ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ characters' syntax. */ ~~~~~~~~~~~~~~~~~~~~~~ SYNTAX_CODE (BUFFER_MIRROR_SYNTAX_TABLE (lispbuf), 'a'); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ scache = setup_syntax_cache (scache, lispobj, lispbuf, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ offset_to_bytexpos (lispobj, pos), ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 1); ~~~ #endif ~~~~~~ result = re_match_2_internal (bufp, (re_char *) string1, size1, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (re_char *) string2, size2, ~~~~~~~~~~~~~~~~~~~~~~~~~~~ pos, regs, stop ~~~~~~~~~~~~~~~ RE_LISP_CONTEXT_ARGS); ~~~~~~~~~~~~~~~~~~~~~~ ALLOCA_GARBAGE_COLLECT (); ~~~~~~~~~~~~~~~~~~~~~~~~~~ return result; ~~~~~~~~~~~~~~ } ~ /* This is a separate function so that we can force an alloca cleanup ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ afterwards. */ ~~~~~~~~~~~~~~~ static int ~~~~~~~~~~ re_match_2_internal (struct re_pattern_buffer *bufp, re_char *string1, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int size1, re_char *string2, int size2, int pos, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ struct re_registers *regs, int stop ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RE_LISP_CONTEXT_ARGS_MULE_DECL) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ /* General temporaries. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~ int mcnt; ~~~~~~~~~ re_char *p1; ~~~~~~~~~~~~ int should_succeed; /* XEmacs change */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Just past the end of the corresponding string. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_char *end1, *end2; ~~~~~~~~~~~~~~~~~~~~~ /* Pointers into string1 and string2, just past the last characters in ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ each to consider matching. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_char *end_match_1, *end_match_2; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Where we are in the data, and the end of the current string. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_char *d, *dend; ~~~~~~~~~~~~~~~~~~ /* Where we are in the pattern, and the end of the pattern. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ unsigned char *p; ~~~~~~~~~~~~~~~~~ re_char *pstart; ~~~~~~~~~~~~~~~~ REGISTER re_char *pend; ~~~~~~~~~~~~~~~~~~~~~~~ /* Mark the opcode just after a start_memory, so we can test for an ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ empty subpattern when we get to the stop_memory. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_char *just_past_start_mem = 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* We use this to map every character in the string. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RE_TRANSLATE_TYPE translate = bufp->translate; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Failure point stack. Each place that can handle a failure further ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ down the line pushes a failure point on this stack. It consists of ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ restart, regend, and reg_info for all registers corresponding to ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the subexpressions we're currently inside, plus the number of such ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ registers, and, finally, two char *'s. The first char * is where ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ to resume scanning the pattern; the second one is where to resume ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ scanning the strings. If the latter is zero, the failure point is ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ a ``dummy''; if a failure happens and the failure point is a dummy, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ it gets discarded and the next one is tried. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef MATCH_MAY_ALLOCATE /* otherwise, this is global. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fail_stack_type fail_stack; ~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ #ifdef DEBUG ~~~~~~~~~~~~ static int failure_id; ~~~~~~~~~~~~~~~~~~~~~~ int nfailure_points_pushed = 0, nfailure_points_popped = 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ #ifdef REGEX_REL_ALLOC ~~~~~~~~~~~~~~~~~~~~~~ /* This holds the pointer to the failure stack, when ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ it is allocated relocatably. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fail_stack_elt_t *failure_stack_ptr; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ /* We fill all the registers internally, independent of what we ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return, for use in backreferences. The number here includes ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ an element for register zero. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regnum_t num_regs = bufp->re_ngroups + 1; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* The currently active registers. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regnum_t lowest_active_reg = NO_LOWEST_ACTIVE_REG; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regnum_t highest_active_reg = NO_HIGHEST_ACTIVE_REG; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Information on the contents of registers. These are pointers into ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the input strings; they record just what was matched (on this ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ attempt) by a subexpression part of the pattern, that is, the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regnum-th regstart pointer points to where in the pattern we began ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ matching and the regnum-th regend points to right after where we ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ stopped matching the regnum-th subexpression. (The zeroth register ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ keeps track of what the whole pattern matches.) */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_char **regstart, **regend; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ /* If a group that's operated upon by a repetition operator fails to ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ match anything, then the register for its start will need to be ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ restored because it will have been set to wherever in the string we ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ are when we last see its open-group operator. Similarly for a ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ register's end. */ ~~~~~~~~~~~~~~~~~~~ #ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_char **old_regstart, **old_regend; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ /* The is_active field of reg_info helps us keep track of which (possibly ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ nested) subexpressions we are currently in. The matched_something ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ field of reg_info[reg_num] helps us tell whether or not we have ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ matched any of the pattern so far this time through the reg_num-th ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ subexpression. These two fields get reset each time through any ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ loop their register is in. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef MATCH_MAY_ALLOCATE /* otherwise, this is global. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ register_info_type *reg_info; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ /* The following record the register info as found in the above ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ variables when we find a match better than any we've seen before. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ This happens as we backtrack through the failure points, which in ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ turn happens only if we have not yet matched the entire string. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int best_regs_set = false; ~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_char **best_regstart, **best_regend; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ /* Logically, this is `best_regend[0]'. But we don't want to have to ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ allocate space for that if we're not allocating space for anything ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ else (see below). Also, we never need info about register 0 for ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ any of the other register vectors, and it seems rather a kludge to ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ treat `best_regend' differently than the rest. So we keep track of ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the end of the best match so far in a separate variable. We ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ initialize this to NULL so that when we backtrack the first time ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ and need to test it, it's not garbage. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_char *match_end = NULL; ~~~~~~~~~~~~~~~~~~~~~~~~~~ /* This helps SET_REGS_MATCHED avoid doing redundant work. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int set_regs_matched_done = 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Used when we pop values we don't care about. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_char **reg_dummy; ~~~~~~~~~~~~~~~~~~~~ register_info_type *reg_info_dummy; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ #ifdef DEBUG ~~~~~~~~~~~~ /* Counts the total number of registers pushed. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int num_regs_pushed = 0; ~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ /* 1 if this match ends in the same string (string1 or string2) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ as the best previous match. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_bool same_str_p; ~~~~~~~~~~~~~~~~~~~ /* 1 if this match is the best seen so far. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_bool best_match_p; ~~~~~~~~~~~~~~~~~~~~~ #ifdef emacs ~~~~~~~~~~~~ Internal_Format fmt = buffer_or_other_internal_format (lispobj); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef REL_ALLOC ~~~~~~~~~~~~~~~~ const Ibyte *orig_buftext = ~~~~~~~~~~~~~~~~~~~~~~~~~~~ BUFFERP (lispobj) ? ~~~~~~~~~~~~~~~~~~~ BYTE_BUF_BYTE_ADDRESS (XBUFFER (lispobj), ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ BYTE_BUF_BEG (XBUFFER (lispobj))) : ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 0; ~~ #endif ~~~~~~ #ifdef ERROR_CHECK_MALLOC ~~~~~~~~~~~~~~~~~~~~~~~~~ int depth = bind_regex_malloc_disallowed (1); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ #endif /* emacs */ ~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT1 ("\n\nEntering re_match_2.\n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ BEGIN_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~~~ INIT_FAIL_STACK (); ~~~~~~~~~~~~~~~~~~~ p = (unsigned char *) ALLOCA (bufp->used); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ END_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~ /* re_match_2_internal() modifies the compiled pattern (see the succeed_n, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ jump_n, set_number_at opcodes), make it re-entrant by working on a ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ copy. This should also give better locality of reference. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ memcpy (p, bufp->buffer, bufp->used); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ pstart = (re_char *) p; ~~~~~~~~~~~~~~~~~~~~~~~ pend = pstart + bufp->used; ~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef MATCH_MAY_ALLOCATE ~~~~~~~~~~~~~~~~~~~~~~~~~ /* Do not bother to initialize all the register variables if there are ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ no groups in the pattern, as it takes a fair amount of time. If ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ there are groups, we include space for register 0 (the whole ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ pattern), even though we never use it, since it simplifies the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ array indexing. We should fix this. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (bufp->re_ngroups) ~~~~~~~~~~~~~~~~~~~~~ { ~ BEGIN_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~~~ regstart = REGEX_TALLOC (num_regs, re_char *); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regend = REGEX_TALLOC (num_regs, re_char *); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ old_regstart = REGEX_TALLOC (num_regs, re_char *); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ old_regend = REGEX_TALLOC (num_regs, re_char *); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ best_regstart = REGEX_TALLOC (num_regs, re_char *); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ best_regend = REGEX_TALLOC (num_regs, re_char *); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ reg_info = REGEX_TALLOC (num_regs, register_info_type); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ reg_dummy = REGEX_TALLOC (num_regs, re_char *); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ reg_info_dummy = REGEX_TALLOC (num_regs, register_info_type); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ END_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~ if (!(regstart && regend && old_regstart && old_regend && reg_info ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ && best_regstart && best_regend && reg_dummy && reg_info_dummy)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ FREE_VARIABLES (); ~~~~~~~~~~~~~~~~~~ return -2; ~~~~~~~~~~ } ~ } ~ else ~~~~ { ~ /* We must initialize all our variables to NULL, so that ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `FREE_VARIABLES' doesn't try to free them. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regstart = regend = old_regstart = old_regend = best_regstart ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ = best_regend = reg_dummy = NULL; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ reg_info = reg_info_dummy = (register_info_type *) NULL; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ #endif /* MATCH_MAY_ALLOCATE */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #if defined (emacs) && defined (REL_ALLOC) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ /* If the allocations above (or the call to setup_syntax_cache() in ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_match_2) caused a rel-alloc relocation, then fix up the data ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ pointers */ ~~~~~~~~~~~ Bytecount offset = offset_post_relocation (lispobj, orig_buftext); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (offset) ~~~~~~~~~~~ { ~ string1 += offset; ~~~~~~~~~~~~~~~~~~ string2 += offset; ~~~~~~~~~~~~~~~~~~ } ~ } ~ #endif /* defined (emacs) && defined (REL_ALLOC) */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* The starting position is bogus. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (pos < 0 || pos > size1 + size2) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ FREE_VARIABLES (); ~~~~~~~~~~~~~~~~~~ return -1; ~~~~~~~~~~ } ~ /* Initialize subexpression text positions to our sentinel to mark ones that ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ no start_memory/stop_memory has been seen for. Also initialize the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ register information struct. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (mcnt = 1; mcnt < num_regs; mcnt++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ regstart[mcnt] = regend[mcnt] = old_regstart[mcnt] = old_regend[mcnt] ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ = best_regstart[mcnt] = best_regend[mcnt] = REG_UNSET_VALUE; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ REG_MATCH_NULL_STRING_P (reg_info[mcnt]) = MATCH_NULL_UNSET_VALUE; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ IS_ACTIVE (reg_info[mcnt]) = 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ MATCHED_SOMETHING (reg_info[mcnt]) = 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ EVER_MATCHED_SOMETHING (reg_info[mcnt]) = 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* We move `string1' into `string2' if the latter's empty -- but not if ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `string1' is null. */ ~~~~~~~~~~~~~~~~~~~~~~ if (size2 == 0 && string1 != NULL) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ string2 = string1; ~~~~~~~~~~~~~~~~~~ size2 = size1; ~~~~~~~~~~~~~~ string1 = 0; ~~~~~~~~~~~~ size1 = 0; ~~~~~~~~~~ } ~ end1 = string1 + size1; ~~~~~~~~~~~~~~~~~~~~~~~ end2 = string2 + size2; ~~~~~~~~~~~~~~~~~~~~~~~ /* Compute where to stop matching, within the two strings. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (stop <= size1) ~~~~~~~~~~~~~~~~~~ { ~ end_match_1 = string1 + stop; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ end_match_2 = string2; ~~~~~~~~~~~~~~~~~~~~~~ } ~ else ~~~~ { ~ end_match_1 = end1; ~~~~~~~~~~~~~~~~~~~ end_match_2 = string2 + stop - size1; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* `p' scans through the pattern as `d' scans through the data. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `dend' is the end of the input string that `d' points within. `d' ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ is advanced into the following input string whenever necessary, but ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ this happens before fetching; therefore, at the beginning of the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ loop, `d' can be pointing at the end of a string, but it cannot ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ equal `string2'. */ ~~~~~~~~~~~~~~~~~~~~ if (size1 > 0 && pos <= size1) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ d = string1 + pos; ~~~~~~~~~~~~~~~~~~ dend = end_match_1; ~~~~~~~~~~~~~~~~~~~ } ~ else ~~~~ { ~ d = string2 + pos - size1; ~~~~~~~~~~~~~~~~~~~~~~~~~~ dend = end_match_2; ~~~~~~~~~~~~~~~~~~~ } ~ DEBUG_MATCH_PRINT1 ("The compiled pattern is: \n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT_COMPILED_PATTERN (bufp, p, pend); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT1 ("The string to match is: `"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT_DOUBLE_STRING (d, string1, size1, string2, size2); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT1 ("'\n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* This loops over pattern commands. It exits by returning from the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ function if the match is complete, or it drops through if the match ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fails at this starting point in the input data. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (;;) ~~~~~~~~ { ~ DEBUG_MATCH_PRINT2 ("\n0x%zx: ", (Bytecount) p); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef emacs /* XEmacs added, w/removal of immediate_quit */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!no_quit_in_re_search) ~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ BEGIN_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~~~ QUIT; ~~~~~ END_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~ RE_MATCH_RELOCATE_MOVEABLE_DATA_POINTERS (); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ #endif ~~~~~~ if (p == pend) ~~~~~~~~~~~~~~ { /* End of pattern means we might have succeeded. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT1 ("end of pattern ... "); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* If we haven't matched the entire string, and we want the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ longest match, try backtracking. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (d != end_match_2) ~~~~~~~~~~~~~~~~~~~~~ { ~ same_str_p = (FIRST_STRING_P (match_end) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ == MATCHING_IN_FIRST_STRING); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* AIX compiler got confused when this was combined ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ with the previous declaration. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (same_str_p) ~~~~~~~~~~~~~~~ best_match_p = d > match_end; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ else ~~~~ best_match_p = !MATCHING_IN_FIRST_STRING; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT1 ("backtracking.\n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!FAIL_STACK_EMPTY ()) ~~~~~~~~~~~~~~~~~~~~~~~~~ { /* More failure points to try. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* If exceeds best match so far, save it. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!best_regs_set || best_match_p) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ best_regs_set = true; ~~~~~~~~~~~~~~~~~~~~~ match_end = d; ~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT1 ("\nSAVING match as best so far.\n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (mcnt = 1; mcnt < num_regs; mcnt++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ best_regstart[mcnt] = regstart[mcnt]; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ best_regend[mcnt] = regend[mcnt]; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ } ~ goto fail; ~~~~~~~~~~ } ~ /* If no failure points, don't restore garbage. And if ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ last match is real best match, don't restore second ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ best one. */ ~~~~~~~~~~~~ else if (best_regs_set && !best_match_p) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ restore_best_regs: ~~~~~~~~~~~~~~~~~~ /* Restore best match. It may happen that `dend == ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ end_match_1' while the restored d is in string2. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ For example, the pattern `x.*y.*z' against the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ strings `x-' and `y-z-', if the two strings are ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ not consecutive in memory. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT1 ("Restoring best registers.\n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ d = match_end; ~~~~~~~~~~~~~~ dend = ((d >= string1 && d <= end1) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ? end_match_1 : end_match_2); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (mcnt = 1; mcnt < num_regs; mcnt++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ regstart[mcnt] = best_regstart[mcnt]; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regend[mcnt] = best_regend[mcnt]; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ } ~ } /* d != end_match_2 */ ~~~~~~~~~~~~~~~~~~~~~~~~ succeed_label: ~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT1 ("Accepting match.\n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* If caller wants register contents data back, do it. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ int num_nonshy_regs = bufp->re_nsub + 1; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (regs && !bufp->no_sub) ~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ /* Have the register data arrays been allocated? */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (bufp->regs_allocated == REGS_UNALLOCATED) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { /* No. So allocate them with malloc. We need one ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ extra element beyond `num_regs' for the `-1' marker ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ GNU code uses. */ ~~~~~~~~~~~~~~~~~~ regs->num_regs = MAX (RE_NREGS, num_nonshy_regs + 1); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ BEGIN_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~~~ regs->start = TALLOC (regs->num_regs, regoff_t); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regs->end = TALLOC (regs->num_regs, regoff_t); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ END_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~ RE_MATCH_RELOCATE_MOVEABLE_DATA_POINTERS (); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (regs->start == NULL || regs->end == NULL) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ FREE_VARIABLES (); ~~~~~~~~~~~~~~~~~~ return -2; ~~~~~~~~~~ } ~ bufp->regs_allocated = REGS_REALLOCATE; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ else if (bufp->regs_allocated == REGS_REALLOCATE) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { /* Yes. If we need more elements than were already ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ allocated, reallocate them. If we need fewer, just ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ leave it alone. */ ~~~~~~~~~~~~~~~~~~~ if (regs->num_regs < num_nonshy_regs + 1) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ regs->num_regs = num_nonshy_regs + 1; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ BEGIN_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~~~ RETALLOC (regs->start, regs->num_regs, regoff_t); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RETALLOC (regs->end, regs->num_regs, regoff_t); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ END_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~ RE_MATCH_RELOCATE_MOVEABLE_DATA_POINTERS (); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (regs->start == NULL || regs->end == NULL) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ FREE_VARIABLES (); ~~~~~~~~~~~~~~~~~~ return -2; ~~~~~~~~~~ } ~ } ~ } ~ else ~~~~ { ~ /* The braces fend off a "empty body in an else-statement" ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ warning under GCC when assert expands to nothing. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ assert (bufp->regs_allocated == REGS_FIXED); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* Convert the pointer data in `regstart' and `regend' to ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ indices. Register zero has to be set differently, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ since we haven't kept track of any info for it. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (regs->num_regs > 0) ~~~~~~~~~~~~~~~~~~~~~~~ { ~ regs->start[0] = pos; ~~~~~~~~~~~~~~~~~~~~~ regs->end[0] = (MATCHING_IN_FIRST_STRING ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ? ((regoff_t) (d - string1)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ : ((regoff_t) (d - string2 + size1))); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* Map over the NUM_NONSHY_REGS non-shy internal registers. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Copy each into the corresponding external register. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ MCNT indexes external registers. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (mcnt = 1; mcnt < MIN (num_nonshy_regs, regs->num_regs); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ mcnt++) ~~~~~~~ { ~ int internal_reg = bufp->external_to_internal_register[mcnt]; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if ((int)0xDEADBEEF == internal_reg ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || REG_UNSET (regstart[internal_reg]) || ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ REG_UNSET (regend[internal_reg])) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regs->start[mcnt] = regs->end[mcnt] = -1; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ else ~~~~ { ~ regs->start[mcnt] = ~~~~~~~~~~~~~~~~~~~ (regoff_t) POINTER_TO_OFFSET (regstart[internal_reg]); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regs->end[mcnt] = ~~~~~~~~~~~~~~~~~ (regoff_t) POINTER_TO_OFFSET (regend[internal_reg]); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ } ~ } /* regs && !bufp->no_sub */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* If we have regs and the regs structure has more elements than ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ were in the pattern, set the extra elements starting with ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ NUM_NONSHY_REGS to -1. If we (re)allocated the registers, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ this is the case, because we always allocate enough to have ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ at least one -1 at the end. ~~~~~~~~~~~~~~~~~~~~~~~~~~~ We do this even when no_sub is set because some applications ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (XEmacs) reuse register structures which may contain stale ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ information, and permit attempts to access those registers. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ It would be possible to require the caller to do this, but we'd ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ have to change the API for this function to reflect that, and ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ audit all callers. Note: as of 2003-04-17 callers in XEmacs ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ do clear the registers, but it's safer to leave this code in ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ because of reallocation. ~~~~~~~~~~~~~~~~~~~~~~~~ */ ~~ if (regs && regs->num_regs > 0) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (mcnt = num_nonshy_regs; mcnt < regs->num_regs; mcnt++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regs->start[mcnt] = regs->end[mcnt] = -1; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ DEBUG_MATCH_PRINT4 ("%u failure points pushed, %u popped (%u remain).\n", ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ nfailure_points_pushed, nfailure_points_popped, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ nfailure_points_pushed - nfailure_points_popped); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT2 ("%u registers pushed.\n", num_regs_pushed); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ mcnt = d - pos - (MATCHING_IN_FIRST_STRING ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ? string1 ~~~~~~~~~ : string2 - size1); ~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT2 ("Returning %d from re_match_2.\n", mcnt); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ FREE_VARIABLES (); ~~~~~~~~~~~~~~~~~~ return mcnt; ~~~~~~~~~~~~ } ~ /* Otherwise match next pattern command. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ switch ((re_opcode_t) *p++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ /* Ignore these. Used to ignore the n of succeed_n's which ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ currently have n == 0. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~ case no_op: ~~~~~~~~~~~ DEBUG_MATCH_PRINT1 ("EXECUTING no_op.\n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case succeed: ~~~~~~~~~~~~~ DEBUG_MATCH_PRINT1 ("EXECUTING succeed.\n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto succeed_label; ~~~~~~~~~~~~~~~~~~~ /* Match exactly a string of length n in the pattern. The ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ following byte in the pattern defines n, and the n bytes after ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ that make up the string to match. (Under Mule, this will be in ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the default internal format.) */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case exactn: ~~~~~~~~~~~~ mcnt = *p++; ~~~~~~~~~~~~ DEBUG_MATCH_PRINT2 ("EXECUTING exactn %d.\n", mcnt); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* This is written out as an if-else so we don't waste time ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ testing `translate' inside the loop. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (TRANSLATE_P (translate)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ do ~~ { ~ #ifdef MULE ~~~~~~~~~~~ Bytecount pat_len; ~~~~~~~~~~~~~~~~~~ REGEX_PREFETCH (); ~~~~~~~~~~~~~~~~~~ if (RE_TRANSLATE_1 (itext_ichar_fmt (d, fmt, lispobj)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ != itext_ichar (p)) ~~~~~~~~~~~~~~~~~~~ goto fail; ~~~~~~~~~~ pat_len = itext_ichar_len (p); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ p += pat_len; ~~~~~~~~~~~~~ INC_IBYTEPTR_FMT (d, fmt); ~~~~~~~~~~~~~~~~~~~~~~~~~~ mcnt -= pat_len; ~~~~~~~~~~~~~~~~ #else /* not MULE */ ~~~~~~~~~~~~~~~~~~~~ REGEX_PREFETCH (); ~~~~~~~~~~~~~~~~~~ if ((unsigned char) RE_TRANSLATE_1 (*d++) != *p++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto fail; ~~~~~~~~~~ mcnt--; ~~~~~~~ #endif ~~~~~~ } ~ while (mcnt > 0); ~~~~~~~~~~~~~~~~~ } ~ else ~~~~ { ~ #ifdef MULE ~~~~~~~~~~~ /* If buffer format is default, then we can shortcut and just ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ compare the text directly, byte by byte. Otherwise, we ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ need to go character by character. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (fmt != FORMAT_DEFAULT) ~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ do ~~ { ~ Bytecount pat_len; ~~~~~~~~~~~~~~~~~~ REGEX_PREFETCH (); ~~~~~~~~~~~~~~~~~~ if (itext_ichar_fmt (d, fmt, lispobj) != ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ itext_ichar (p)) ~~~~~~~~~~~~~~~~ goto fail; ~~~~~~~~~~ pat_len = itext_ichar_len (p); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ p += pat_len; ~~~~~~~~~~~~~ INC_IBYTEPTR_FMT (d, fmt); ~~~~~~~~~~~~~~~~~~~~~~~~~~ mcnt -= pat_len; ~~~~~~~~~~~~~~~~ } ~ while (mcnt > 0); ~~~~~~~~~~~~~~~~~ } ~ else ~~~~ #endif ~~~~~~ { ~ do ~~ { ~ REGEX_PREFETCH (); ~~~~~~~~~~~~~~~~~~ if (*d++ != *p++) goto fail; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ mcnt--; ~~~~~~~ } ~ while (mcnt > 0); ~~~~~~~~~~~~~~~~~ } ~ } ~ SET_REGS_MATCHED (); ~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ /* Match any character except possibly a newline or a null. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case anychar: ~~~~~~~~~~~~~ DEBUG_MATCH_PRINT1 ("EXECUTING anychar.\n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ REGEX_PREFETCH (); ~~~~~~~~~~~~~~~~~~ if ((!(bufp->syntax & RE_DOT_NEWLINE) && ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RE_TRANSLATE (itext_ichar_fmt (d, fmt, lispobj)) == '\n') ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || (bufp->syntax & RE_DOT_NOT_NULL && ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RE_TRANSLATE (itext_ichar_fmt (d, fmt, lispobj)) == ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ '\000')) ~~~~~~~~ goto fail; ~~~~~~~~~~ SET_REGS_MATCHED (); ~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT2 (" Matched `%c'.\n", *d); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ INC_IBYTEPTR_FMT (d, fmt); /* XEmacs change */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case charset: ~~~~~~~~~~~~~ case charset_not: ~~~~~~~~~~~~~~~~~ { ~ REGISTER Ichar c; ~~~~~~~~~~~~~~~~~ re_bool not_p = (re_opcode_t) *(p - 1) == charset_not; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT2 ("EXECUTING charset%s.\n", not_p ? "_not" : ""); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ REGEX_PREFETCH (); ~~~~~~~~~~~~~~~~~~ c = itext_ichar_fmt (d, fmt, lispobj); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ c = RE_TRANSLATE (c); /* The character to match. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Cast to `unsigned int' instead of `unsigned char' in case the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bit list is a full 32 bytes long. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if ((unsigned int)c < (unsigned int) (*p * BYTEWIDTH) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ && p[1 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH))) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ not_p = !not_p; ~~~~~~~~~~~~~~~ p += 1 + *p; ~~~~~~~~~~~~ if (!not_p) goto fail; ~~~~~~~~~~~~~~~~~~~~~~ SET_REGS_MATCHED (); ~~~~~~~~~~~~~~~~~~~~ INC_IBYTEPTR_FMT (d, fmt); /* XEmacs change */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ } ~ #ifdef MULE ~~~~~~~~~~~ case charset_mule: ~~~~~~~~~~~~~~~~~~ case charset_mule_not: ~~~~~~~~~~~~~~~~~~~~~~ { ~ REGISTER Ichar c; ~~~~~~~~~~~~~~~~~ re_bool not_p = (re_opcode_t) *(p - 1) == charset_mule_not; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Bitbyte class_bits = *p++; ~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT2 ("EXECUTING charset_mule%s.\n", ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ not_p ? "_not" : ""); ~~~~~~~~~~~~~~~~~~~~~ REGEX_PREFETCH (); ~~~~~~~~~~~~~~~~~~ c = itext_ichar_fmt (d, fmt, lispobj); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ c = RE_TRANSLATE (c); /* The character to match. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if ((class_bits && ~~~~~~~~~~~~~~~~~~ ((class_bits & BIT_WORD && ISWORD (c)) /* = ALNUM */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || (class_bits & BIT_ALPHA && ISALPHA (c)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || (class_bits & BIT_SPACE && ISSPACE (c)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || (class_bits & BIT_PUNCT && ISPUNCT (c)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || (TRANSLATE_P (translate) ? ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (class_bits & (BIT_UPPER | BIT_LOWER) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ && !NOCASEP (lispbuf, c)) ~~~~~~~~~~~~~~~~~~~~~~~~~ : ((class_bits & BIT_UPPER && ISUPPER (c)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || (class_bits & BIT_LOWER && ISLOWER (c)))))) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || EQ (Qt, unified_range_table_lookup ((void *) p, c, Qnil))) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ not_p = !not_p; ~~~~~~~~~~~~~~~ } ~ p += unified_range_table_bytes_used ((void *) p); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!not_p) goto fail; ~~~~~~~~~~~~~~~~~~~~~~ SET_REGS_MATCHED (); ~~~~~~~~~~~~~~~~~~~~ INC_IBYTEPTR_FMT (d, fmt); ~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ } ~ #endif /* MULE */ ~~~~~~~~~~~~~~~~~ /* The beginning of a group is represented by start_memory. The ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ arguments are the register number in the next two bytes, and the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ number of groups inner to this one in the two bytes thereafter. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ The text matched within the group is recorded (in the internal ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ registers data structure) under the register number. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case start_memory: ~~~~~~~~~~~~~~~~~~ { ~ regnum_t regno; ~~~~~~~~~~~~~~~ /* Find out if this group can match the empty string. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ p1 = p; /* To send to group_match_null_string_p. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ EXTRACT_NONNEGATIVE_AND_INCR (regno, p); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT3 ("EXECUTING start_memory %d (%d):\n", ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regno, extract_number (p)); ~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (REG_MATCH_NULL_STRING_P (reg_info[regno]) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ == MATCH_NULL_UNSET_VALUE) ~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ REG_MATCH_NULL_STRING_P (reg_info[regno]) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ = group_match_null_string_p (&p1, pend, reg_info); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ DEBUG_MATCH_PRINT2 (" group CAN%s match null string\n", ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ REG_MATCH_NULL_STRING_P (reg_info[regno]) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ? "NOT" : ""); ~~~~~~~~~~~~~~ /* Save the position in the string where we were the last time ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ we were at this open-group operator in case the group is ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ operated upon by a repetition operator, e.g., with `(a*)*b' ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ against `ab'; then we want to ignore where we are now in the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ string in case this attempt to match fails. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ old_regstart[regno] = REG_MATCH_NULL_STRING_P (reg_info[regno]) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ? REG_UNSET (regstart[regno]) ? d : regstart[regno] ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ : regstart[regno]; ~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT2 (" old_regstart: %zd\n", ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ POINTER_TO_OFFSET (old_regstart[regno])); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regstart[regno] = d; ~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT2 (" regstart: %zd\n", ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ POINTER_TO_OFFSET (regstart[regno])); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ IS_ACTIVE (reg_info[regno]) = 1; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ MATCHED_SOMETHING (reg_info[regno]) = 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Clear this whenever we change the register activity status. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ set_regs_matched_done = 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~ /* This is the new highest active register. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ highest_active_reg = regno; ~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* If nothing was active before, this is the new lowest active ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ register. */ ~~~~~~~~~~~~~ if (lowest_active_reg == NO_LOWEST_ACTIVE_REG) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ lowest_active_reg = regno; ~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Move past the inner group count. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ p += 2; ~~~~~~~ just_past_start_mem = p; ~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ } ~ /* The stop_memory opcode represents the end of a group. Its ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ arguments are the same as start_memory's: the register ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ number, and the number of inner groups. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case stop_memory: ~~~~~~~~~~~~~~~~~ { ~ regnum_t regno, inner_groups; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ EXTRACT_NONNEGATIVE_AND_INCR (regno, p); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ EXTRACT_NONNEGATIVE_AND_INCR (inner_groups, p); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT3 ("EXECUTING stop_memory %d (%d):\n", ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regno, inner_groups); ~~~~~~~~~~~~~~~~~~~~~ /* We need to save the string position the last time we were at ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ this close-group operator in case the group is operated ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ upon by a repetition operator, e.g., with `((a*)*(b*)*)*' ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ against `aba'; then we want to ignore where we are now in ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the string in case this attempt to match fails. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ old_regend[regno] = REG_MATCH_NULL_STRING_P (reg_info[regno]) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ? REG_UNSET (regend[regno]) ? d : regend[regno] ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ : regend[regno]; ~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT2 (" old_regend: %zd\n", ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ POINTER_TO_OFFSET (old_regend[regno])); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regend[regno] = d; ~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT2 (" regend: %zd\n", ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ POINTER_TO_OFFSET (regend[regno])); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* This register isn't active anymore. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ IS_ACTIVE (reg_info[regno]) = 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Clear this whenever we change the register activity status. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ set_regs_matched_done = 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~ /* If this was the only register active, nothing is active ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ anymore. */ ~~~~~~~~~~~~ if (lowest_active_reg == highest_active_reg) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ lowest_active_reg = NO_LOWEST_ACTIVE_REG; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ highest_active_reg = NO_HIGHEST_ACTIVE_REG; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ else ~~~~ { ~ /* We must scan for the new highest active register, since it ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ isn't necessarily one less than now: consider ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (a(b)c(d(e)f)g). When group 3 ends, after the f), the new ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ highest active register is 1. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regnum_t r = regno - 1; ~~~~~~~~~~~~~~~~~~~~~~~ while (r > 0 && !IS_ACTIVE (reg_info[r])) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ r--; ~~~~ /* If we end up at register zero, that means that we saved ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the registers as the result of an `on_failure_jump', not ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ a `start_memory', and we jumped to past the innermost ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `stop_memory'. For example, in ((.)*) we save registers ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 1 and 2 as a result of the *, but when we pop back to the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ second ), we are at the stop_memory 1. Thus, nothing is ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ active. */ ~~~~~~~~~~~ if (r == 0) ~~~~~~~~~~~ { ~ lowest_active_reg = NO_LOWEST_ACTIVE_REG; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ highest_active_reg = NO_HIGHEST_ACTIVE_REG; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ else ~~~~ { ~ highest_active_reg = r; ~~~~~~~~~~~~~~~~~~~~~~~ /* 98/9/21 jhod: We've also gotta set lowest_active_reg, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ don't we? */ ~~~~~~~~~~~~ r = 1; ~~~~~~ while (r < highest_active_reg && !IS_ACTIVE(reg_info[r])) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ r++; ~~~~ lowest_active_reg = r; ~~~~~~~~~~~~~~~~~~~~~~ } ~ } ~ /* If just failed to match something this time around with a ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ group that's operated on by a repetition operator, try to ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ force exit from the ``loop'', and restore the register ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ information for this group that we had before trying this ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ last match. */ ~~~~~~~~~~~~~~~ if ((!MATCHED_SOMETHING (reg_info[regno]) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || just_past_start_mem == p - 4) && p < pend) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ re_bool is_a_jump_n = false; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ p1 = p; ~~~~~~~ mcnt = 0; ~~~~~~~~~ switch ((re_opcode_t) *p1++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ case jump_n: ~~~~~~~~~~~~ is_a_jump_n = true; ~~~~~~~~~~~~~~~~~~~ case pop_failure_jump: ~~~~~~~~~~~~~~~~~~~~~~ case maybe_pop_jump: ~~~~~~~~~~~~~~~~~~~~ case jump: ~~~~~~~~~~ case dummy_failure_jump: ~~~~~~~~~~~~~~~~~~~~~~~~ EXTRACT_NUMBER_AND_INCR (mcnt, p1); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (is_a_jump_n) ~~~~~~~~~~~~~~~~ p1 += 2; ~~~~~~~~ break; ~~~~~~ default: ~~~~~~~~ /* do nothing */ ; ~~~~~~~~~~~~~~~~~~ } ~ p1 += mcnt; ~~~~~~~~~~~ /* If the next operation is a jump backwards in the pattern ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ to an on_failure_jump right before the start_memory ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ corresponding to this stop_memory, exit from the loop ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ by forcing a failure after pushing on the stack the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ on_failure_jump's jump in the pattern, and d. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (mcnt < 0 && (re_opcode_t) *p1 == on_failure_jump ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ && (re_opcode_t) p1[3] == start_memory && ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regno == extract_nonnegative (p1 + 4)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ /* If this group ever matched anything, then restore ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ what its registers were before trying this last ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ failed match, e.g., with `(a*)*b' against `ab' for ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regstart[1], and, e.g., with `((a*)*(b*)*)*' against ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `aba' for regend[3]. ~~~~~~~~~~~~~~~~~~~~ Also restore the registers for inner groups for, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ e.g., `((a*)(b*))*' against `aba' (register 3 would ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ otherwise get trashed). */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (EVER_MATCHED_SOMETHING (reg_info[regno])) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ int r; ~~~~~~ EVER_MATCHED_SOMETHING (reg_info[regno]) = 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Restore this and inner groups' (if any) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ registers. */ ~~~~~~~~~~~~~~ for (r = regno; r < regno + inner_groups; r++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ regstart[r] = old_regstart[r]; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* xx why this test? */ ~~~~~~~~~~~~~~~~~~~~~~~~ if (old_regend[r] >= regstart[r]) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regend[r] = old_regend[r]; ~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ } ~ p1++; ~~~~~ EXTRACT_NUMBER_AND_INCR (mcnt, p1); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PUSH_FAILURE_POINT (p1 + mcnt, d, -2); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto fail; ~~~~~~~~~~ } ~ } ~ /* We used to move past the register number and inner group count ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ here, when registers were just one byte; that's no longer ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ necessary with EXTRACT_NUMBER_AND_INCR(), above. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ } ~ /* \ has been turned into a `duplicate' command which is ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ followed by the numeric value of as the register number. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (Already passed through external-to-internal-register mapping, so ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ it refers to the actual group number, not the non-shy-only ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ numbering used in the external world.) */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case duplicate: ~~~~~~~~~~~~~~~ { ~ REGISTER re_char *d2, *dend2; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Get which register to match against. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regnum_t regno; ~~~~~~~~~~~~~~~ EXTRACT_NONNEGATIVE_AND_INCR (regno, p); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT2 ("EXECUTING duplicate %d.\n", regno); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Can't back reference a group which we've never matched. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (REG_UNSET (regstart[regno]) || REG_UNSET (regend[regno])) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto fail; ~~~~~~~~~~ /* Where in input to try to start matching. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ d2 = regstart[regno]; ~~~~~~~~~~~~~~~~~~~~~ /* Where to stop matching; if both the place to start and ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the place to stop matching are in the same string, then ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ set to the place to stop, otherwise, for now have to use ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the end of the first string. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ dend2 = ((FIRST_STRING_P (regstart[regno]) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ == FIRST_STRING_P (regend[regno])) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ? regend[regno] : end_match_1); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (;;) ~~~~~~~~ { ~ /* If necessary, advance to next segment in register ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ contents. */ ~~~~~~~~~~~~~ while (d2 == dend2) ~~~~~~~~~~~~~~~~~~~ { ~ if (dend2 == end_match_2) break; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (dend2 == regend[regno]) break; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* End of string1 => advance to string2. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ d2 = string2; ~~~~~~~~~~~~~ dend2 = regend[regno]; ~~~~~~~~~~~~~~~~~~~~~~ } ~ /* At end of register contents => success */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (d2 == dend2) break; ~~~~~~~~~~~~~~~~~~~~~~~ /* If necessary, advance to next segment in data. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ REGEX_PREFETCH (); ~~~~~~~~~~~~~~~~~~ /* How many characters left in this segment to match. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ mcnt = dend - d; ~~~~~~~~~~~~~~~~ /* Want how many consecutive characters we can match in ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ one shot, so, if necessary, adjust the count. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (mcnt > dend2 - d2) ~~~~~~~~~~~~~~~~~~~~~~ mcnt = dend2 - d2; ~~~~~~~~~~~~~~~~~~ /* Compare that many; failure if mismatch, else move ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ past them. */ ~~~~~~~~~~~~~~ if (TRANSLATE_P (translate) ~~~~~~~~~~~~~~~~~~~~~~~~~~~ ? bcmp_translate (d, d2, mcnt, translate ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef emacs ~~~~~~~~~~~~ , fmt, lispobj ~~~~~~~~~~~~~~ #endif ~~~~~~ ) ~ : memcmp (d, d2, mcnt)) ~~~~~~~~~~~~~~~~~~~~~~~ goto fail; ~~~~~~~~~~ d += mcnt, d2 += mcnt; ~~~~~~~~~~~~~~~~~~~~~~ /* Do this because we've match some characters. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ SET_REGS_MATCHED (); ~~~~~~~~~~~~~~~~~~~~ } ~ } ~ break; ~~~~~~ /* begline matches the empty string at the beginning of the string ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (unless `not_bol' is set in `bufp'), and, if ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `newline_anchor' is set, after newlines. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case begline: ~~~~~~~~~~~~~ DEBUG_MATCH_PRINT1 ("EXECUTING begline.\n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (AT_STRINGS_BEG (d)) ~~~~~~~~~~~~~~~~~~~~~~~ { ~ if (!bufp->not_bol) break; ~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ else ~~~~ { ~ re_char *d2 = d; ~~~~~~~~~~~~~~~~ DEC_IBYTEPTR (d2); ~~~~~~~~~~~~~~~~~~ if (itext_ichar_ascii_fmt (d2, fmt, lispobj) == '\n' && ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->newline_anchor) ~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ } ~ /* In all other cases, we fail. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto fail; ~~~~~~~~~~ /* endline is the dual of begline. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case endline: ~~~~~~~~~~~~~ DEBUG_MATCH_PRINT1 ("EXECUTING endline.\n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (AT_STRINGS_END (d)) ~~~~~~~~~~~~~~~~~~~~~~~ { ~ if (!bufp->not_eol) break; ~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* We have to ``prefetch'' the next character. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ else if ((d == end1 ? ~~~~~~~~~~~~~~~~~~~~~ itext_ichar_ascii_fmt (string2, fmt, lispobj) : ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ itext_ichar_ascii_fmt (d, fmt, lispobj)) == '\n' ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ && bufp->newline_anchor) ~~~~~~~~~~~~~~~~~~~~~~~~ { ~ break; ~~~~~~ } ~ goto fail; ~~~~~~~~~~ /* Match at the very beginning of the data. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case begbuf: ~~~~~~~~~~~~ DEBUG_MATCH_PRINT1 ("EXECUTING begbuf.\n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (AT_STRINGS_BEG (d)) ~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ goto fail; ~~~~~~~~~~ /* Match at the very end of the data. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case endbuf: ~~~~~~~~~~~~ DEBUG_MATCH_PRINT1 ("EXECUTING endbuf.\n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (AT_STRINGS_END (d)) ~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ goto fail; ~~~~~~~~~~ /* on_failure_keep_string_jump is used to optimize `.*\n'. It ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ pushes NULL as the value for the string on the stack. Then ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `pop_failure_point' will keep the current value for the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ string, instead of restoring it. To see why, consider ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ matching `foo\nbar' against `.*\n'. The .* matches the foo; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ then the . fails against the \n. But the next thing we want ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ to do is match the \n against the \n; if we restored the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ string value, we would be back at the foo. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Because this is used only in specific cases, we don't need to ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ check all the things that `on_failure_jump' does, to make ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ sure the right things get saved on the stack. Hence we don't ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ share its code. The only reason to push anything on the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ stack at all is that otherwise we would have to change ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `anychar's code to do something besides goto fail in this ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case; that seems worse than this. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case on_failure_keep_string_jump: ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT1 ("EXECUTING on_failure_keep_string_jump"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ EXTRACT_NUMBER_AND_INCR (mcnt, p); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT3 (" %d (to 0x%zx):\n", mcnt, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (Bytecount) (p + mcnt)); ~~~~~~~~~~~~~~~~~~~~~~~~ PUSH_FAILURE_POINT (p + mcnt, (unsigned char *) 0, -2); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ /* Uses of on_failure_jump: ~~~~~~~~~~~~~~~~~~~~~~~~~~~ Each alternative starts with an on_failure_jump that points ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ to the beginning of the next alternative. Each alternative ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ except the last ends with a jump that in effect jumps past ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the rest of the alternatives. (They really jump to the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ending jump of the following alternative, because tensioning ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ these jumps is a hassle.) ~~~~~~~~~~~~~~~~~~~~~~~~~ Repeats start with an on_failure_jump that points past both ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the repetition text and either the following jump or ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ pop_failure_jump back to this on_failure_jump. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case on_failure_jump: ~~~~~~~~~~~~~~~~~~~~~ on_failure: ~~~~~~~~~~~ DEBUG_MATCH_PRINT1 ("EXECUTING on_failure_jump"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ EXTRACT_NUMBER_AND_INCR (mcnt, p); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT3 (" %d (to 0x%zx)", mcnt, (Bytecount) (p + mcnt)); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* If this on_failure_jump comes right before a group (i.e., ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the original * applied to a group), save the information ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for that group and all inner ones, so that if we fail back ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ to this point, the group's information will be correct. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ For example, in \(a*\)*\1, we need the preceding group, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ and in \(\(a*\)b*\)\2, we need the inner group. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* We can't use `p' to check ahead because we push ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ a failure point to `p + mcnt' after we do this. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ p1 = p; ~~~~~~~ /* We need to skip no_op's before we look for the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ start_memory in case this on_failure_jump is happening as ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the result of a completed succeed_n, as in \(a\)\{1,3\}b\1 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ against aba. */ ~~~~~~~~~~~~~~~~ while (p1 < pend && (re_opcode_t) *p1 == no_op) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ p1++; ~~~~~ if (p1 < pend && (re_opcode_t) *p1 == start_memory) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ /* We have a new highest active register now. This will ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ get reset at the start_memory we are about to get to, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ but we will have saved all the registers relevant to ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ this repetition op, as described above. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ highest_active_reg = *(p1 + 1) + *(p1 + 2); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (lowest_active_reg == NO_LOWEST_ACTIVE_REG) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ lowest_active_reg = *(p1 + 1); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ DEBUG_MATCH_PRINT1 (":\n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~ PUSH_FAILURE_POINT (p + mcnt, d, -2); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ /* A smart repeat ends with `maybe_pop_jump'. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ We change it to either `pop_failure_jump' or `jump'. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case maybe_pop_jump: ~~~~~~~~~~~~~~~~~~~~ EXTRACT_NUMBER_AND_INCR (mcnt, p); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT2 ("EXECUTING maybe_pop_jump %d.\n", mcnt); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ REGISTER const unsigned char *p2 = p; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Compare the beginning of the repeat with what in the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ pattern follows its end. If we can establish that there ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ is nothing that they would both match, i.e., that we ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ would have to backtrack because of (as in, e.g., `a*a') ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ then we can change to pop_failure_jump, because we'll ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ never have to backtrack. ~~~~~~~~~~~~~~~~~~~~~~~~ This is not true in the case of alternatives: in ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `(a|ab)*' we do need to backtrack to the `ab' alternative ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (e.g., if the string was `ab'). But instead of trying to ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ detect that here, the alternative has put on a dummy ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ failure point which is what we will end up popping. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Skip over open/close-group commands. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ If what follows this loop is a ...+ construct, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ look at what begins its body, since we will have to ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ match at least one of that. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ while (1) ~~~~~~~~~ { ~ if (p2 + 2 < pend ~~~~~~~~~~~~~~~~~ && ((re_opcode_t) *p2 == stop_memory ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || (re_opcode_t) *p2 == start_memory)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ p2 += 3; ~~~~~~~~ else if (p2 + 6 < pend ~~~~~~~~~~~~~~~~~~~~~~ && (re_opcode_t) *p2 == dummy_failure_jump) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ p2 += 6; ~~~~~~~~ else ~~~~ break; ~~~~~~ } ~ p1 = p + mcnt; ~~~~~~~~~~~~~~ /* p1[0] ... p1[2] are the `on_failure_jump' corresponding ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ to the `maybe_finalize_jump' of this case. Examine what ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ follows. */ ~~~~~~~~~~~~ /* If we're at the end of the pattern, we can change. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (p2 == pend) ~~~~~~~~~~~~~~~ { ~ /* Consider what happens when matching ":\(.*\)" ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ against ":/". I don't really understand this code ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ yet. */ ~~~~~~~~ ((unsigned char *)p)[-3] = (re_char) pop_failure_jump; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT1 ~~~~~~~~~~~~~~~~~~ (" End of pattern: change to `pop_failure_jump'.\n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ else if ((re_opcode_t) *p2 == exactn ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || (bufp->newline_anchor && (re_opcode_t) *p2 == endline)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ REGISTER unsigned char c ~~~~~~~~~~~~~~~~~~~~~~~~ = *p2 == (unsigned char) endline ? '\n' : p2[2]; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if ((re_opcode_t) p1[3] == exactn && p1[5] != c) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ ((unsigned char *)p)[-3] ~~~~~~~~~~~~~~~~~~~~~~~~ = (unsigned char) pop_failure_jump; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT3 (" %c != %c => pop_failure_jump.\n", ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ c, p1[5]); ~~~~~~~~~~ } ~ else if ((re_opcode_t) p1[3] == charset ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || (re_opcode_t) p1[3] == charset_not) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ int not_p = (re_opcode_t) p1[3] == charset_not; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (c < (unsigned char) (p1[4] * BYTEWIDTH) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ && p1[5 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH))) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ not_p = !not_p; ~~~~~~~~~~~~~~~ /* `not_p' is equal to 1 if c would match, which means ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ that we can't change to pop_failure_jump. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!not_p) ~~~~~~~~~~~ { ~ ((unsigned char *)p)[-3] ~~~~~~~~~~~~~~~~~~~~~~~~ = (unsigned char) pop_failure_jump; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT1 (" No match => pop_failure_jump.\n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ } ~ } ~ else if ((re_opcode_t) *p2 == charset) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ #ifdef DEBUG ~~~~~~~~~~~~ REGISTER unsigned char c ~~~~~~~~~~~~~~~~~~~~~~~~ = *p2 == (unsigned char) endline ? '\n' : p2[2]; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ if ((re_opcode_t) p1[3] == exactn ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ && ! ((int) p2[1] * BYTEWIDTH > (int) p1[5] ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ && (p2[2 + p1[5] / BYTEWIDTH] ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ & (1 << (p1[5] % BYTEWIDTH))))) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ unsigned char *p3 = (unsigned char *)p; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ p3[-3] = (unsigned char) pop_failure_jump; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT3 (" %c != %c => pop_failure_jump.\n", ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ c, p1[5]); ~~~~~~~~~~ } ~ else if ((re_opcode_t) p1[3] == charset_not) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ int idx; ~~~~~~~~ /* We win if the charset_not inside the loop ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ lists every character listed in the charset after. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (idx = 0; idx < (int) p2[1]; idx++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (! (p2[2 + idx] == 0 ~~~~~~~~~~~~~~~~~~~~~~~ || (idx < (int) p1[4] ~~~~~~~~~~~~~~~~~~~~~ && ((p2[2 + idx] & ~ p1[5 + idx]) == 0)))) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ if (idx == p2[1]) ~~~~~~~~~~~~~~~~~ { ~ unsigned char *p3 = (unsigned char *) p; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ p3[-3] = (unsigned char) pop_failure_jump; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT1 (" No match => pop_failure_jump.\n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ } ~ else if ((re_opcode_t) p1[3] == charset) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ int idx; ~~~~~~~~ /* We win if the charset inside the loop ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ has no overlap with the one after the loop. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (idx = 0; ~~~~~~~~~~~~~ idx < (int) p2[1] && idx < (int) p1[4]; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ idx++) ~~~~~~ if ((p2[2 + idx] & p1[5 + idx]) != 0) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ if (idx == p2[1] || idx == p1[4]) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ unsigned char *p3 = (unsigned char *)p; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ p3[-3] = (unsigned char) pop_failure_jump; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT1 (" No match => pop_failure_jump.\n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ } ~ } ~ } ~ p -= 2; /* Point at relative address again. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if ((re_opcode_t) p[-1] != pop_failure_jump) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ p[-1] = (unsigned char) jump; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT1 (" Match => jump.\n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto unconditional_jump; ~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* Note fall through. */ ~~~~~~~~~~~~~~~~~~~~~~~~~ /* The end of a simple repeat has a pop_failure_jump back to ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ its matching on_failure_jump, where the latter will push a ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ failure point. The pop_failure_jump takes off failure ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ points put on by this pop_failure_jump's matching ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ on_failure_jump; we got through the pattern to here from the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ matching on_failure_jump, so didn't fail. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case pop_failure_jump: ~~~~~~~~~~~~~~~~~~~~~~ { ~ /* We need to pass separate storage for the lowest and ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ highest registers, even though we don't care about the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ actual values. Otherwise, we will restore only one ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ register from the stack, since lowest will == highest in ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `pop_failure_point'. */ ~~~~~~~~~~~~~~~~~~~~~~~~ int dummy_low_reg, dummy_high_reg; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ unsigned char *pdummy; ~~~~~~~~~~~~~~~~~~~~~~ re_char *sdummy = NULL; ~~~~~~~~~~~~~~~~~~~~~~~ USED (sdummy); /* Silence warning. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT1 ("EXECUTING pop_failure_jump.\n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ POP_FAILURE_POINT (sdummy, pdummy, ~~~~~~~~~~~~~~~~~~~~ regex.c:774:55: note: in definition of macro 'DEBUG_FAIL_PRINT2' if (debug_regexps & RE_DEBUG_FAILURE_POINT) printf (x1, x2) ^~ regex.c:6767:13: note: in expansion of macro 'POP_FAILURE_POINT' POP_FAILURE_POINT (sdummy, pdummy, ^~~~~~~~~~~~~~~~~ regex.c:1905:26: warning: format '%zx' expects argument of type 'size_t', but argument 2 has type 'long int' [-Wformat=] DEBUG_FAIL_PRINT2 (" Popping pattern 0x%zx: ", (Bytecount) pat); \ ^ ~~~~~~~~~~~~~~~~~~~ DEBUG_FAIL_PRINT_COMPILED_PATTERN (bufp, pat, pend); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_FAIL_PRINT2 (" Popping high active reg: %d\n", high_reg); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_FAIL_PRINT2 (" Popping low active reg: %d\n", low_reg); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } \ ~~~~~~~~~~~ \ ~ for (this_reg = high_reg; this_reg >= low_reg; this_reg--) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { \ ~~~~~~~~~~~ reg_info[this_reg].word = POP_FAILURE_ELT (); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regend[this_reg] = POP_FAILURE_RELOCATABLE (); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regstart[this_reg] = POP_FAILURE_RELOCATABLE (); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ \ ~ if (DEBUG_RUNTIME_FLAGS & RE_DEBUG_FAILURE_POINT) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { \ ~~~~~~~~~~ DEBUG_FAIL_PRINT2 (" Popping reg: %d\n", this_reg); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_FAIL_PRINT2 (" info: 0x%zx\n", \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ * (Bytecount *) ®_info[this_reg]); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_FAIL_PRINT2 (" end: 0x%zx\n", \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (Bytecount) regend[this_reg]); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_FAIL_PRINT2 (" start: 0x%zx\n", \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (Bytecount) regstart[this_reg]); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } \ ~~~~~~~~~~ } \ ~~~~~~~~~~~ \ ~ set_regs_matched_done = 0; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_STATEMENT (nfailure_points_popped++); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } while (0) /* POP_FAILURE_POINT */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ~ /* Structure for per-register (a.k.a. per-group) information. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Other register information, such as the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ starting and ending positions (which are addresses), and the list of ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ inner groups (which is a bits list) are maintained in separate ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ variables. ~~~~~~~~~~ We are making a (strictly speaking) nonportable assumption here: that ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the compiler will pack our bit fields into something that fits into ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the type of `word', i.e., is something that fits into one item on the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ failure stack. */ ~~~~~~~~~~~~~~~~~~ typedef union ~~~~~~~~~~~~~ { ~ fail_stack_elt_t word; ~~~~~~~~~~~~~~~~~~~~~~ struct ~~~~~~ { ~ /* This field is one if this group can match the empty string, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ zero if not. If not yet determined, `MATCH_NULL_UNSET_VALUE'. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define MATCH_NULL_UNSET_VALUE 3 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ unsigned int match_null_string_p : 2; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ unsigned int is_active : 1; ~~~~~~~~~~~~~~~~~~~~~~~~~~~ unsigned int matched_something : 1; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ unsigned int ever_matched_something : 1; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } bits; ~~~~~~~ } register_info_type; ~~~~~~~~~~~~~~~~~~~~~ #define REG_MATCH_NULL_STRING_P(R) ((R).bits.match_null_string_p) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define IS_ACTIVE(R) ((R).bits.is_active) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define MATCHED_SOMETHING(R) ((R).bits.matched_something) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define EVER_MATCHED_SOMETHING(R) ((R).bits.ever_matched_something) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Call this when have matched a real character; it sets `matched' flags ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for the subexpressions which we are currently inside. Also records ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ that those subexprs have matched. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define SET_REGS_MATCHED() \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ do \ ~~~~~~~~~~~~ { \ ~~~~~~~~~~~ if (!set_regs_matched_done) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { \ ~~~~~~~~~~ int r; \ ~~~~~~~~~~~~~~ set_regs_matched_done = 1; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (r = lowest_active_reg; r <= highest_active_reg; r++) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { \ ~~~~~~~~~~ MATCHED_SOMETHING (reg_info[r]) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ = EVER_MATCHED_SOMETHING (reg_info[r]) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ = 1; \ ~~~~~~~~~~~~ } \ ~~~~~~~~~~ } \ ~~~~~~~~~~ } \ ~~~~~~~~~~~ while (0) ~~~~~~~~~ ~ /* Subroutine declarations and macros for regex_compile. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Fetch the next character in the uncompiled pattern---translating it ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if necessary. */ ~~~~~~~~~~~~~~~~~ #define PATFETCH(c) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~ do { \ ~~~~~~~~~~~~~~ PATFETCH_RAW (c); \ ~~~~~~~~~~~~~~~~~~~~~~~~~ c = RE_TRANSLATE (c); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } while (0) ~~~~~~~~~~~ /* Fetch the next character in the uncompiled pattern, with no ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ translation. */ ~~~~~~~~~~~~~~~~ #define PATFETCH_RAW(c) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ do {if (p == pend) return REG_EEND; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ assert (p < pend); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~ c = itext_ichar (p); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ INC_IBYTEPTR (p); \ ~~~~~~~~~~~~~~~~~~~~~~~~~ } while (0) ~~~~~~~~~~~ /* Go backwards one character in the pattern. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define PATUNFETCH DEC_IBYTEPTR (p) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* If `translate' is non-null, return translate[D], else just D. We ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ cast the subscript to translate because some data is declared as ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `char *', to avoid warnings when a string constant is passed. But ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ when we use a character as a subscript we must make it unsigned. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define RE_TRANSLATE(d) \ ~~~~~~~~~~~~~~~~~~~~~~~~~ (TRANSLATE_P (translate) ? RE_TRANSLATE_1 (d) : (d)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Macros for outputting the compiled pattern into `buffer'. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* If the buffer isn't allocated when it comes in, use this. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define INIT_BUF_SIZE 32 ~~~~~~~~~~~~~~~~~~~~~~~~~ /* Make sure we have at least N more bytes of space in buffer. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define GET_BUFFER_SPACE(n) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ while (buf_end - bufp->buffer + (n) > (ptrdiff_t) bufp->allocated) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ EXTEND_BUFFER () ~~~~~~~~~~~~~~~~ /* Make sure we have one more byte of buffer space and then add C to it. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define BUF_PUSH(c) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~ do { \ ~~~~~~~~~~~~~~ GET_BUFFER_SPACE (1); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ *buf_end++ = (unsigned char) (c); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } while (0) ~~~~~~~~~~~ /* Ensure we have two more bytes of buffer space and then append C1 and C2. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define BUF_PUSH_2(c1, c2) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ do { \ ~~~~~~~~~~~~~~ GET_BUFFER_SPACE (2); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ *buf_end++ = (unsigned char) (c1); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ *buf_end++ = (unsigned char) (c2); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } while (0) ~~~~~~~~~~~ /* As with BUF_PUSH_2, except for three bytes. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define BUF_PUSH_3(c1, c2, c3) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ do { \ ~~~~~~~~~~~~~~ GET_BUFFER_SPACE (3); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ *buf_end++ = (unsigned char) (c1); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ *buf_end++ = (unsigned char) (c2); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ *buf_end++ = (unsigned char) (c3); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } while (0) ~~~~~~~~~~~ /* Store a jump with opcode OP at LOC to location TO. We store a ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ relative address offset by the three bytes the jump itself occupies. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define STORE_JUMP(op, loc, to) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ store_op1 (op, loc, (to) - (loc) - 3) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Likewise, for a two-argument jump. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define STORE_JUMP2(op, loc, to, arg) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ store_op2 (op, loc, (to) - (loc) - 3, arg) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Like `STORE_JUMP', but for inserting. Assume `buf_end' is the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ buffer end. */ ~~~~~~~~~~~~~~~ #define INSERT_JUMP(op, loc, to) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ insert_op1 (op, loc, (to) - (loc) - 3, buf_end) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Like `STORE_JUMP2', but for inserting. Assume `buf_end' is the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ buffer end. */ ~~~~~~~~~~~~~~~ #define INSERT_JUMP2(op, loc, to, arg) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ insert_op2 (op, loc, (to) - (loc) - 3, arg, buf_end) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Extend the buffer by twice its current size via realloc and ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ reset the pointers that pointed into the old block to point to the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ correct places in the new one. If extending the buffer results in it ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ being larger than RE_MAX_BUF_SIZE, then flag memory exhausted. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define EXTEND_BUFFER() \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ do { \ ~~~~~~~~~~~~~~~ re_char *old_buffer = bufp->buffer; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (bufp->allocated == RE_MAX_BUF_SIZE) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return REG_ESIZE; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->allocated <<= 1; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (bufp->allocated > RE_MAX_BUF_SIZE) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->allocated = RE_MAX_BUF_SIZE; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->buffer = \ ~~~~~~~~~~~~~~~~~~~~~~~ (unsigned char *) xrealloc (bufp->buffer, bufp->allocated); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (bufp->buffer == NULL) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return REG_ESPACE; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~ /* If the buffer moved, move all the pointers into it. */ \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (old_buffer != bufp->buffer) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { \ ~~~~~~~~~~~~ buf_end = (buf_end - old_buffer) + bufp->buffer; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ begalt = (begalt - old_buffer) + bufp->buffer; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (fixup_alt_jump) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~ fixup_alt_jump = (fixup_alt_jump - old_buffer) + bufp->buffer; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (laststart) \ ~~~~~~~~~~~~~~~~~~~~~~~ laststart = (laststart - old_buffer) + bufp->buffer; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (pending_exact) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~ pending_exact = (pending_exact - old_buffer) + bufp->buffer; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } \ ~~~~~~~~~~~~ } while (0) ~~~~~~~~~~~ #define INIT_REG_TRANSLATE_SIZE 5 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Macros for the compile stack. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Since offsets can go either forwards or backwards, this type needs to be ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ able to hold values from -(RE_MAX_BUF_SIZE - 1) to RE_MAX_BUF_SIZE - 1. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ typedef int pattern_offset_t; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ typedef struct ~~~~~~~~~~~~~~ { ~ pattern_offset_t begalt_offset; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ pattern_offset_t fixup_alt_jump; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ pattern_offset_t inner_group_offset; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ pattern_offset_t laststart_offset; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regnum_t regnum; ~~~~~~~~~~~~~~~~ } compile_stack_elt_t; ~~~~~~~~~~~~~~~~~~~~~~ typedef struct ~~~~~~~~~~~~~~ { ~ compile_stack_elt_t *stack; ~~~~~~~~~~~~~~~~~~~~~~~~~~~ int size; ~~~~~~~~~ int avail; /* Offset of next open position. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } compile_stack_type; ~~~~~~~~~~~~~~~~~~~~~ #define INIT_COMPILE_STACK_SIZE 32 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define COMPILE_STACK_EMPTY (compile_stack.avail == 0) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define COMPILE_STACK_FULL (compile_stack.avail == compile_stack.size) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* The next available element. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define COMPILE_STACK_TOP (compile_stack.stack[compile_stack.avail]) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Set the bit for character C in a bit vector. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define SET_LIST_BIT(c) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (buf_end[((unsigned char) (c)) / BYTEWIDTH] \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ |= 1 << (((unsigned char) c) % BYTEWIDTH)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef MULE ~~~~~~~~~~~ /* Set the "bit" for character C in a range table. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define SET_RANGETAB_BIT(c) put_range_table (rtab, c, c, Qt) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ #ifdef emacs ~~~~~~~~~~~~ /* Parse the longest number we can, but don't produce a bignum, that can't ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ correspond to anything we're interested in and would needlessly complicate ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ code. Also avoid the silent overflow issues of the non-emacs code below. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ If the string at P is not exhausted, leave P pointing at the next ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (probable-)non-digit byte encountered. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define GET_UNSIGNED_NUMBER(num) do \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Ibyte *_gus_numend = NULL; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Lisp_Object _gus_numno; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* most-positive-fixnum on 32 bit XEmacs is 10 decimal digits, \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ nine will keep us in fixnum territory no matter our \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ architecture */ \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Bytecount limit = min (pend - p, 9); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ \ ~ /* Require that any digits are ASCII. We already require that \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the user type ASCII in order to type {,(,|, etc, and there is \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the potential for security holes in the future if we allow \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ non-ASCII digits to specify groups in regexps and other \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ code that parses regexps is not aware of this. */ \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ _gus_numno = parse_integer (p, &_gus_numend, limit, 10, 1, \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Vdigit_fixnum_ascii); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (FIXNUMP (_gus_numno) && XREALFIXNUM (_gus_numno) >= 0) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ num = XREALFIXNUM (_gus_numno); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ p = _gus_numend; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } while (0) ~~~~~~~~~~~ #else ~~~~~ /* Get the next unsigned number in the uncompiled pattern. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define GET_UNSIGNED_NUMBER(num) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { if (p != pend) \ ~~~~~~~~~~~~~~~~~~~~~~~~ { \ ~~~~~~~~~~~ int _gun_do_unfetch = 1; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PATFETCH (c); \ ~~~~~~~~~~~~~~~~~~~~~~ while (ISDIGIT (c)) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~ { \ ~~~~~~~~~~~ if (num < 0) \ ~~~~~~~~~~~~~~~~~~~~ num = 0; \ ~~~~~~~~~~~~~~~~ num = num * 10 + c - '0'; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (p == pend) \ ~~~~~~~~~~~~~~~~~~~~~~ { \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ _gun_do_unfetch = 0; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; \ ~~~~~~~~~~~~~~~ } \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PATFETCH (c); \ ~~~~~~~~~~~~~~~~~~~~ } \ ~~~~~~~~~~~ if (_gun_do_unfetch) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Make sure P points to the next non-digit character. */ \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PATUNFETCH; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ #endif ~~~~~~ /* Map a string to the char class it names (if any). BEG points to the string ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ to be parsed and LIMIT is the length, in bytes, of that string. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ XEmacs; this only handles the NAME part of the [:NAME:] specification of a ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ character class name. The GNU emacs version of this function attempts to ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ handle the string from [: onwards, and is called re_wctype_parse. Our ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ approach means the function doesn't need to be called with every character ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ class encountered. ~~~~~~~~~~~~~~~~~~ LENGTH would be a Bytecount if this function didn't need to be compiled ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ also for executables that don't include lisp.h ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Return RECC_ERROR if STRP doesn't match a known character class. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_wctype_t ~~~~~~~~~~~ re_wctype (const unsigned char *beg, int limit) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ /* Sort tests in the length=five case by frequency the classes to minimize ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ number of times we fail the comparison. The frequencies of character class ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ names used in Emacs sources as of 2016-07-27: ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ $ find \( -name \*.c -o -name \*.el \) -exec grep -h '\[:[a-z]*:]' {} + | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ sed 's/]/]\n/g' |grep -o '\[:[a-z]*:]' |sort |uniq -c |sort -nr ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 213 [:alnum:] ~~~~~~~~~~~~~ 104 [:alpha:] ~~~~~~~~~~~~~ 62 [:space:] ~~~~~~~~~~~~ 39 [:digit:] ~~~~~~~~~~~~ 36 [:blank:] ~~~~~~~~~~~~ 26 [:word:] ~~~~~~~~~~~ 26 [:upper:] ~~~~~~~~~~~~ 21 [:lower:] ~~~~~~~~~~~~ 10 [:xdigit:] ~~~~~~~~~~~~~ 10 [:punct:] ~~~~~~~~~~~~ 10 [:ascii:] ~~~~~~~~~~~~ 4 [:nonascii:] ~~~~~~~~~~~~~~ 4 [:graph:] ~~~~~~~~~~~ 2 [:print:] ~~~~~~~~~~~ 2 [:cntrl:] ~~~~~~~~~~~ 1 [:ff:] ~~~~~~~~ If you update this list, consider also updating chain of or'ed conditions ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ in execute_charset function. XEmacs; our equivalent is the condition ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ checking class_bits in the charset_mule and charset_mule_not opcodes. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ ~~ switch (limit) { ~~~~~~~~~~~~~~~~ case 4: ~~~~~~~ if (!memcmp (beg, "word", 4)) return RECC_WORD; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case 5: ~~~~~~~ if (!memcmp (beg, "alnum", 5)) return RECC_ALNUM; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!memcmp (beg, "alpha", 5)) return RECC_ALPHA; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!memcmp (beg, "space", 5)) return RECC_SPACE; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!memcmp (beg, "digit", 5)) return RECC_DIGIT; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!memcmp (beg, "blank", 5)) return RECC_BLANK; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!memcmp (beg, "upper", 5)) return RECC_UPPER; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!memcmp (beg, "lower", 5)) return RECC_LOWER; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!memcmp (beg, "punct", 5)) return RECC_PUNCT; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!memcmp (beg, "ascii", 5)) return RECC_ASCII; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!memcmp (beg, "graph", 5)) return RECC_GRAPH; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!memcmp (beg, "print", 5)) return RECC_PRINT; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!memcmp (beg, "cntrl", 5)) return RECC_CNTRL; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case 6: ~~~~~~~ if (!memcmp (beg, "xdigit", 6)) return RECC_XDIGIT; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case 7: ~~~~~~~ if (!memcmp (beg, "unibyte", 7)) return RECC_UNIBYTE; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case 8: ~~~~~~~ if (!memcmp (beg, "nonascii", 8)) return RECC_NONASCII; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case 9: ~~~~~~~ if (!memcmp (beg, "multibyte", 9)) return RECC_MULTIBYTE; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ } ~ return RECC_ERROR; ~~~~~~~~~~~~~~~~~~ } ~ /* True if CH is in the char class CC. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int ~~~ re_iswctype (int ch, re_wctype_t cc ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RE_ISWCTYPE_ARG_DECL) ~~~~~~~~~~~~~~~~~~~~~ { ~ switch (cc) ~~~~~~~~~~~ { ~ case RECC_ALNUM: return ISALNUM (ch) != 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_ALPHA: return ISALPHA (ch) != 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_BLANK: return ISBLANK (ch) != 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_CNTRL: return ISCNTRL (ch) != 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_DIGIT: return ISDIGIT (ch) != 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_GRAPH: return ISGRAPH (ch) != 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_PRINT: return ISPRINT (ch) != 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_PUNCT: return ISPUNCT (ch) != 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_SPACE: return ISSPACE (ch) != 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef emacs ~~~~~~~~~~~~ case RECC_UPPER: ~~~~~~~~~~~~~~~~ return NILP (lispbuf->case_fold_search) ? ISUPPER (ch) != 0 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ : !NOCASEP (lispbuf, ch); ~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_LOWER: ~~~~~~~~~~~~~~~~ return NILP (lispbuf->case_fold_search) ? ISLOWER (ch) != 0 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ : !NOCASEP (lispbuf, ch); ~~~~~~~~~~~~~~~~~~~~~~~~~ #else ~~~~~ case RECC_UPPER: return ISUPPER (ch) != 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_LOWER: return ISLOWER (ch) != 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ case RECC_XDIGIT: return ISXDIGIT (ch) != 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_ASCII: return ISASCII (ch) != 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_NONASCII: case RECC_MULTIBYTE: return !ISASCII (ch); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_UNIBYTE: return ISUNIBYTE (ch) != 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_WORD: return ISWORD (ch) != 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_ERROR: return false; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ assert (0); ~~~~~~~~~~~ return -1; ~~~~~~~~~~ } ~ #ifdef MULE ~~~~~~~~~~~ static re_bool ~~~~~~~~~~~~~~ re_wctype_can_match_non_ascii (re_wctype_t cc) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ switch (cc) ~~~~~~~~~~~ { ~ case RECC_ASCII: ~~~~~~~~~~~~~~~~ case RECC_UNIBYTE: ~~~~~~~~~~~~~~~~~~ case RECC_CNTRL: ~~~~~~~~~~~~~~~~ case RECC_DIGIT: ~~~~~~~~~~~~~~~~ case RECC_XDIGIT: ~~~~~~~~~~~~~~~~~ case RECC_BLANK: ~~~~~~~~~~~~~~~~ return false; ~~~~~~~~~~~~~ default: ~~~~~~~~ return true; ~~~~~~~~~~~~ } ~ } ~ #endif /* MULE */ ~~~~~~~~~~~~~~~~~ #ifdef emacs ~~~~~~~~~~~~ /* Return a bit-pattern to use in the range-table bits to match multibyte ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ chars of class CC. */ ~~~~~~~~~~~~~~~~~~~~~~ static unsigned char ~~~~~~~~~~~~~~~~~~~~ re_wctype_to_bit (re_wctype_t cc) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ switch (cc) ~~~~~~~~~~~ { ~ case RECC_PRINT: case RECC_GRAPH: ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_ALPHA: return BIT_ALPHA; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_ALNUM: case RECC_WORD: return BIT_WORD; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_LOWER: return BIT_LOWER; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_UPPER: return BIT_UPPER; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_PUNCT: return BIT_PUNCT; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_SPACE: return BIT_SPACE; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_MULTIBYTE: case RECC_NONASCII: ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_ASCII: case RECC_DIGIT: case RECC_XDIGIT: case RECC_CNTRL: ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_BLANK: case RECC_UNIBYTE: case RECC_ERROR: return 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ default: ~~~~~~~~ ABORT (); ~~~~~~~~~ return 0; ~~~~~~~~~ } ~ } ~ #endif /* emacs */ ~~~~~~~~~~~~~~~~~~ ~ static void store_op1 (re_opcode_t op, unsigned char *loc, int arg); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ static void store_op2 (re_opcode_t op, unsigned char *loc, int arg1, int arg2); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ static void insert_op1 (re_opcode_t op, unsigned char *loc, int arg, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ unsigned char *end); ~~~~~~~~~~~~~~~~~~~~ static void insert_op2 (re_opcode_t op, unsigned char *loc, int arg1, int arg2, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ unsigned char *end); ~~~~~~~~~~~~~~~~~~~~ static re_bool at_begline_loc_p (re_char *pattern, re_char *p, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ reg_syntax_t syntax); ~~~~~~~~~~~~~~~~~~~~~ static re_bool at_endline_loc_p (re_char *p, re_char *pend, int syntax); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ static re_bool group_in_compile_stack (compile_stack_type compile_stack, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regnum_t regnum); ~~~~~~~~~~~~~~~~~ static reg_errcode_t compile_range (re_char **p_ptr, re_char *pend, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RE_TRANSLATE_TYPE translate, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ reg_syntax_t syntax, ~~~~~~~~~~~~~~~~~~~~ unsigned char *b); ~~~~~~~~~~~~~~~~~~ #ifdef MULE ~~~~~~~~~~~ static reg_errcode_t compile_extended_range (re_char **p_ptr, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_char *pend, ~~~~~~~~~~~~~~ RE_TRANSLATE_TYPE translate, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ reg_syntax_t syntax, ~~~~~~~~~~~~~~~~~~~~ Lisp_Object rtab); ~~~~~~~~~~~~~~~~~~ #endif /* MULE */ ~~~~~~~~~~~~~~~~~ #ifdef emacs ~~~~~~~~~~~~ reg_errcode_t compile_char_class (re_wctype_t cc, Lisp_Object rtab, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Bitbyte *flags_out); ~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ static re_bool group_match_null_string_p (re_char **p, re_char *end, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ register_info_type *reg_info); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ static re_bool alt_match_null_string_p (re_char *p, re_char *end, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ register_info_type *reg_info); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ static re_bool common_op_match_null_string_p (re_char **p, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_char *end, ~~~~~~~~~~~~~ register_info_type *reg_info); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ static int bcmp_translate (re_char *s1, re_char *s2, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ REGISTER int len, RE_TRANSLATE_TYPE translate ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef emacs ~~~~~~~~~~~~ , Internal_Format fmt, Lisp_Object lispobj ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ ); ~~ static int re_match_2_internal (struct re_pattern_buffer *bufp, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_char *string1, int size1, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_char *string2, int size2, int pos, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ struct re_registers *regs, int stop ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RE_LISP_CONTEXT_ARGS_DECL); ~~~~~~~~~~~~~~~~~~~~~~~~~~~ ~ #ifndef MATCH_MAY_ALLOCATE ~~~~~~~~~~~~~~~~~~~~~~~~~~ /* If we cannot allocate large objects within re_match_2_internal, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ we make the fail stack and register vectors global. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ The fail stack, we grow to the maximum size when a regexp ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ is compiled. ~~~~~~~~~~~~ The register vectors, we adjust in size each time we ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ compile a regexp, according to the number of registers it needs. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ static fail_stack_type fail_stack; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Size with which the following vectors are currently allocated. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ That is so we can make them bigger as needed, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ but never make them smaller. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ static int regs_allocated_size; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ static re_char ** regstart, ** regend; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ static re_char ** old_regstart, ** old_regend; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ static re_char **best_regstart, **best_regend; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ static register_info_type *reg_info; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ static re_char **reg_dummy; ~~~~~~~~~~~~~~~~~~~~~~~~~~~ static register_info_type *reg_info_dummy; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Make the register vectors big enough for NUM_REGS registers, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ but don't make them smaller. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ static ~~~~~~ regex_grow_registers (int num_regs) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ if (num_regs > regs_allocated_size) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ RETALLOC (regstart, num_regs, re_char *); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RETALLOC (regend, num_regs, re_char *); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RETALLOC (old_regstart, num_regs, re_char *); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RETALLOC (old_regend, num_regs, re_char *); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RETALLOC (best_regstart, num_regs, re_char *); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RETALLOC (best_regend, num_regs, re_char *); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RETALLOC (reg_info, num_regs, register_info_type); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RETALLOC (reg_dummy, num_regs, re_char *); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RETALLOC (reg_info_dummy, num_regs, register_info_type); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regs_allocated_size = num_regs; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ } ~ #endif /* not MATCH_MAY_ALLOCATE */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ~ /* `regex_compile' compiles PATTERN (of length SIZE) according to SYNTAX. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Returns one of error codes defined in `regex.h', or zero for success. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Assumes the `allocated' (and perhaps `buffer') and `translate' ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fields are set in BUFP on entry. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ If it succeeds, results are put in BUFP (if it returns an error, the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ contents of BUFP are undefined): ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `buffer' is the compiled pattern; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `syntax' is set to SYNTAX; ~~~~~~~~~~~~~~~~~~~~~~~~~~ `used' is set to the length of the compiled pattern; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `fastmap_accurate' is zero; ~~~~~~~~~~~~~~~~~~~~~~~~~~~ `re_ngroups' is the number of groups/subexpressions (including shy ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ groups) in PATTERN; ~~~~~~~~~~~~~~~~~~~ `re_nsub' is the number of non-shy groups in PATTERN; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `not_bol' and `not_eol' are zero; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ The `fastmap' and `newline_anchor' fields are neither ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ examined nor set. */ ~~~~~~~~~~~~~~~~~~~~~ /* Return, freeing storage we allocated. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define FREE_STACK_RETURN(value) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ do \ ~~~~~~~~~~ { \ ~~~~~~~~~ xfree (compile_stack.stack); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return value; \ ~~~~~~~~~~~~~~~~~~~~ } while (0) ~~~~~~~~~~~ static reg_errcode_t ~~~~~~~~~~~~~~~~~~~~ regex_compile (re_char *pattern, int size, reg_syntax_t syntax, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ struct re_pattern_buffer *bufp) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ /* We fetch characters from PATTERN here. We declare these as int ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (or possibly long) so that chars above 127 can be used as ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ array indices. The macros that fetch a character from the pattern ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ make sure to coerce to unsigned char before assigning, so we won't ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ get bitten by negative numbers here. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* XEmacs change: used to be unsigned char. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ REGISTER EMACS_INT c, c1; ~~~~~~~~~~~~~~~~~~~~~~~~~ /* A random temporary spot in PATTERN. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_char *p1; ~~~~~~~~~~~~ /* Points to the end of the buffer, where we should append. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ REGISTER unsigned char *buf_end; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Keeps track of unclosed groups. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ compile_stack_type compile_stack; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Points to the current (ending) position in the pattern. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_char *p = pattern; ~~~~~~~~~~~~~~~~~~~~~ re_char *pend = pattern + size; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* How to translate the characters in the pattern. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RE_TRANSLATE_TYPE translate = bufp->translate; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Address of the count-byte of the most recently inserted `exactn' ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ command. This makes it possible to tell if a new exact-match ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ character can be added to that command or if the character requires ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ a new `exactn' command. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~ unsigned char *pending_exact = 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Address of start of the most recently finished expression. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ This tells, e.g., postfix * where to find the start of its ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ operand. Reset at the beginning of groups and alternatives. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ unsigned char *laststart = 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Address of beginning of regexp, or inside of last group. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ unsigned char *begalt; ~~~~~~~~~~~~~~~~~~~~~~ /* Place in the uncompiled pattern (i.e., the {) to ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ which to go back if the interval is invalid. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_char *beg_interval; ~~~~~~~~~~~~~~~~~~~~~~ /* Address of the place where a forward jump should go to the end of ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the containing expression. Each alternative of an `or' -- except the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ last -- ends with a forward jump of this sort. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ unsigned char *fixup_alt_jump = 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Counts open-groups as they are encountered. Remembered for the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ matching close-group on the compile stack, so the same register ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ number is put in the stop_memory as the start_memory. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regnum_t regnum = 0; ~~~~~~~~~~~~~~~~~~~~ #ifdef DEBUG ~~~~~~~~~~~~ if (debug_regexps & RE_DEBUG_COMPILATION) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ int debug_count; ~~~~~~~~~~~~~~~~ DEBUG_PRINT1 ("\nCompiling pattern: "); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (debug_count = 0; debug_count < size; debug_count++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ putchar (pattern[debug_count]); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ putchar ('\n'); ~~~~~~~~~~~~~~~ } ~ #endif /* DEBUG */ ~~~~~~~~~~~~~~~~~~ /* Initialize the compile stack. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ compile_stack.stack = TALLOC (INIT_COMPILE_STACK_SIZE, compile_stack_elt_t); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (compile_stack.stack == NULL) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return REG_ESPACE; ~~~~~~~~~~~~~~~~~~ compile_stack.size = INIT_COMPILE_STACK_SIZE; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ compile_stack.avail = 0; ~~~~~~~~~~~~~~~~~~~~~~~~ /* Initialize the pattern buffer. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->syntax = syntax; ~~~~~~~~~~~~~~~~~~~~~~ bufp->fastmap_accurate = 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->not_bol = bufp->not_eol = 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Set `used' to zero, so that if we return an error, the pattern ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ printer (for debugging) will think there's no pattern. We reset it ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ at the end. */ ~~~~~~~~~~~~~~~ bufp->used = 0; ~~~~~~~~~~~~~~~ /* Always count groups, whether or not bufp->no_sub is set. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->re_nsub = 0; ~~~~~~~~~~~~~~~~~~ bufp->re_ngroups = 0; ~~~~~~~~~~~~~~~~~~~~~ bufp->warned_about_incompatible_back_references = 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (bufp->external_to_internal_register == 0) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ bufp->external_to_internal_register_size = INIT_REG_TRANSLATE_SIZE; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RETALLOC (bufp->external_to_internal_register, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->external_to_internal_register_size, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int); ~~~~~ } ~ { ~ int i; ~~~~~~ bufp->external_to_internal_register[0] = 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (i = 1; i < bufp->external_to_internal_register_size; i++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->external_to_internal_register[i] = (int) 0xDEADBEEF; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ #if !defined (emacs) && !defined (SYNTAX_TABLE) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Initialize the syntax table. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ init_syntax_once (); ~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ if (bufp->allocated == 0) ~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ if (bufp->buffer) ~~~~~~~~~~~~~~~~~ { /* If zero allocated, but buffer is non-null, try to realloc ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ enough space. This loses if buffer's address is bogus, but ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ that is the user's responsibility. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RETALLOC (bufp->buffer, INIT_BUF_SIZE, unsigned char); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ else ~~~~ { /* Caller did not allocate a buffer. Do it for them. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->buffer = TALLOC (INIT_BUF_SIZE, unsigned char); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ if (!bufp->buffer) FREE_STACK_RETURN (REG_ESPACE); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->allocated = INIT_BUF_SIZE; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ begalt = buf_end = bufp->buffer; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Loop through the uncompiled pattern until we're at the end. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ while (p != pend) ~~~~~~~~~~~~~~~~~ { ~ PATFETCH (c); ~~~~~~~~~~~~~ switch (c) ~~~~~~~~~~ { ~ case '^': ~~~~~~~~~ { ~ if ( /* If at start of pattern, it's an operator. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ p == pattern + 1 ~~~~~~~~~~~~~~~~ /* If context independent, it's an operator. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || syntax & RE_CONTEXT_INDEP_ANCHORS ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Otherwise, depends on what's come before. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || at_begline_loc_p (pattern, p, syntax)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ BUF_PUSH (begline); ~~~~~~~~~~~~~~~~~~~ else ~~~~ goto normal_char; ~~~~~~~~~~~~~~~~~ } ~ break; ~~~~~~ case '$': ~~~~~~~~~ { ~ if ( /* If at end of pattern, it's an operator. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ p == pend ~~~~~~~~~ /* If context independent, it's an operator. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || syntax & RE_CONTEXT_INDEP_ANCHORS ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Otherwise, depends on what's next. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || at_endline_loc_p (p, pend, syntax)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ BUF_PUSH (endline); ~~~~~~~~~~~~~~~~~~~ else ~~~~ goto normal_char; ~~~~~~~~~~~~~~~~~ } ~ break; ~~~~~~ case '+': ~~~~~~~~~ case '?': ~~~~~~~~~ if ((syntax & RE_BK_PLUS_QM) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || (syntax & RE_LIMITED_OPS)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto normal_char; ~~~~~~~~~~~~~~~~~ handle_plus: ~~~~~~~~~~~~ case '*': ~~~~~~~~~ /* If there is no previous pattern... */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!laststart) ~~~~~~~~~~~~~~~ { ~ if (syntax & RE_CONTEXT_INVALID_OPS) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ FREE_STACK_RETURN (REG_BADRPT); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ else if (!(syntax & RE_CONTEXT_INDEP_OPS)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto normal_char; ~~~~~~~~~~~~~~~~~ } ~ { ~ /* true means zero/many matches are allowed. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_bool zero_times_ok = c != '+'; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_bool many_times_ok = c != '?'; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* true means match shortest string possible. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_bool minimal = false; ~~~~~~~~~~~~~~~~~~~~~~~~ /* If there is a sequence of repetition chars, collapse it ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ down to just one (the right one). We can't combine ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ interval operators with these because of, e.g., `a{2}*', ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ which should only match an even number of `a's. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ while (p != pend) ~~~~~~~~~~~~~~~~~ { ~ PATFETCH (c); ~~~~~~~~~~~~~ if (c == '*' || (!(syntax & RE_BK_PLUS_QM) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ && (c == '+' || c == '?'))) ~~~~~~~~~~~~~~~~~~~~~~~~~~~ ; ~ else if (syntax & RE_BK_PLUS_QM && c == '\\') ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ if (p == pend) FREE_STACK_RETURN (REG_EESCAPE); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PATFETCH (c1); ~~~~~~~~~~~~~~ if (!(c1 == '+' || c1 == '?')) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ PATUNFETCH; ~~~~~~~~~~~ PATUNFETCH; ~~~~~~~~~~~ break; ~~~~~~ } ~ c = c1; ~~~~~~~ } ~ else ~~~~ { ~ PATUNFETCH; ~~~~~~~~~~~ break; ~~~~~~ } ~ /* If we get here, we found another repeat character. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!(syntax & RE_NO_MINIMAL_MATCHING)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ /* "*?" and "+?" and "??" are okay (and mean match ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ minimally), but other sequences (such as "*??" and ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ "+++") are rejected (reserved for future use). */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (minimal || c != '?') ~~~~~~~~~~~~~~~~~~~~~~~~ FREE_STACK_RETURN (REG_BADRPT); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ minimal = true; ~~~~~~~~~~~~~~~ } ~ else ~~~~ { ~ zero_times_ok |= c != '+'; ~~~~~~~~~~~~~~~~~~~~~~~~~~ many_times_ok |= c != '?'; ~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ } ~ /* Star, etc. applied to an empty pattern is equivalent ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ to an empty pattern. */ ~~~~~~~~~~~~~~~~~~~~~~~~ if (!laststart) ~~~~~~~~~~~~~~~ break; ~~~~~~ /* Now we know whether zero matches is allowed ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ and whether two or more matches is allowed ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ and whether we want minimal or maximal matching. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (minimal) ~~~~~~~~~~~~ { ~ if (!many_times_ok) ~~~~~~~~~~~~~~~~~~~ { ~ /* "a??" becomes: ~~~~~~~~~~~~~~~~~ 0: /on_failure_jump to 6 ~~~~~~~~~~~~~~~~~~~~~~~~ 3: /jump to 9 ~~~~~~~~~~~~~ 6: /exactn/1/A ~~~~~~~~~~~~~~ 9: end of pattern. ~~~~~~~~~~~~~~~~~~ */ ~~ GET_BUFFER_SPACE (6); ~~~~~~~~~~~~~~~~~~~~~ INSERT_JUMP (jump, laststart, buf_end + 3); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ buf_end += 3; ~~~~~~~~~~~~~ INSERT_JUMP (on_failure_jump, laststart, laststart + 6); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ buf_end += 3; ~~~~~~~~~~~~~ } ~ else if (zero_times_ok) ~~~~~~~~~~~~~~~~~~~~~~~ { ~ /* "a*?" becomes: ~~~~~~~~~~~~~~~~~ 0: /jump to 6 ~~~~~~~~~~~~~ 3: /exactn/1/A ~~~~~~~~~~~~~~ 6: /on_failure_jump to 3 ~~~~~~~~~~~~~~~~~~~~~~~~ 9: end of pattern. ~~~~~~~~~~~~~~~~~~ */ ~~ GET_BUFFER_SPACE (6); ~~~~~~~~~~~~~~~~~~~~~ INSERT_JUMP (jump, laststart, buf_end + 3); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ buf_end += 3; ~~~~~~~~~~~~~ STORE_JUMP (on_failure_jump, buf_end, laststart + 3); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ buf_end += 3; ~~~~~~~~~~~~~ } ~ else ~~~~ { ~ /* "a+?" becomes: ~~~~~~~~~~~~~~~~~ 0: /exactn/1/A ~~~~~~~~~~~~~~ 3: /on_failure_jump to 0 ~~~~~~~~~~~~~~~~~~~~~~~~ 6: end of pattern. ~~~~~~~~~~~~~~~~~~ */ ~~ GET_BUFFER_SPACE (3); ~~~~~~~~~~~~~~~~~~~~~ STORE_JUMP (on_failure_jump, buf_end, laststart); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ buf_end += 3; ~~~~~~~~~~~~~ } ~ } ~ else ~~~~ { ~ /* Are we optimizing this jump? */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_bool keep_string_p = false; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (many_times_ok) ~~~~~~~~~~~~~~~~~~ { /* More than one repetition is allowed, so put in ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ at the end a backward relative jump from ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `buf_end' to before the next jump we're going ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ to put in below (which jumps from laststart to ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ after this jump). ~~~~~~~~~~~~~~~~~ But if we are at the `*' in the exact sequence `.*\n', ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ insert an unconditional jump backwards to the ., ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ instead of the beginning of the loop. This way we only ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ push a failure point once, instead of every time ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ through the loop. */ ~~~~~~~~~~~~~~~~~~~~~ assert (p - 1 > pattern); ~~~~~~~~~~~~~~~~~~~~~~~~~ /* Allocate the space for the jump. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ GET_BUFFER_SPACE (3); ~~~~~~~~~~~~~~~~~~~~~ /* We know we are not at the first character of the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ pattern, because laststart was nonzero. And we've ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ already incremented `p', by the way, to be the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ character after the `*'. Do we have to do something ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ analogous here for null bytes, because of ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RE_DOT_NOT_NULL? */ ~~~~~~~~~~~~~~~~~~~ if (*(p - 2) == '.' ~~~~~~~~~~~~~~~~~~~ && zero_times_ok ~~~~~~~~~~~~~~~~ && p < pend && *p == '\n' ~~~~~~~~~~~~~~~~~~~~~~~~~ && !(syntax & RE_DOT_NEWLINE)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { /* We have .*\n. */ ~~~~~~~~~~~~~~~~~~~~~~ STORE_JUMP (jump, buf_end, laststart); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ keep_string_p = true; ~~~~~~~~~~~~~~~~~~~~~ } ~ else ~~~~ /* Anything else. */ ~~~~~~~~~~~~~~~~~~~~~ STORE_JUMP (maybe_pop_jump, buf_end, laststart - 3); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* We've added more stuff to the buffer. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ buf_end += 3; ~~~~~~~~~~~~~ } ~ /* On failure, jump from laststart to buf_end + 3, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ which will be the end of the buffer after this jump ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ is inserted. */ ~~~~~~~~~~~~~~~~ GET_BUFFER_SPACE (3); ~~~~~~~~~~~~~~~~~~~~~ INSERT_JUMP (keep_string_p ? on_failure_keep_string_jump ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ : on_failure_jump, ~~~~~~~~~~~~~~~~~~ laststart, buf_end + 3); ~~~~~~~~~~~~~~~~~~~~~~~~ buf_end += 3; ~~~~~~~~~~~~~ if (!zero_times_ok) ~~~~~~~~~~~~~~~~~~~ { ~ /* At least one repetition is required, so insert a ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `dummy_failure_jump' before the initial ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `on_failure_jump' instruction of the loop. This ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ effects a skip over that instruction the first time ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ we hit that loop. */ ~~~~~~~~~~~~~~~~~~~~~ GET_BUFFER_SPACE (3); ~~~~~~~~~~~~~~~~~~~~~ INSERT_JUMP (dummy_failure_jump, laststart, laststart + 6); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ buf_end += 3; ~~~~~~~~~~~~~ } ~ } ~ pending_exact = 0; ~~~~~~~~~~~~~~~~~~ } ~ break; ~~~~~~ case '.': ~~~~~~~~~ laststart = buf_end; ~~~~~~~~~~~~~~~~~~~~ BUF_PUSH (anychar); ~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ #ifdef MULE ~~~~~~~~~~~ #define MAYBE_START_OVER_WITH_EXTENDED(ch) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (ch >= 0x80) do \ ~~~~~~~~~~~~~~~~~~~~~~ { \ ~~~~~~~ goto start_over_with_extended; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } while (0) ~~~~~~~~~~~ #else ~~~~~ #define MAYBE_START_OVER_WITH_EXTENDED(ch) (void)(ch) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ case '[': ~~~~~~~~~ { ~ /* XEmacs change: this whole section */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_bool had_char_class = false; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (p == pend) FREE_STACK_RETURN (REG_EBRACK); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Ensure that we have enough space to push a charset: the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ opcode, the length count, and the bitset; 34 bytes in all. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ GET_BUFFER_SPACE (34); ~~~~~~~~~~~~~~~~~~~~~~ laststart = buf_end; ~~~~~~~~~~~~~~~~~~~~ /* We test `*p == '^' twice, instead of using an if ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ statement, so we only need one BUF_PUSH. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ BUF_PUSH (*p == '^' ? charset_not : charset); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (*p == '^') ~~~~~~~~~~~~~~ p++; ~~~~ /* Remember the first position in the bracket expression. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ p1 = p; ~~~~~~~ /* Push the number of bytes in the bitmap. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ BUF_PUSH ((1 << BYTEWIDTH) / BYTEWIDTH); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Clear the whole map. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~ memset (buf_end, 0, (1 << BYTEWIDTH) / BYTEWIDTH); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* charset_not matches newline according to a syntax bit. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if ((re_opcode_t) buf_end[-2] == charset_not ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ && (syntax & RE_HAT_LISTS_NOT_NEWLINE)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ SET_LIST_BIT ('\n'); ~~~~~~~~~~~~~~~~~~~~ /* Read in characters and ranges, setting map bits. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (;;) ~~~~~~~~ { ~ if (p == pend) FREE_STACK_RETURN (REG_EBRACK); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PATFETCH (c); ~~~~~~~~~~~~~ /* Frumble-bumble, we may have found some extended chars. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Need to start over, process everything using the general ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ extended-char mechanism, and need to use charset_mule and ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ charset_mule_not instead of charset and charset_not. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ MAYBE_START_OVER_WITH_EXTENDED (c); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* \ might escape characters inside [...] and [^...]. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if ((syntax & RE_BACKSLASH_ESCAPE_IN_LISTS) && c == '\\') ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ if (p == pend) FREE_STACK_RETURN (REG_EESCAPE); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PATFETCH (c1); ~~~~~~~~~~~~~~ MAYBE_START_OVER_WITH_EXTENDED (c1); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ SET_LIST_BIT (c1); ~~~~~~~~~~~~~~~~~~ continue; ~~~~~~~~~ } ~ /* Could be the end of the bracket expression. If it's ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ not (i.e., when the bracket expression is `[]' so ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ far), the ']' character bit gets set way below. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (c == ']' && p != p1 + 1) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ /* Look ahead to see if it's a range when the last thing ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ was a character class. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~ if (had_char_class && c == '-' && *p != ']') ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ FREE_STACK_RETURN (REG_ERANGE); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Look ahead to see if it's a range when the last thing ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ was a character: if this is a hyphen not at the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ beginning or the end of a list, then it's the range ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ operator. */ ~~~~~~~~~~~~~ if (c == '-' ~~~~~~~~~~~~ && !(p - 2 >= pattern && p[-2] == '[') ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ && !(p - 3 >= pattern && p[-3] == '[' && p[-2] == '^') ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ && *p != ']') ~~~~~~~~~~~~~ { ~ reg_errcode_t ret; ~~~~~~~~~~~~~~~~~~ MAYBE_START_OVER_WITH_EXTENDED (*(unsigned char *)p); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ret = compile_range (&p, pend, translate, syntax, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ buf_end); ~~~~~~~~~ if (ret != REG_NOERROR) FREE_STACK_RETURN (ret); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ else if (p[0] == '-' && p[1] != ']') ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { /* This handles ranges made up of characters only. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ reg_errcode_t ret; ~~~~~~~~~~~~~~~~~~ /* Move past the `-'. */ ~~~~~~~~~~~~~~~~~~~~~~~~~ PATFETCH (c1); ~~~~~~~~~~~~~~ MAYBE_START_OVER_WITH_EXTENDED (*(unsigned char *)p); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ret = compile_range (&p, pend, translate, syntax, buf_end); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (ret != REG_NOERROR) FREE_STACK_RETURN (ret); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* See if we're at the beginning of a possible character ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ class. */ ~~~~~~~~~~ else if (syntax & RE_CHAR_CLASSES && c == '[' && *p == ':') ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ re_char *str = p + 1; ~~~~~~~~~~~~~~~~~~~~~ PATFETCH (c); ~~~~~~~~~~~~~ c1 = 0; ~~~~~~~ /* If pattern is `[[:'. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (p == pend) FREE_STACK_RETURN (REG_EBRACK); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (;;) ~~~~~~~~ { ~ PATFETCH (c); ~~~~~~~~~~~~~ if ((c == ':' && *p == ']') || p == pend) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ break; ~~~~~~ } ~ c1++; ~~~~~ } ~ /* If isn't a word bracketed by `[:' and `:]': ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ undo the ending character, the letters, and leave ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the leading `:' and `[' (but set bits for them). */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (c == ':' && *p == ']') ~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ re_wctype_t cc = re_wctype (str, c1); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int ch; ~~~~~~~ if (cc == RECC_ERROR) ~~~~~~~~~~~~~~~~~~~~~ FREE_STACK_RETURN (REG_ECTYPE); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Throw away the ] at the end of the character ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ class. */ ~~~~~~~~~~ PATFETCH (c); ~~~~~~~~~~~~~ if (p == pend) FREE_STACK_RETURN (REG_EBRACK); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef MULE ~~~~~~~~~~~ if (re_wctype_can_match_non_ascii (cc)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ goto start_over_with_extended; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ #endif /* MULE */ ~~~~~~~~~~~~~~~~~ for (ch = 0; ch < (1 << BYTEWIDTH); ++ch) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ if (re_iswctype (ch, cc ~~~~~~~~~~~~~~~~~~~~~~~ RE_ISWCTYPE_ARG (current_buffer))) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ SET_LIST_BIT (ch); ~~~~~~~~~~~~~~~~~~ } ~ } ~ had_char_class = true; ~~~~~~~~~~~~~~~~~~~~~~ } ~ else ~~~~ { ~ c1++; ~~~~~ while (c1--) ~~~~~~~~~~~~ PATUNFETCH; ~~~~~~~~~~~ SET_LIST_BIT ('['); ~~~~~~~~~~~~~~~~~~~ SET_LIST_BIT (':'); ~~~~~~~~~~~~~~~~~~~ had_char_class = false; ~~~~~~~~~~~~~~~~~~~~~~~ } ~ } ~ else ~~~~ { ~ had_char_class = false; ~~~~~~~~~~~~~~~~~~~~~~~ SET_LIST_BIT (c); ~~~~~~~~~~~~~~~~~ } ~ } ~ /* Discard any (non)matching list bytes that are all 0 at the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ end of the map. Decrease the map-length byte too. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ while ((int) buf_end[-1] > 0 && buf_end[buf_end[-1] - 1] == 0) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ buf_end[-1]--; ~~~~~~~~~~~~~~ buf_end += buf_end[-1]; ~~~~~~~~~~~~~~~~~~~~~~~ } ~ break; ~~~~~~ #ifdef MULE ~~~~~~~~~~~ start_over_with_extended: ~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ REGISTER Lisp_Object rtab = Qnil; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Bitbyte flags = 0; ~~~~~~~~~~~~~~~~~~ int bytes_needed = sizeof (flags); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_bool had_char_class = false; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* There are extended chars here, which means we need to use the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ unified range-table format. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (buf_end[-2] == charset) ~~~~~~~~~~~~~~~~~~~~~~~~~~~ buf_end[-2] = charset_mule; ~~~~~~~~~~~~~~~~~~~~~~~~~~~ else ~~~~ buf_end[-2] = charset_mule_not; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ buf_end--; ~~~~~~~~~~ p = p1; /* go back to the beginning of the charset, after ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ a possible ^. */ ~~~~~~~~~~~~~~~~ rtab = Vthe_lisp_rangetab; ~~~~~~~~~~~~~~~~~~~~~~~~~~ Fclear_range_table (rtab); ~~~~~~~~~~~~~~~~~~~~~~~~~~ /* charset_not matches newline according to a syntax bit. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if ((re_opcode_t) buf_end[-1] == charset_mule_not ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ && (syntax & RE_HAT_LISTS_NOT_NEWLINE)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ SET_RANGETAB_BIT ('\n'); ~~~~~~~~~~~~~~~~~~~~~~~~ /* Read in characters and ranges, setting map bits. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (;;) ~~~~~~~~ { ~ if (p == pend) FREE_STACK_RETURN (REG_EBRACK); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PATFETCH (c); ~~~~~~~~~~~~~ /* \ might escape characters inside [...] and [^...]. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if ((syntax & RE_BACKSLASH_ESCAPE_IN_LISTS) && c == '\\') ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ if (p == pend) FREE_STACK_RETURN (REG_EESCAPE); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PATFETCH (c1); ~~~~~~~~~~~~~~ SET_RANGETAB_BIT (c1); ~~~~~~~~~~~~~~~~~~~~~~ continue; ~~~~~~~~~ } ~ /* Could be the end of the bracket expression. If it's ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ not (i.e., when the bracket expression is `[]' so ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ far), the ']' character bit gets set way below. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (c == ']' && p != p1 + 1) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ /* Look ahead to see if it's a range when the last thing ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ was a character class. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~ if (had_char_class && c == '-' && *p != ']') ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ FREE_STACK_RETURN (REG_ERANGE); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Look ahead to see if it's a range when the last thing ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ was a character: if this is a hyphen not at the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ beginning or the end of a list, then it's the range ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ operator. */ ~~~~~~~~~~~~~ if (c == '-' ~~~~~~~~~~~~ && !(p - 2 >= pattern && p[-2] == '[') ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ && !(p - 3 >= pattern && p[-3] == '[' && p[-2] == '^') ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ && *p != ']') ~~~~~~~~~~~~~ { ~ reg_errcode_t ret; ~~~~~~~~~~~~~~~~~~ ret = compile_extended_range (&p, pend, translate, syntax, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ rtab); ~~~~~~ if (ret != REG_NOERROR) FREE_STACK_RETURN (ret); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ else if (p[0] == '-' && p[1] != ']') ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { /* This handles ranges made up of characters only. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ reg_errcode_t ret; ~~~~~~~~~~~~~~~~~~ /* Move past the `-'. */ ~~~~~~~~~~~~~~~~~~~~~~~~~ PATFETCH (c1); ~~~~~~~~~~~~~~ ret = compile_extended_range (&p, pend, translate, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ syntax, rtab); ~~~~~~~~~~~~~~ if (ret != REG_NOERROR) FREE_STACK_RETURN (ret); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* See if we're at the beginning of a possible character ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ class. */ ~~~~~~~~~~ else if (syntax & RE_CHAR_CLASSES && c == '[' && *p == ':') ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ re_char *str = p + 1; ~~~~~~~~~~~~~~~~~~~~~ PATFETCH (c); ~~~~~~~~~~~~~ c1 = 0; ~~~~~~~ /* If pattern is `[[:'. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (p == pend) FREE_STACK_RETURN (REG_EBRACK); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (;;) ~~~~~~~~ { ~ PATFETCH (c); ~~~~~~~~~~~~~ if ((c == ':' && *p == ']') || p == pend) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ break; ~~~~~~ } ~ c1++; ~~~~~ } ~ /* If isn't a word bracketed by `[:' and `:]': ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ undo the ending character, the letters, and leave ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the leading `:' and `[' (but set bits for them). */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (c == ':' && *p == ']') ~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ re_wctype_t cc = re_wctype (str, c1); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ reg_errcode_t ret = REG_NOERROR; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (cc == RECC_ERROR) ~~~~~~~~~~~~~~~~~~~~~ FREE_STACK_RETURN (REG_ECTYPE); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Throw away the ] at the end of the character ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ class. */ ~~~~~~~~~~ PATFETCH (c); ~~~~~~~~~~~~~ if (p == pend) FREE_STACK_RETURN (REG_EBRACK); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ret = compile_char_class (cc, rtab, &flags); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (ret != REG_NOERROR) FREE_STACK_RETURN (ret); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ had_char_class = true; ~~~~~~~~~~~~~~~~~~~~~~ } ~ else ~~~~ { ~ c1++; ~~~~~ while (c1--) ~~~~~~~~~~~~ PATUNFETCH; ~~~~~~~~~~~ SET_RANGETAB_BIT ('['); ~~~~~~~~~~~~~~~~~~~~~~~ SET_RANGETAB_BIT (':'); ~~~~~~~~~~~~~~~~~~~~~~~ had_char_class = false; ~~~~~~~~~~~~~~~~~~~~~~~ } ~ } ~ else ~~~~ { ~ had_char_class = false; ~~~~~~~~~~~~~~~~~~~~~~~ SET_RANGETAB_BIT (c); ~~~~~~~~~~~~~~~~~~~~~ } ~ } ~ bytes_needed += unified_range_table_bytes_needed (rtab); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ GET_BUFFER_SPACE (bytes_needed); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ *buf_end++ = flags; ~~~~~~~~~~~~~~~~~~~ unified_range_table_copy_data (rtab, buf_end); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ buf_end += unified_range_table_bytes_used (buf_end); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ } ~ #endif /* MULE */ ~~~~~~~~~~~~~~~~~ case '(': ~~~~~~~~~ if (syntax & RE_NO_BK_PARENS) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto handle_open; ~~~~~~~~~~~~~~~~~ else ~~~~ goto normal_char; ~~~~~~~~~~~~~~~~~ case ')': ~~~~~~~~~ if (syntax & RE_NO_BK_PARENS) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto handle_close; ~~~~~~~~~~~~~~~~~~ else ~~~~ goto normal_char; ~~~~~~~~~~~~~~~~~ case '\n': ~~~~~~~~~~ if (syntax & RE_NEWLINE_ALT) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto handle_alt; ~~~~~~~~~~~~~~~~ else ~~~~ goto normal_char; ~~~~~~~~~~~~~~~~~ case '|': ~~~~~~~~~ if (syntax & RE_NO_BK_VBAR) ~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto handle_alt; ~~~~~~~~~~~~~~~~ else ~~~~ goto normal_char; ~~~~~~~~~~~~~~~~~ case '{': ~~~~~~~~~ if (syntax & RE_INTERVALS && syntax & RE_NO_BK_BRACES) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto handle_interval; ~~~~~~~~~~~~~~~~~~~~~ else ~~~~ goto normal_char; ~~~~~~~~~~~~~~~~~ case '\\': ~~~~~~~~~~ if (p == pend) FREE_STACK_RETURN (REG_EESCAPE); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Do not translate the character after the \, so that we can ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ distinguish, e.g., \B from \b, even if we normally would ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ translate, e.g., B to b. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PATFETCH_RAW (c); ~~~~~~~~~~~~~~~~~ switch (c) ~~~~~~~~~~ { ~ case '(': ~~~~~~~~~ if (syntax & RE_NO_BK_PARENS) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto normal_backslash; ~~~~~~~~~~~~~~~~~~~~~~ handle_open: ~~~~~~~~~~~~ { ~ regnum_t r = 0; ~~~~~~~~~~~~~~~ re_bool shy = 0, named_nonshy = 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!(syntax & RE_NO_SHY_GROUPS) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ && p != pend && itext_ichar_eql (p, '?')) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ INC_IBYTEPTR (p); /* Gobble up the '?'. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PATFETCH (c); /* Fetch the next character, which may be a ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ digit. */ ~~~~~~~~~ switch (c) ~~~~~~~~~~ { ~ case ':': /* shy groups */ ~~~~~~~~~~~~~~~~~~~~~~~~~~ shy = 1; ~~~~~~~~ break; ~~~~~~ case '1': case '2': case '3': case '4': ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case '5': case '6': case '7': case '8': case '9': ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PATUNFETCH; ~~~~~~~~~~~ GET_UNSIGNED_NUMBER (r); ~~~~~~~~~~~~~~~~~~~~~~~~ if (itext_ichar_eql (p, ':')) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ named_nonshy = 1; ~~~~~~~~~~~~~~~~~ INC_IBYTEPTR (p); /* Gobble up the ':'. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ /* Otherwise, fall through and error. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* An explicitly specified regnum must start with ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ non-0. */ ~~~~~~~~~ case '0': ~~~~~~~~~ default: ~~~~~~~~ FREE_STACK_RETURN (REG_BADPAT); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ } ~ ++regnum; ~~~~~~~~~ bufp->re_ngroups++; ~~~~~~~~~~~~~~~~~~~ if (bufp->re_ngroups > MAX_REGNUM) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ FREE_STACK_RETURN (REG_ESUBREG); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ if (!shy) ~~~~~~~~~ { ~ if (named_nonshy) ~~~~~~~~~~~~~~~~~ { ~ if (r < bufp->external_to_internal_register_size) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ if (group_in_compile_stack ~~~~~~~~~~~~~~~~~~~~~~~~~~ (compile_stack, ~~~~~~~~~~~~~~~ bufp->external_to_internal_register[r])) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ /* GNU errors in this context, which is ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ inconsistent; it otherwise has no problem ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ with named non-shy groups overriding ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ previously-assigned group numbers. I choose ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ to error here for consistency with GNU for ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ those writing code that should target ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ both. */ ~~~~~~~~ FREE_STACK_RETURN (REG_ESUBREG); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ } ~ if (r > bufp->re_nsub) ~~~~~~~~~~~~~~~~~~~~~~ { ~ bufp->re_nsub = r; ~~~~~~~~~~~~~~~~~~ } ~ } ~ else ~~~~ { ~ r = ++(bufp->re_nsub); ~~~~~~~~~~~~~~~~~~~~~~ } ~ while (bufp->external_to_internal_register_size <= ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->re_nsub) ~~~~~~~~~~~~~~ { ~ int i; ~~~~~~ int old_size = ~~~~~~~~~~~~~~ bufp->external_to_internal_register_size; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->external_to_internal_register_size ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ += max (old_size + 5, bufp->re_nsub + 5); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RETALLOC (bufp->external_to_internal_register, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->external_to_internal_register_size, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int); ~~~~~ for (i = old_size; ~~~~~~~~~~~~~~~~~~ i < bufp->external_to_internal_register_size; i++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->external_to_internal_register[i] = ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (int) 0xDEADBEEF; ~~~~~~~~~~~~~~~~~ } ~ /* This is explicitly [r] rather than [bufp->re_nsub] for ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the case that the named nonshy group references an ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ unused register number less than bufp->re_nsub. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->external_to_internal_register[r] = ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->re_ngroups; ~~~~~~~~~~~~~~~~~ } ~ if (COMPILE_STACK_FULL) ~~~~~~~~~~~~~~~~~~~~~~~ { ~ RETALLOC (compile_stack.stack, compile_stack.size << 1, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ compile_stack_elt_t); ~~~~~~~~~~~~~~~~~~~~~ if (compile_stack.stack == NULL) return REG_ESPACE; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ compile_stack.size <<= 1; ~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* These are the values to restore when we hit end of this ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ group. They are all relative offsets, so that if the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ whole pattern moves because of realloc, they will still ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ be valid. */ ~~~~~~~~~~~~~ COMPILE_STACK_TOP.begalt_offset = begalt - bufp->buffer; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ COMPILE_STACK_TOP.fixup_alt_jump ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ = fixup_alt_jump ? fixup_alt_jump - bufp->buffer + 1 : 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ COMPILE_STACK_TOP.laststart_offset = buf_end - bufp->buffer; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ COMPILE_STACK_TOP.regnum = bufp->re_ngroups; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ COMPILE_STACK_TOP.inner_group_offset ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ = buf_end - bufp->buffer + 3; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* We will eventually replace the 0 with the number of ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ groups inner to this one, using inner_group_offset, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ above. */ ~~~~~~~~~ GET_BUFFER_SPACE (5); ~~~~~~~~~~~~~~~~~~~~~ store_op2 (start_memory, buf_end, bufp->re_ngroups, 0); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ buf_end += 5; ~~~~~~~~~~~~~ compile_stack.avail++; ~~~~~~~~~~~~~~~~~~~~~~ fixup_alt_jump = 0; ~~~~~~~~~~~~~~~~~~~ laststart = 0; ~~~~~~~~~~~~~~ begalt = buf_end; ~~~~~~~~~~~~~~~~~ pending_exact = 0; ~~~~~~~~~~~~~~~~~~ } ~ break; ~~~~~~ case ')': ~~~~~~~~~ if (syntax & RE_NO_BK_PARENS) goto normal_backslash; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (COMPILE_STACK_EMPTY) ~~~~~~~~~~~~~~~~~~~~~~~~ { ~ if (syntax & RE_UNMATCHED_RIGHT_PAREN_ORD) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto normal_backslash; ~~~~~~~~~~~~~~~~~~~~~~ else ~~~~ FREE_STACK_RETURN (REG_ERPAREN); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ handle_close: ~~~~~~~~~~~~~ if (fixup_alt_jump) ~~~~~~~~~~~~~~~~~~~ { /* Push a dummy failure point at the end of the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ alternative for a possible future ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `pop_failure_jump' to pop. See comments at ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `push_dummy_failure' in `re_match_2'. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ BUF_PUSH (push_dummy_failure); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* We allocated space for this jump when we assigned ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ to `fixup_alt_jump', in the `handle_alt' case below. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ STORE_JUMP (jump_past_alt, fixup_alt_jump, buf_end - 1); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* See similar code for backslashed left paren above. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (COMPILE_STACK_EMPTY) ~~~~~~~~~~~~~~~~~~~~~~~~ { ~ if (syntax & RE_UNMATCHED_RIGHT_PAREN_ORD) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto normal_char; ~~~~~~~~~~~~~~~~~ else ~~~~ FREE_STACK_RETURN (REG_ERPAREN); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* Since we just checked for an empty stack above, this ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ``can't happen''. */ ~~~~~~~~~~~~~~~~~~~~~ assert (compile_stack.avail != 0); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ /* We don't just want to restore into `regnum', because ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ later groups should continue to be numbered higher, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ as in `(ab)c(de)' -- the second group is #2. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regnum_t this_group_regnum; ~~~~~~~~~~~~~~~~~~~~~~~~~~~ unsigned char *inner_group_loc; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ compile_stack.avail--; ~~~~~~~~~~~~~~~~~~~~~~ begalt = bufp->buffer + COMPILE_STACK_TOP.begalt_offset; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fixup_alt_jump ~~~~~~~~~~~~~~ = COMPILE_STACK_TOP.fixup_alt_jump ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ? bufp->buffer + COMPILE_STACK_TOP.fixup_alt_jump - 1 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ : 0; ~~~~ laststart = bufp->buffer + COMPILE_STACK_TOP.laststart_offset; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ this_group_regnum = COMPILE_STACK_TOP.regnum; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ pending_exact = 0; ~~~~~~~~~~~~~~~~~~ /* We're at the end of the group, so now we know how many ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ groups were inside this one. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ inner_group_loc ~~~~~~~~~~~~~~~ = bufp->buffer + COMPILE_STACK_TOP.inner_group_offset; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ STORE_NUMBER (inner_group_loc, regnum - this_group_regnum); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ GET_BUFFER_SPACE (5); ~~~~~~~~~~~~~~~~~~~~~ store_op2 (stop_memory, buf_end, this_group_regnum, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regnum - this_group_regnum); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ buf_end += 5; ~~~~~~~~~~~~~ } ~ break; ~~~~~~ case '|': /* `\|'. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~ if (syntax & RE_LIMITED_OPS || syntax & RE_NO_BK_VBAR) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto normal_backslash; ~~~~~~~~~~~~~~~~~~~~~~ handle_alt: ~~~~~~~~~~~ if (syntax & RE_LIMITED_OPS) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto normal_char; ~~~~~~~~~~~~~~~~~ /* Insert before the previous alternative a jump which ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ jumps to this alternative if the former fails. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ GET_BUFFER_SPACE (3); ~~~~~~~~~~~~~~~~~~~~~ INSERT_JUMP (on_failure_jump, begalt, buf_end + 6); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ pending_exact = 0; ~~~~~~~~~~~~~~~~~~ buf_end += 3; ~~~~~~~~~~~~~ /* The alternative before this one has a jump after it ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ which gets executed if it gets matched. Adjust that ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ jump so it will jump to this alternative's analogous ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ jump (put in below, which in turn will jump to the next ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (if any) alternative's such jump, etc.). The last such ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ jump jumps to the correct final destination. A picture: ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ _____ _____ ~~~~~~~~~~~ | | | | ~~~~~~~~~~~ | v | v ~~~~~~~~~~~ a | b | c ~~~~~~~~~~~ If we are at `b', then fixup_alt_jump right now points to a ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ three-byte space after `a'. We'll put in the jump, set ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fixup_alt_jump to right after `b', and leave behind three ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bytes which we'll fill in when we get to after `c'. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (fixup_alt_jump) ~~~~~~~~~~~~~~~~~~~ STORE_JUMP (jump_past_alt, fixup_alt_jump, buf_end); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Mark and leave space for a jump after this alternative, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ to be filled in later either by next alternative or ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ when know we're at the end of a series of alternatives. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fixup_alt_jump = buf_end; ~~~~~~~~~~~~~~~~~~~~~~~~~ GET_BUFFER_SPACE (3); ~~~~~~~~~~~~~~~~~~~~~ buf_end += 3; ~~~~~~~~~~~~~ laststart = 0; ~~~~~~~~~~~~~~ begalt = buf_end; ~~~~~~~~~~~~~~~~~ break; ~~~~~~ case '{': ~~~~~~~~~ /* If \{ is a literal. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!(syntax & RE_INTERVALS) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* If we're at `\{' and it's not the open-interval ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ operator. */ ~~~~~~~~~~~~~ || ((syntax & RE_INTERVALS) && (syntax & RE_NO_BK_BRACES)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || (p - 2 == pattern && p == pend)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto normal_backslash; ~~~~~~~~~~~~~~~~~~~~~~ #define BAD_INTERVAL(errnum) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ do \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (syntax & RE_NO_BK_BRACES) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto unfetch_interval; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ else \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ FREE_STACK_RETURN (errnum); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } while (0) ~~~~~~~~~~~ handle_interval: ~~~~~~~~~~~~~~~~ { ~ /* If got here, then the syntax allows intervals. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* At least (most) this many matches must be made. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int lower_bound = 0, upper_bound = -1; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ beg_interval = p - 1; ~~~~~~~~~~~~~~~~~~~~~ if (p == pend || itext_ichar_eql (p, '+')) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ BAD_INTERVAL (REG_EBRACE); ~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ GET_UNSIGNED_NUMBER (lower_bound); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (p == pend) BAD_INTERVAL (REG_EBRACE); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PATFETCH (c); ~~~~~~~~~~~~~ if (c == ',') ~~~~~~~~~~~~~ { ~ if (p == pend || itext_ichar_eql (p, '+')) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ BAD_INTERVAL (REG_EBRACE); ~~~~~~~~~~~~~~~~~~~~~~~~~~ GET_UNSIGNED_NUMBER (upper_bound); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (upper_bound < 0) upper_bound = RE_DUP_MAX; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (p == pend) BAD_INTERVAL (REG_EBRACE); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PATFETCH (c); ~~~~~~~~~~~~~ } ~ else ~~~~ { ~ /* Interval such as `{1}' => match exactly once. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ upper_bound = lower_bound; ~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ if (lower_bound > upper_bound) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ BAD_INTERVAL (REG_EBRACE); ~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ if (upper_bound > RE_DUP_MAX) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ BAD_INTERVAL (REG_ESIZEBR); ~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ if (!(syntax & RE_NO_BK_BRACES)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ if (c != '\\') ~~~~~~~~~~~~~~ FREE_STACK_RETURN (REG_BADBR); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (p == pend) ~~~~~~~~~~~~~~ FREE_STACK_RETURN (REG_EESCAPE); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PATFETCH (c); ~~~~~~~~~~~~~ } ~ if (c != '}') ~~~~~~~~~~~~~ { ~ BAD_INTERVAL (REG_EBRACE); ~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* We just parsed a valid interval. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* It's invalid to have no preceding RE. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!laststart) ~~~~~~~~~~~~~~~ { ~ if (syntax & RE_CONTEXT_INVALID_OPS) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ FREE_STACK_RETURN (REG_BADRPT); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ else if (syntax & RE_CONTEXT_INDEP_OPS) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ laststart = buf_end; ~~~~~~~~~~~~~~~~~~~~ else ~~~~ goto unfetch_interval; ~~~~~~~~~~~~~~~~~~~~~~ } ~ /* If the upper bound is zero, don't want to succeed at ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ all; jump from `laststart' to `b + 3', which will be ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the end of the buffer after we insert the jump. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (upper_bound == 0) ~~~~~~~~~~~~~~~~~~~~~ { ~ GET_BUFFER_SPACE (3); ~~~~~~~~~~~~~~~~~~~~~ INSERT_JUMP (jump, laststart, buf_end + 3); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ buf_end += 3; ~~~~~~~~~~~~~ } ~ /* Otherwise, we have a nontrivial interval. When ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ we're all done, the pattern will look like: ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ set_number_at ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ set_number_at ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ succeed_n ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ~~~~~~~~~~~~~~ jump_n ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (The upper bound and `jump_n' are omitted if ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `upper_bound' is 1, though.) */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ else ~~~~ { /* If the upper bound is > 1, we need to insert ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ more at the end of the loop. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int nbytes = 10 + (upper_bound > 1) * 10; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ GET_BUFFER_SPACE (nbytes); ~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Initialize lower bound of the `succeed_n', even ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ though it will be set during matching by its ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ attendant `set_number_at' (inserted next), ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ because `re_compile_fastmap' needs to know. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Jump to the `jump_n' we might insert below. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ INSERT_JUMP2 (succeed_n, laststart, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ buf_end + 5 + (upper_bound > 1) * 5, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ lower_bound); ~~~~~~~~~~~~~ buf_end += 5; ~~~~~~~~~~~~~ /* Code to initialize the lower bound. Insert ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ before the `succeed_n'. The `5' is the last two ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bytes of this `set_number_at', plus 3 bytes of ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the following `succeed_n'. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ insert_op2 (set_number_at, laststart, 5, lower_bound, buf_end); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ buf_end += 5; ~~~~~~~~~~~~~ if (upper_bound > 1) ~~~~~~~~~~~~~~~~~~~~ { /* More than one repetition is allowed, so ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ append a backward jump to the `succeed_n' ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ that starts this interval. ~~~~~~~~~~~~~~~~~~~~~~~~~~ When we've reached this during matching, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ we'll have matched the interval once, so ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ jump back only `upper_bound - 1' times. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ STORE_JUMP2 (jump_n, buf_end, laststart + 5, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ upper_bound - 1); ~~~~~~~~~~~~~~~~~ buf_end += 5; ~~~~~~~~~~~~~ /* The location we want to set is the second ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ parameter of the `jump_n'; that is `b-2' as ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ an absolute address. `laststart' will be ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the `set_number_at' we're about to insert; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `laststart+3' the number to set, the source ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for the relative address. But we are ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ inserting into the middle of the pattern -- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ so everything is getting moved up by 5. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Conclusion: (b - 2) - (laststart + 3) + 5, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ i.e., b - laststart. ~~~~~~~~~~~~~~~~~~~~ We insert this at the beginning of the loop ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ so that if we fail during matching, we'll ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ reinitialize the bounds. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ insert_op2 (set_number_at, laststart, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ buf_end - laststart, ~~~~~~~~~~~~~~~~~~~~ upper_bound - 1, buf_end); ~~~~~~~~~~~~~~~~~~~~~~~~~~ buf_end += 5; ~~~~~~~~~~~~~ } ~ } ~ pending_exact = 0; ~~~~~~~~~~~~~~~~~~ beg_interval = NULL; ~~~~~~~~~~~~~~~~~~~~ } ~ break; ~~~~~~ #undef BAD_INTERVAL ~~~~~~~~~~~~~~~~~~~ unfetch_interval: ~~~~~~~~~~~~~~~~~ /* If an invalid interval, match the characters as literals. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ assert (beg_interval); ~~~~~~~~~~~~~~~~~~~~~~ p = beg_interval; ~~~~~~~~~~~~~~~~~ beg_interval = NULL; ~~~~~~~~~~~~~~~~~~~~ /* normal_char and normal_backslash need `c'. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PATFETCH (c); ~~~~~~~~~~~~~ if (!(syntax & RE_NO_BK_BRACES)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ if (p > pattern && p[-1] == '\\') ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto normal_backslash; ~~~~~~~~~~~~~~~~~~~~~~ } ~ goto normal_char; ~~~~~~~~~~~~~~~~~ #ifdef emacs ~~~~~~~~~~~~ /* There is no way to specify the before_dot and after_dot ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ operators. rms says this is ok. --karl */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case '=': ~~~~~~~~~ BUF_PUSH (at_dot); ~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case 's': ~~~~~~~~~ laststart = buf_end; ~~~~~~~~~~~~~~~~~~~~ PATFETCH (c); ~~~~~~~~~~~~~ /* XEmacs addition */ ~~~~~~~~~~~~~~~~~~~~~ if (c >= 0x80 || syntax_spec_code[c] == 0377) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ FREE_STACK_RETURN (REG_ESYNTAX); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ BUF_PUSH_2 (syntaxspec, syntax_spec_code[c]); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case 'S': ~~~~~~~~~ laststart = buf_end; ~~~~~~~~~~~~~~~~~~~~ PATFETCH (c); ~~~~~~~~~~~~~ /* XEmacs addition */ ~~~~~~~~~~~~~~~~~~~~~ if (c >= 0x80 || syntax_spec_code[c] == 0377) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ FREE_STACK_RETURN (REG_ESYNTAX); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ BUF_PUSH_2 (notsyntaxspec, syntax_spec_code[c]); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ #ifdef MULE ~~~~~~~~~~~ /* 97.2.17 jhod merged in to XEmacs from mule-2.3 */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case 'c': ~~~~~~~~~ laststart = buf_end; ~~~~~~~~~~~~~~~~~~~~ PATFETCH_RAW (c); ~~~~~~~~~~~~~~~~~ if (c < 32 || c > 127) ~~~~~~~~~~~~~~~~~~~~~~ FREE_STACK_RETURN (REG_ECATEGORY); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ BUF_PUSH_2 (categoryspec, c); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case 'C': ~~~~~~~~~ laststart = buf_end; ~~~~~~~~~~~~~~~~~~~~ PATFETCH_RAW (c); ~~~~~~~~~~~~~~~~~ if (c < 32 || c > 127) ~~~~~~~~~~~~~~~~~~~~~~ FREE_STACK_RETURN (REG_ECATEGORY); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ BUF_PUSH_2 (notcategoryspec, c); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ /* end of category patch */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif /* MULE */ ~~~~~~~~~~~~~~~~~ #endif /* emacs */ ~~~~~~~~~~~~~~~~~~ case 'w': ~~~~~~~~~ laststart = buf_end; ~~~~~~~~~~~~~~~~~~~~ BUF_PUSH (wordchar); ~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case 'W': ~~~~~~~~~ laststart = buf_end; ~~~~~~~~~~~~~~~~~~~~ BUF_PUSH (notwordchar); ~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case '<': ~~~~~~~~~ BUF_PUSH (wordbeg); ~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case '>': ~~~~~~~~~ BUF_PUSH (wordend); ~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case 'b': ~~~~~~~~~ BUF_PUSH (wordbound); ~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case 'B': ~~~~~~~~~ BUF_PUSH (notwordbound); ~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case '`': ~~~~~~~~~ BUF_PUSH (begbuf); ~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case '\'': ~~~~~~~~~~ BUF_PUSH (endbuf); ~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case '1': case '2': case '3': case '4': case '5': ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case '6': case '7': case '8': case '9': ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ regnum_t reg = -1, regint; ~~~~~~~~~~~~~~~~~~~~~~~~~~ if (syntax & RE_NO_BK_REFS) ~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto normal_char; ~~~~~~~~~~~~~~~~~ PATUNFETCH; ~~~~~~~~~~~ GET_UNSIGNED_NUMBER (reg); ~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Progressively divide down the backreference until we find ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ one that corresponds to an existing register. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ while (reg > 10 && ~~~~~~~~~~~~~~~~~~ (syntax & RE_NO_MULTI_DIGIT_BK_REFS ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || reg > bufp->re_nsub ~~~~~~~~~~~~~~~~~~~~~~ || (bufp->external_to_internal_register[reg] ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ == (int) 0xDEADBEEF))) ~~~~~~~~~~~~~~~~~~~~~~ { ~ PATUNFETCH; ~~~~~~~~~~~ reg /= 10; ~~~~~~~~~~ } ~ if (reg > bufp->re_nsub ~~~~~~~~~~~~~~~~~~~~~~~ || (bufp->external_to_internal_register[reg] ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ == (int) 0xDEADBEEF)) ~~~~~~~~~~~~~~~~~~~~~ { ~ /* \N with one digit with a non-existing group has always ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ been a syntax error. ~~~~~~~~~~~~~~~~~~~~ GNU as of Fr 27 Mär 2020 16:24:07 GMT do not accept ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ multidigit backreferences; if they did there would be ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ an argument for this not being an error for those ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ backreferences that are less than some known named ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ backreference. As it is currently we should error, this ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ will give those writing code for XEmacs better ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ feedback. */ ~~~~~~~~~~~~ FREE_STACK_RETURN (REG_ESUBREG); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ regint = bufp->external_to_internal_register[reg]; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Can't back reference to a subexpression if inside of it. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (group_in_compile_stack (compile_stack, regint)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ /* Check REG, not REGINT. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ while (reg > 10) ~~~~~~~~~~~~~~~~ { ~ PATUNFETCH; ~~~~~~~~~~~ reg = reg / 10; ~~~~~~~~~~~~~~~ } ~ goto normal_char; ~~~~~~~~~~~~~~~~~ } ~ #ifdef emacs ~~~~~~~~~~~~ if (reg > 9 && ~~~~~~~~~~~~~~ bufp->warned_about_incompatible_back_references == 0) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ bufp->warned_about_incompatible_back_references = 1; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ warn_when_safe (intern ("regex"), Qinfo, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ "Back reference \\%d now has new " ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ "semantics in %s", reg, pattern); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ #endif ~~~~~~ laststart = buf_end; ~~~~~~~~~~~~~~~~~~~~ GET_BUFFER_SPACE (3); ~~~~~~~~~~~~~~~~~~~~~ store_op1 (duplicate, buf_end, regint); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ buf_end += 3; ~~~~~~~~~~~~~ } ~ break; ~~~~~~ case '+': ~~~~~~~~~ case '?': ~~~~~~~~~ if (syntax & RE_BK_PLUS_QM) ~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto handle_plus; ~~~~~~~~~~~~~~~~~ else ~~~~ goto normal_backslash; ~~~~~~~~~~~~~~~~~~~~~~ default: ~~~~~~~~ normal_backslash: ~~~~~~~~~~~~~~~~~ /* You might think it would be useful for \ to mean ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ not to translate; but if we don't translate it, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ it will never match anything. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ c = RE_TRANSLATE (c); ~~~~~~~~~~~~~~~~~~~~~ goto normal_char; ~~~~~~~~~~~~~~~~~ } ~ break; ~~~~~~ default: ~~~~~~~~ /* Expects the character in `c'. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* `p' points to the location after where `c' came from. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ normal_char: ~~~~~~~~~~~~ { ~ /* The following conditional synced to GNU Emacs 22.1. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* If no exactn currently being built. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!pending_exact ~~~~~~~~~~~~~~~~~~ /* If last exactn not at current position. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || pending_exact + *pending_exact + 1 != buf_end ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* We have only one byte following the exactn for the count. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || *pending_exact >= (1 << BYTEWIDTH) - MAX_ICHAR_LEN ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* If followed by a repetition operator. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ If the lookahead fails because of end of pattern, any ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ trailing backslash will get caught later. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || (p != pend && (*p == '*' || *p == '^')) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || ((syntax & RE_BK_PLUS_QM) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ? p + 1 < pend && *p == '\\' && (p[1] == '+' || p[1] == '?') ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ : p != pend && (*p == '+' || *p == '?')) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || ((syntax & RE_INTERVALS) ~~~~~~~~~~~~~~~~~~~~~~~~~~~ && ((syntax & RE_NO_BK_BRACES) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ? p != pend && *p == '{' ~~~~~~~~~~~~~~~~~~~~~~~~ : p + 1 < pend && (p[0] == '\\' && p[1] == '{')))) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ /* Start building a new exactn. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ laststart = buf_end; ~~~~~~~~~~~~~~~~~~~~ BUF_PUSH_2 (exactn, 0); ~~~~~~~~~~~~~~~~~~~~~~~ pending_exact = buf_end - 1; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ #ifndef MULE ~~~~~~~~~~~~ BUF_PUSH (c); ~~~~~~~~~~~~~ (*pending_exact)++; ~~~~~~~~~~~~~~~~~~~ #else ~~~~~ { ~ Bytecount bt_count; ~~~~~~~~~~~~~~~~~~~ Ibyte tmp_buf[MAX_ICHAR_LEN]; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int i; ~~~~~~ bt_count = set_itext_ichar (tmp_buf, c); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (i = 0; i < bt_count; i++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ BUF_PUSH (tmp_buf[i]); ~~~~~~~~~~~~~~~~~~~~~~ (*pending_exact)++; ~~~~~~~~~~~~~~~~~~~ } ~ } ~ #endif ~~~~~~ break; ~~~~~~ } ~ } /* switch (c) */ ~~~~~~~~~~~~~~~~~~ } /* while p != pend */ ~~~~~~~~~~~~~~~~~~~~~~~ /* Through the pattern now. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (fixup_alt_jump) ~~~~~~~~~~~~~~~~~~~ STORE_JUMP (jump_past_alt, fixup_alt_jump, buf_end); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!COMPILE_STACK_EMPTY) ~~~~~~~~~~~~~~~~~~~~~~~~~ FREE_STACK_RETURN (REG_EPAREN); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* If we don't want backtracking, force success ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the first time we reach the end of the compiled pattern. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (syntax & RE_NO_POSIX_BACKTRACKING) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ BUF_PUSH (succeed); ~~~~~~~~~~~~~~~~~~~ xfree (compile_stack.stack); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* We have succeeded; set the length of the buffer. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->used = buf_end - bufp->buffer; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef DEBUG ~~~~~~~~~~~~ if (debug_regexps & RE_DEBUG_COMPILATION) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ DEBUG_PRINT1 ("\nCompiled pattern: \n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ print_compiled_pattern (bufp); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ #endif /* DEBUG */ ~~~~~~~~~~~~~~~~~~ #ifndef MATCH_MAY_ALLOCATE ~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Initialize the failure stack to the largest possible stack. This ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ isn't necessary unless we're trying to avoid calling alloca in ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the search and match routines. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ int num_regs = bufp->re_ngroups + 1; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Since DOUBLE_FAIL_STACK refuses to double only if the current size ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ is strictly greater than re_max_failures, the largest possible stack ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ is 2 * re_max_failures failure points. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (fail_stack.size < (2 * re_max_failures * MAX_FAILURE_ITEMS)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ fail_stack.size = (2 * re_max_failures * MAX_FAILURE_ITEMS); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (! fail_stack.stack) ~~~~~~~~~~~~~~~~~~~~~~~ fail_stack.stack ~~~~~~~~~~~~~~~~ = (fail_stack_elt_t *) xmalloc (fail_stack.size ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ * sizeof (fail_stack_elt_t)); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ else ~~~~ fail_stack.stack ~~~~~~~~~~~~~~~~ = (fail_stack_elt_t *) xrealloc (fail_stack.stack, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (fail_stack.size ~~~~~~~~~~~~~~~~ * sizeof (fail_stack_elt_t))); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ regex_grow_registers (num_regs); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ #endif /* not MATCH_MAY_ALLOCATE */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return REG_NOERROR; ~~~~~~~~~~~~~~~~~~~ } /* regex_compile */ ~~~~~~~~~~~~~~~~~~~~~ ~ /* Subroutines for `regex_compile'. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Store OP at LOC followed by two-byte integer parameter ARG. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ static void ~~~~~~~~~~~ store_op1 (re_opcode_t op, unsigned char *loc, int arg) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ *loc = (unsigned char) op; ~~~~~~~~~~~~~~~~~~~~~~~~~~ STORE_NUMBER (loc + 1, arg); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* Like `store_op1', but for two two-byte parameters ARG1 and ARG2. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ static void ~~~~~~~~~~~ store_op2 (re_opcode_t op, unsigned char *loc, int arg1, int arg2) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ *loc = (unsigned char) op; ~~~~~~~~~~~~~~~~~~~~~~~~~~ STORE_NUMBER (loc + 1, arg1); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ STORE_NUMBER (loc + 3, arg2); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* Copy the bytes from LOC to END to open up three bytes of space at LOC ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for OP followed by two-byte integer parameter ARG. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ static void ~~~~~~~~~~~ insert_op1 (re_opcode_t op, unsigned char *loc, int arg, unsigned char *end) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ REGISTER unsigned char *pfrom = end; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ REGISTER unsigned char *pto = end + 3; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ while (pfrom != loc) ~~~~~~~~~~~~~~~~~~~~ *--pto = *--pfrom; ~~~~~~~~~~~~~~~~~~ store_op1 (op, loc, arg); ~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* Like `insert_op1', but for two two-byte parameters ARG1 and ARG2. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ static void ~~~~~~~~~~~ insert_op2 (re_opcode_t op, unsigned char *loc, int arg1, int arg2, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ unsigned char *end) ~~~~~~~~~~~~~~~~~~~ { ~ REGISTER unsigned char *pfrom = end; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ REGISTER unsigned char *pto = end + 5; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ while (pfrom != loc) ~~~~~~~~~~~~~~~~~~~~ *--pto = *--pfrom; ~~~~~~~~~~~~~~~~~~ store_op2 (op, loc, arg1, arg2); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* P points to just after a ^ in PATTERN. Return true if that ^ comes ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ after an alternative or a begin-subexpression. We assume there is at ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ least one character before the ^. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ static re_bool ~~~~~~~~~~~~~~ at_begline_loc_p (re_char *pattern, re_char *p, reg_syntax_t syntax) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ re_char *prev = p - 2; ~~~~~~~~~~~~~~~~~~~~~~ re_bool prev_prev_backslash = prev > pattern && prev[-1] == '\\'; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return ~~~~~~ /* After a subexpression? */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (*prev == '(' && (syntax & RE_NO_BK_PARENS || prev_prev_backslash)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* After an alternative? */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || (*prev == '|' && (syntax & RE_NO_BK_VBAR || prev_prev_backslash)); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* The dual of at_begline_loc_p. This one is for $. We assume there is ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ at least one character after the $, i.e., `P < PEND'. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ static re_bool ~~~~~~~~~~~~~~ at_endline_loc_p (re_char *p, re_char *pend, int syntax) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ re_char *next = p; ~~~~~~~~~~~~~~~~~~ re_bool next_backslash = *next == '\\'; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_char *next_next = p + 1 < pend ? p + 1 : 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return ~~~~~~ /* Before a subexpression? */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (syntax & RE_NO_BK_PARENS ? *next == ')' ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ : next_backslash && next_next && *next_next == ')') ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Before an alternative? */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || (syntax & RE_NO_BK_VBAR ? *next == '|' ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ : next_backslash && next_next && *next_next == '|'); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* Returns true if REGNUM is in one of COMPILE_STACK's elements and ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ false if it's not. */ ~~~~~~~~~~~~~~~~~~~~~~ static re_bool ~~~~~~~~~~~~~~ group_in_compile_stack (compile_stack_type compile_stack, regnum_t regnum) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ int this_element; ~~~~~~~~~~~~~~~~~ for (this_element = compile_stack.avail - 1; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ this_element >= 0; ~~~~~~~~~~~~~~~~~~ this_element--) ~~~~~~~~~~~~~~~ if (compile_stack.stack[this_element].regnum == regnum) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return true; ~~~~~~~~~~~~ return false; ~~~~~~~~~~~~~ } ~ /* Read the ending character of a range (in a bracket expression) from the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ uncompiled pattern *P_PTR (which ends at PEND). We assume the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ starting character is in `P[-2]'. (`P[-1]' is the character `-'.) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Then we set the translation of all bits between the starting and ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ending characters (inclusive) in the compiled pattern B. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Return an error code. ~~~~~~~~~~~~~~~~~~~~~ We use these short variable names so we can use the same macros as ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `regex_compile' itself. ~~~~~~~~~~~~~~~~~~~~~~~ Under Mule, this is only called when both chars of the range are ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ASCII. */ ~~~~~~~~~ static reg_errcode_t ~~~~~~~~~~~~~~~~~~~~ compile_range (re_char **p_ptr, re_char *pend, RE_TRANSLATE_TYPE translate, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ reg_syntax_t syntax, unsigned char *buf_end) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ Ichar this_char; ~~~~~~~~~~~~~~~~ re_char *p = *p_ptr; ~~~~~~~~~~~~~~~~~~~~ int range_start, range_end; ~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (p == pend) ~~~~~~~~~~~~~~ return REG_ERANGE; ~~~~~~~~~~~~~~~~~~ /* Even though the pattern is a signed `char *', we need to fetch ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ with unsigned char *'s; if the high bit of the pattern character ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ is set, the range endpoints will be negative if we fetch using a ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ signed char *. ~~~~~~~~~~~~~~ We also want to fetch the endpoints without translating them; the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ appropriate translation is done in the bit-setting loop below. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* The SVR4 compiler on the 3B2 had trouble with unsigned const char *. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ range_start = ((const unsigned char *) p)[-2]; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ range_end = ((const unsigned char *) p)[0]; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Have to increment the pointer into the pattern string, so the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ caller isn't still at the ending character. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (*p_ptr)++; ~~~~~~~~~~~ /* If the start is after the end, the range is empty. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (range_start > range_end) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return syntax & RE_NO_EMPTY_RANGES ? REG_ERANGE : REG_NOERROR; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Here we see why `this_char' has to be larger than an `unsigned ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ char' -- the range is inclusive, so if `range_end' == 0xff ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (assuming 8-bit characters), we would otherwise go into an infinite ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ loop, since all characters <= 0xff. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (this_char = range_start; this_char <= range_end; this_char++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ SET_LIST_BIT (RE_TRANSLATE (this_char)); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ return REG_NOERROR; ~~~~~~~~~~~~~~~~~~~ } ~ #ifdef MULE ~~~~~~~~~~~ static reg_errcode_t ~~~~~~~~~~~~~~~~~~~~ compile_extended_range (re_char **p_ptr, re_char *pend, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RE_TRANSLATE_TYPE translate, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ reg_syntax_t syntax, Lisp_Object rtab) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ Ichar this_char, range_start, range_end; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ const Ibyte *p; ~~~~~~~~~~~~~~~ if (*p_ptr == pend) ~~~~~~~~~~~~~~~~~~~ return REG_ERANGE; ~~~~~~~~~~~~~~~~~~ p = (const Ibyte *) *p_ptr; ~~~~~~~~~~~~~~~~~~~~~~~~~~~ range_end = itext_ichar (p); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ p--; /* back to '-' */ ~~~~~~~~~~~~~~~~~~~~~~ DEC_IBYTEPTR (p); /* back to start of range */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* We also want to fetch the endpoints without translating them; the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ appropriate translation is done in the bit-setting loop below. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ range_start = itext_ichar (p); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ INC_IBYTEPTR (*p_ptr); ~~~~~~~~~~~~~~~~~~~~~~ /* If the start is after the end, the range is empty. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (range_start > range_end) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return syntax & RE_NO_EMPTY_RANGES ? REG_ERANGE : REG_NOERROR; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifndef UNICODE_INTERNAL ~~~~~~~~~~~~~~~~~~~~~~~~ /* Can't have ranges spanning different charsets, except maybe for ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ranges entirely within the first 256 chars. (The intent of this is that ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the effect of such a range would be unpredictable, since there is no ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ well-defined ordering over charsets and the particular assignment of ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ charset ID's is arbitrary.) This does not apply to Unicode, with ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ well-defined character values. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if ((range_start >= 0x100 || range_end >= 0x100) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ && !EQ (old_mule_ichar_charset (range_start), ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ old_mule_ichar_charset (range_end))) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return REG_ERANGESPAN; ~~~~~~~~~~~~~~~~~~~~~~ #endif /* not UNICODE_INTERNAL */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (TRANSLATE_P (translate)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ /* #### This might be way inefficient if the range encompasses 10,000 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ chars or something. To be efficient, you'd have to do something like ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ this: ~~~~~ range_table a ~~~~~~~~~~~~~ range_table b; ~~~~~~~~~~~~~~ map_char_table (translation table, [range_start, range_end]) of ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ lambda (ch, translation): ~~~~~~~~~~~~~~~~~~~~~~~~~ put (ch, Qt) in a ~~~~~~~~~~~~~~~~~ put (translation, Qt) in b ~~~~~~~~~~~~~~~~~~~~~~~~~~ invert the range in a and truncate to [range_start, range_end] ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ put the union of a, b in rtab ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ This is to say, we want to map every character that has a translation ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ to its translation, and other characters to themselves. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ This assumes, as is reasonable in practice, that a translation ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ table maps individual characters to their translation, and does ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ not generally map multiple characters to the same translation. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ ~~ for (this_char = range_start; this_char <= range_end; this_char++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ SET_RANGETAB_BIT (RE_TRANSLATE (this_char)); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ } ~ else ~~~~ put_range_table (rtab, range_start, range_end, Qt); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return REG_NOERROR; ~~~~~~~~~~~~~~~~~~~ } ~ #endif /* MULE */ ~~~~~~~~~~~~~~~~~ #ifdef emacs ~~~~~~~~~~~~ reg_errcode_t ~~~~~~~~~~~~~ compile_char_class (re_wctype_t cc, Lisp_Object rtab, Bitbyte *flags_out) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ *flags_out |= re_wctype_to_bit (cc); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ switch (cc) ~~~~~~~~~~~ { ~ case RECC_ASCII: ~~~~~~~~~~~~~~~~ put_range_table (rtab, 0, 0x7f, Qt); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case RECC_XDIGIT: ~~~~~~~~~~~~~~~~~ put_range_table (rtab, 'a', 'f', Qt); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ put_range_table (rtab, 'A', 'f', Qt); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* fallthrough */ ~~~~~~~~~~~~~~~~~ case RECC_DIGIT: ~~~~~~~~~~~~~~~~ put_range_table (rtab, '0', '9', Qt); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case RECC_BLANK: ~~~~~~~~~~~~~~~~ put_range_table (rtab, ' ', ' ', Qt); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ put_range_table (rtab, '\t', '\t', Qt); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case RECC_PRINT: ~~~~~~~~~~~~~~~~ put_range_table (rtab, ' ', 0x7e, Qt); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ put_range_table (rtab, 0x80, CHAR_CODE_LIMIT, Qt); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case RECC_GRAPH: ~~~~~~~~~~~~~~~~ put_range_table (rtab, '!', 0x7e, Qt); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ put_range_table (rtab, 0x80, CHAR_CODE_LIMIT, Qt); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case RECC_NONASCII: ~~~~~~~~~~~~~~~~~~~ case RECC_MULTIBYTE: ~~~~~~~~~~~~~~~~~~~~ put_range_table (rtab, 0x80, CHAR_CODE_LIMIT, Qt); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case RECC_CNTRL: ~~~~~~~~~~~~~~~~ put_range_table (rtab, 0x00, 0x1f, Qt); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case RECC_UNIBYTE: ~~~~~~~~~~~~~~~~~~ /* Never true in XEmacs. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ /* The following all have their own bits in the class_bits argument to ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ charset_mule and charset_mule_not, they don't use the range table ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ information. */ ~~~~~~~~~~~~~~~ case RECC_ALPHA: ~~~~~~~~~~~~~~~~ case RECC_WORD: ~~~~~~~~~~~~~~~ case RECC_ALNUM: /* Equivalent to RECC_WORD */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_LOWER: ~~~~~~~~~~~~~~~~ case RECC_PUNCT: ~~~~~~~~~~~~~~~~ case RECC_SPACE: ~~~~~~~~~~~~~~~~ case RECC_UPPER: ~~~~~~~~~~~~~~~~ break; ~~~~~~ } ~ return REG_NOERROR; ~~~~~~~~~~~~~~~~~~~ } ~ #endif /* MULE */ ~~~~~~~~~~~~~~~~~ ~ /* re_compile_fastmap computes a ``fastmap'' for the compiled pattern in ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ BUFP. A fastmap records which of the (1 << BYTEWIDTH) possible ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ characters can start a string that matches the pattern. This fastmap ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ is used by re_search to skip quickly over impossible starting points. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ The caller must supply the address of a (1 << BYTEWIDTH)-byte data ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ area as BUFP->fastmap. ~~~~~~~~~~~~~~~~~~~~~~ We set the `fastmap', `fastmap_accurate', and `can_be_null' fields in ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the pattern buffer. ~~~~~~~~~~~~~~~~~~~ Returns 0 if we succeed, -2 if an internal error. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int ~~~ re_compile_fastmap (struct re_pattern_buffer *bufp ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RE_LISP_SHORT_CONTEXT_ARGS_DECL) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ int j, k; ~~~~~~~~~ #ifdef MATCH_MAY_ALLOCATE ~~~~~~~~~~~~~~~~~~~~~~~~~ fail_stack_type fail_stack; ~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ /* We don't push any register information onto the failure stack. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* &&#### this should be changed for 8-bit-fixed, for efficiency. see ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ comment marked with &&#### in re_search_2. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ REGISTER char *fastmap = bufp->fastmap; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ unsigned char *pattern = bufp->buffer; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ long size = bufp->used; ~~~~~~~~~~~~~~~~~~~~~~~ re_char *p = pattern; ~~~~~~~~~~~~~~~~~~~~~ REGISTER re_char *pend = pattern + size; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef REGEX_REL_ALLOC ~~~~~~~~~~~~~~~~~~~~~~ /* This holds the pointer to the failure stack, when ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ it is allocated relocatably. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fail_stack_elt_t *failure_stack_ptr; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ /* Assume that each path through the pattern can be null until ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ proven otherwise. We set this false at the bottom of switch ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ statement, to which we get only if a particular path doesn't ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ match the empty string. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_bool path_can_be_null = true; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* We aren't doing a `succeed_n' to begin with. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_bool succeed_n_p = false; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef ERROR_CHECK_MALLOC ~~~~~~~~~~~~~~~~~~~~~~~~~ /* The pattern comes from string data, not buffer data. We don't access ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ any buffer data, so we don't have to worry about malloc() (but the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ disallowed flag may have been set by a caller). */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int depth = bind_regex_malloc_disallowed (0); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ assert (fastmap != NULL && p != NULL); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ INIT_FAIL_STACK (); ~~~~~~~~~~~~~~~~~~~ memset (fastmap, 0, 1 << BYTEWIDTH); /* Assume nothing's valid. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->fastmap_accurate = 1; /* It will be when we're done. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->can_be_null = 0; ~~~~~~~~~~~~~~~~~~~~~~ while (1) ~~~~~~~~~ { ~ if (p == pend || *p == succeed) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ /* We have reached the (effective) end of pattern. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!FAIL_STACK_EMPTY ()) ~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ bufp->can_be_null |= path_can_be_null; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Reset for next path. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~ path_can_be_null = true; ~~~~~~~~~~~~~~~~~~~~~~~~ p = (unsigned char *) fail_stack.stack[--fail_stack.avail].pointer; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ continue; ~~~~~~~~~ } ~ else ~~~~ break; ~~~~~~ } ~ /* We should never be about to go beyond the end of the pattern. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ assert (p < pend); ~~~~~~~~~~~~~~~~~~ switch ((re_opcode_t) *p++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ /* I guess the idea here is to simply not bother with a fastmap ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if a backreference is used, since it's too hard to figure out ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the fastmap for the corresponding group. Setting ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `can_be_null' stops `re_search_2' from using the fastmap, so ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ that is all we do. */ ~~~~~~~~~~~~~~~~~~~~~~ case duplicate: ~~~~~~~~~~~~~~~ bufp->can_be_null = 1; ~~~~~~~~~~~~~~~~~~~~~~ goto done; ~~~~~~~~~~ /* Following are the cases which match a character. These end ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ with `break'. */ ~~~~~~~~~~~~~~~~~ case exactn: ~~~~~~~~~~~~ fastmap[p[1]] = 1; ~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case charset: ~~~~~~~~~~~~~ /* XEmacs: Under Mule, these bit vectors will ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ only contain values for characters below 0x80. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH))) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[j] = 1; ~~~~~~~~~~~~~~~ break; ~~~~~~ case charset_not: ~~~~~~~~~~~~~~~~~ /* Chars beyond end of map must be allowed. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef MULE ~~~~~~~~~~~ for (j = *p * BYTEWIDTH; j < 0x80; j++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[j] = 1; ~~~~~~~~~~~~~~~ /* And all extended characters must be allowed, too. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (j = 0x80; j < 0x100; j++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (ibyte_first_byte_p (j)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[j] = 1; ~~~~~~~~~~~~~~~ #else /* not MULE */ ~~~~~~~~~~~~~~~~~~~~ for (j = *p * BYTEWIDTH; j < (1 << BYTEWIDTH); j++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[j] = 1; ~~~~~~~~~~~~~~~ #endif /* MULE */ ~~~~~~~~~~~~~~~~~ for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!(p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH)))) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[j] = 1; ~~~~~~~~~~~~~~~ break; ~~~~~~ #ifdef MULE ~~~~~~~~~~~ case charset_mule: ~~~~~~~~~~~~~~~~~~ { ~ int nentries; ~~~~~~~~~~~~~ Bitbyte flags = *p++; ~~~~~~~~~~~~~~~~~~~~~ if (flags) ~~~~~~~~~~ { ~ /* We need to consult the syntax table, fastmap won't ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ work. */ ~~~~~~~~ bufp->can_be_null = 1; ~~~~~~~~~~~~~~~~~~~~~~ goto done; ~~~~~~~~~~ } ~ nentries = unified_range_table_nentries ((void *) p); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (j = 0; j < nentries; j++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ EMACS_INT first, last; ~~~~~~~~~~~~~~~~~~~~~~ Lisp_Object dummy_val; ~~~~~~~~~~~~~~~~~~~~~~ int jj; ~~~~~~~ Ibyte strr[MAX_ICHAR_LEN]; ~~~~~~~~~~~~~~~~~~~~~~~~~~ unified_range_table_get_range ((void *) p, j, &first, &last, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ &dummy_val); ~~~~~~~~~~~~ #ifndef UNICODE_INTERNAL ~~~~~~~~~~~~~~~~~~~~~~~~ for (jj = first; jj <= last && jj < 0x80; jj++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[jj] = 1; ~~~~~~~~~~~~~~~~ /* Ranges below 0x100 can span charsets, but there ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ are only two (Control-1 and Latin-1), and ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ either first or last has to be in them. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ set_itext_ichar (strr, first); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[*strr] = 1; ~~~~~~~~~~~~~~~~~~~ if (last < 0x100) ~~~~~~~~~~~~~~~~~ { ~ set_itext_ichar (strr, last); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[*strr] = 1; ~~~~~~~~~~~~~~~~~~~ } ~ else if (CHAR_CODE_LIMIT == last) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ /* This is RECC_MULTIBYTE or RECC_NONASCII; true for all ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ non-ASCII characters. */ ~~~~~~~~~~~~~~~~~~~~~~~~ jj = 0x80; ~~~~~~~~~~ while (jj < 0xA0) ~~~~~~~~~~~~~~~~~ { ~ fastmap[jj++] = 1; ~~~~~~~~~~~~~~~~~~ } ~ } ~ #else ~~~~~ /* Ranges can span charsets. We depend on the fact that ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ lead bytes are monotonically non-decreasing as ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ character values increase. @@#### This is a fairly ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ reasonable assumption in general (but DOES NOT WORK in ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ old Mule due to the ordering of private dimension-1 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ chars before official dimension-2 chars), and introduces ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ a dependency on the particular representation. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ Ibyte strrlast[MAX_ICHAR_LEN]; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ set_itext_ichar (strr, first); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ set_itext_ichar (strrlast, min (last, CHAR_CODE_LIMIT - 1)); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (jj = *strr; jj <= *strrlast; jj++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[jj] = 1; ~~~~~~~~~~~~~~~~ } ~ #endif /* not UNICODE_INTERNAL */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* If it's not a possible first byte, it can't be in the fastmap. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ In UTF-8, lead bytes are not contiguous with ASCII, so a ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ range spanning the ASCII/non-ASCII boundary will put ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ extraneous bytes in the range [0x80 - 0xBF] in the fastmap. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (j = 0x80; j < 0x100; j++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!ibyte_first_byte_p (j)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[j] = 0; ~~~~~~~~~~~~~~~ } ~ break; ~~~~~~ case charset_mule_not: ~~~~~~~~~~~~~~~~~~~~~~ { ~ int nentries; ~~~~~~~~~~~~~ int smallest_prev = 0; ~~~~~~~~~~~~~~~~~~~~~~ Bitbyte flags = *p++; ~~~~~~~~~~~~~~~~~~~~~ if (flags) ~~~~~~~~~~ { ~ /* We need to consult the syntax table, fastmap won't ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ work. */ ~~~~~~~~ bufp->can_be_null = 1; ~~~~~~~~~~~~~~~~~~~~~~ goto done; ~~~~~~~~~~ } ~ nentries = unified_range_table_nentries ((void *) p); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifndef UNICODE_INTERNAL ~~~~~~~~~~~~~~~~~~~~~~~~ for (j = 0; j < nentries; j++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ EMACS_INT first, last; ~~~~~~~~~~~~~~~~~~~~~~ Lisp_Object dummy_val; ~~~~~~~~~~~~~~~~~~~~~~ int jj; ~~~~~~~ unified_range_table_get_range ((void *) p, j, &first, &last, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ &dummy_val); ~~~~~~~~~~~~ for (jj = smallest_prev; jj < first && jj < 0x80; jj++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[jj] = 1; ~~~~~~~~~~~~~~~~ smallest_prev = last + 1; ~~~~~~~~~~~~~~~~~~~~~~~~~ if (smallest_prev >= 0x80) ~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ } ~ /* Also set lead bytes after the end */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (j = smallest_prev; j < 0x80; j++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[j] = 1; ~~~~~~~~~~~~~~~ /* Calculating which lead bytes are actually allowed ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ here is rather difficult, so we just punt and allow ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ all of them. ~~~~~~~~~~~~ */ ~~ for (j = 0x80; j < 0x100; j++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (ibyte_first_byte_p (j)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[j] = 1; ~~~~~~~~~~~~~~~ #else ~~~~~ for (j = 0; j < nentries; j++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ EMACS_INT first, last; ~~~~~~~~~~~~~~~~~~~~~~ /* This denotes a range of lead bytes that are not ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ in the fastmap. */ ~~~~~~~~~~~~~~~~~~ int firstlead, lastlead; ~~~~~~~~~~~~~~~~~~~~~~~~ Lisp_Object dummy_val; ~~~~~~~~~~~~~~~~~~~~~~ int jj; ~~~~~~~ unified_range_table_get_range ((void *) p, j, &first, &last, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ &dummy_val); ~~~~~~~~~~~~ /* With Unicode-internal, lead bytes that are entirely ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ within the range and not including the beginning or end ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ are definitely not in the fastmap. Leading bytes that ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ include the beginning or ending characters will be in ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the fastmap unless the beginning or ending characters ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ are the first or last character, respectively, that uses ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ this lead byte. ~~~~~~~~~~~~~~~ @@#### WARNING! In order to determine whether we are the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ first or last character using a lead byte we use and ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ embed in the code some knowledge of how UTF-8 works -- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ at least, the fact that the the first character using a ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ particular lead byte has the minimum-numbered trailing ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ byte in all its trailing bytes, and the last character ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ using a particular lead byte has the maximum-numbered ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ trailing byte in all its trailing bytes. We abstract ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ away the actual minimum/maximum trailing byte numbers, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ at least. We could perhaps do this more portably by ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ just looking at the representation of the character one ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ higher or lower and seeing if the lead byte changes, but ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ you'd run into the problem of invalid characters, e.g. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if you're at the edge of the range of surrogates or are ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the top-most allowed character. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ ~~ if (first < 0x80) ~~~~~~~~~~~~~~~~~ firstlead = first; ~~~~~~~~~~~~~~~~~~ else ~~~~ { ~ Ibyte strr[MAX_ICHAR_LEN]; ~~~~~~~~~~~~~~~~~~~~~~~~~~ Bytecount slen = set_itext_ichar (strr, first); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int kk; ~~~~~~~ /* Determine if we're the first character using our ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ leading byte. */ ~~~~~~~~~~~~~~~~ for (kk = 1; kk < slen; kk++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (strr[kk] != FIRST_TRAILING_BYTE) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ /* If not, this leading byte might occur, so ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ make sure it gets added to the fastmap. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ firstlead = *strr + 1; ~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ } ~ /* Otherwise, we're the first character using our ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ leading byte, and we don't need to add the leading ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ byte to the fastmap. (If our range doesn't ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ completely cover the leading byte, it will get added ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ anyway by the code handling the other end of the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ range.) */ ~~~~~~~~~~ firstlead = *strr; ~~~~~~~~~~~~~~~~~~ } ~ if (last < 0x80) ~~~~~~~~~~~~~~~~ lastlead = last; ~~~~~~~~~~~~~~~~ else ~~~~ { ~ Ibyte strr[MAX_ICHAR_LEN]; ~~~~~~~~~~~~~~~~~~~~~~~~~~ Bytecount slen ~~~~~~~~~~~~~~ = set_itext_ichar (strr, ~~~~~~~~~~~~~~~~~~~~~~~~ min (last, ~~~~~~~~~~ CHAR_CODE_LIMIT - 1)); ~~~~~~~~~~~~~~~~~~~~~~ int kk; ~~~~~~~ /* Same as above but for the last character using ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ our leading byte. */ ~~~~~~~~~~~~~~~~~~~~ for (kk = 1; kk < slen; kk++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (strr[kk] != LAST_TRAILING_BYTE) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ lastlead = *strr - 1; ~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ } ~ lastlead = *strr; ~~~~~~~~~~~~~~~~~ } ~ /* Now, FIRSTLEAD and LASTLEAD are set to the beginning and ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ end, inclusive, of a range of lead bytes that cannot be ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ in the fastmap. Essentially, we want to set all the other ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bytes to be in the fastmap. Here we handle those after ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the previous range and before this one. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (jj = smallest_prev; jj < firstlead; jj++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[jj] = 1; ~~~~~~~~~~~~~~~~ smallest_prev = lastlead + 1; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* Also set lead bytes after the end of the final range. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (j = smallest_prev; j < 0x100; j++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[j] = 1; ~~~~~~~~~~~~~~~ /* If it's not a possible first byte, it can't be in the fastmap. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ In UTF-8, lead bytes are not contiguous with ASCII, so a ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ range spanning the ASCII/non-ASCII boundary will put ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ extraneous bytes in the range [0x80 - 0xBF] in the fastmap. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (j = 0x80; j < 0x100; j++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!ibyte_first_byte_p (j)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[j] = 0; ~~~~~~~~~~~~~~~ #endif /* UNICODE_INTERNAL */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ break; ~~~~~~ #endif /* MULE */ ~~~~~~~~~~~~~~~~~ case anychar: ~~~~~~~~~~~~~ { ~ int fastmap_newline = fastmap['\n']; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* `.' matches anything ... */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef MULE ~~~~~~~~~~~ /* "anything" only includes bytes that can be the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ first byte of a character. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (j = 0; j < 0x100; j++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (ibyte_first_byte_p (j)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[j] = 1; ~~~~~~~~~~~~~~~ #else ~~~~~ for (j = 0; j < (1 << BYTEWIDTH); j++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[j] = 1; ~~~~~~~~~~~~~~~ #endif ~~~~~~ /* ... except perhaps newline. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!(bufp->syntax & RE_DOT_NEWLINE)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap['\n'] = fastmap_newline; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Return if we have already set `can_be_null'; if we have, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ then the fastmap is irrelevant. Something's wrong here. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ else if (bufp->can_be_null) ~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto done; ~~~~~~~~~~ /* Otherwise, have to check alternative paths. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ } ~ #ifndef emacs ~~~~~~~~~~~~~ case wordchar: ~~~~~~~~~~~~~~ for (j = 0; j < (1 << BYTEWIDTH); j++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (SYNTAX (ignored, j) == Sword) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[j] = 1; ~~~~~~~~~~~~~~~ break; ~~~~~~ case notwordchar: ~~~~~~~~~~~~~~~~~ for (j = 0; j < (1 << BYTEWIDTH); j++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (SYNTAX (ignored, j) != Sword) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[j] = 1; ~~~~~~~~~~~~~~~ break; ~~~~~~ #else /* emacs */ ~~~~~~~~~~~~~~~~~ case wordchar: ~~~~~~~~~~~~~~ case notwordchar: ~~~~~~~~~~~~~~~~~ case wordbound: ~~~~~~~~~~~~~~~ case notwordbound: ~~~~~~~~~~~~~~~~~~ case wordbeg: ~~~~~~~~~~~~~ case wordend: ~~~~~~~~~~~~~ case notsyntaxspec: ~~~~~~~~~~~~~~~~~~~ case syntaxspec: ~~~~~~~~~~~~~~~~ /* This match depends on text properties. These end with ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ aborting optimizations. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->can_be_null = 1; ~~~~~~~~~~~~~~~~~~~~~~ goto done; ~~~~~~~~~~ #if 0 /* all of the following code is unused now that the `syntax-table' ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ property exists -- it's trickier to do this than just look in ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the buffer. &&#### but we could just use the syntax-cache stuff ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ instead; why don't we? --ben */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case wordchar: ~~~~~~~~~~~~~~ k = (int) Sword; ~~~~~~~~~~~~~~~~ goto matchsyntax; ~~~~~~~~~~~~~~~~~ case notwordchar: ~~~~~~~~~~~~~~~~~ k = (int) Sword; ~~~~~~~~~~~~~~~~ goto matchnotsyntax; ~~~~~~~~~~~~~~~~~~~~ case syntaxspec: ~~~~~~~~~~~~~~~~ k = *p++; ~~~~~~~~~ matchsyntax: ~~~~~~~~~~~~ #ifdef MULE ~~~~~~~~~~~ for (j = 0; j < 0x80; j++) ~~~~~~~~~~~~~~~~~~~~~~~~~~ if (SYNTAX ~~~~~~~~~~ (XCHAR_TABLE (BUFFER_MIRROR_SYNTAX_TABLE (lispbuf)), j) == ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (enum syntaxcode) k) ~~~~~~~~~~~~~~~~~~~~ fastmap[j] = 1; ~~~~~~~~~~~~~~~ /* @@#### To be correct, we need to set the fastmap for any ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ lead byte any of whose characters can have this syntax code. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ This is hard to calculate so we just punt for now. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (j = 0x80; j < 0x100; j++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (ibyte_first_byte_p (j)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[j] = 1; ~~~~~~~~~~~~~~~ #else /* not MULE */ ~~~~~~~~~~~~~~~~~~~~ for (j = 0; j < (1 << BYTEWIDTH); j++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (SYNTAX ~~~~~~~~~~ (XCHAR_TABLE (BUFFER_MIRROR_SYNTAX_TABLE (lispbuf)), j) == ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (enum syntaxcode) k) ~~~~~~~~~~~~~~~~~~~~ fastmap[j] = 1; ~~~~~~~~~~~~~~~ #endif /* MULE */ ~~~~~~~~~~~~~~~~~ break; ~~~~~~ case notsyntaxspec: ~~~~~~~~~~~~~~~~~~~ k = *p++; ~~~~~~~~~ matchnotsyntax: ~~~~~~~~~~~~~~~ #ifdef MULE ~~~~~~~~~~~ for (j = 0; j < 0x80; j++) ~~~~~~~~~~~~~~~~~~~~~~~~~~ if (SYNTAX ~~~~~~~~~~ (XCHAR_TABLE ~~~~~~~~~~~~ (BUFFER_MIRROR_SYNTAX_TABLE (lispbuf)), j) != ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (enum syntaxcode) k) ~~~~~~~~~~~~~~~~~~~~ fastmap[j] = 1; ~~~~~~~~~~~~~~~ /* @@#### To be correct, we need to set the fastmap for any ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ lead byte all of whose characters do not have this syntax code. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ This is hard to calculate so we just punt for now. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (j = 0x80; j < 0x100; j++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (ibyte_first_byte_p (j)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[j] = 1; ~~~~~~~~~~~~~~~ #else /* not MULE */ ~~~~~~~~~~~~~~~~~~~~ for (j = 0; j < (1 << BYTEWIDTH); j++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (SYNTAX ~~~~~~~~~~ (XCHAR_TABLE ~~~~~~~~~~~~ (BUFFER_MIRROR_SYNTAX_TABLE (lispbuf)), j) != ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (enum syntaxcode) k) ~~~~~~~~~~~~~~~~~~~~ fastmap[j] = 1; ~~~~~~~~~~~~~~~ #endif /* MULE */ ~~~~~~~~~~~~~~~~~ break; ~~~~~~ #endif /* 0 */ ~~~~~~~~~~~~~~ #ifdef MULE ~~~~~~~~~~~ /* 97/2/17 jhod category patch */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case categoryspec: ~~~~~~~~~~~~~~~~~~ case notcategoryspec: ~~~~~~~~~~~~~~~~~~~~~ bufp->can_be_null = 1; ~~~~~~~~~~~~~~~~~~~~~~ UNBIND_REGEX_MALLOC_CHECK (); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return 0; ~~~~~~~~~ /* end if category patch */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif /* MULE */ ~~~~~~~~~~~~~~~~~ /* All cases after this match the empty string. These end with ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `continue'. */ ~~~~~~~~~~~~~~~ case before_dot: ~~~~~~~~~~~~~~~~ case at_dot: ~~~~~~~~~~~~ case after_dot: ~~~~~~~~~~~~~~~ continue; ~~~~~~~~~ #endif /* emacs */ ~~~~~~~~~~~~~~~~~~ case no_op: ~~~~~~~~~~~ case begline: ~~~~~~~~~~~~~ case endline: ~~~~~~~~~~~~~ case begbuf: ~~~~~~~~~~~~ case endbuf: ~~~~~~~~~~~~ #ifndef emacs ~~~~~~~~~~~~~ case wordbound: ~~~~~~~~~~~~~~~ case notwordbound: ~~~~~~~~~~~~~~~~~~ case wordbeg: ~~~~~~~~~~~~~ case wordend: ~~~~~~~~~~~~~ #endif ~~~~~~ case push_dummy_failure: ~~~~~~~~~~~~~~~~~~~~~~~~ continue; ~~~~~~~~~ case jump_n: ~~~~~~~~~~~~ case pop_failure_jump: ~~~~~~~~~~~~~~~~~~~~~~ case maybe_pop_jump: ~~~~~~~~~~~~~~~~~~~~ case jump: ~~~~~~~~~~ case jump_past_alt: ~~~~~~~~~~~~~~~~~~~ case dummy_failure_jump: ~~~~~~~~~~~~~~~~~~~~~~~~ EXTRACT_NUMBER_AND_INCR (j, p); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ p += j; ~~~~~~~ if (j > 0) ~~~~~~~~~~ continue; ~~~~~~~~~ /* Jump backward implies we just went through the body of a ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ loop and matched nothing. Opcode jumped to should be ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `on_failure_jump' or `succeed_n'. Just treat it like an ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ordinary jump. For a * loop, it has pushed its failure ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ point already; if so, discard that as redundant. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if ((re_opcode_t) *p != on_failure_jump ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ && (re_opcode_t) *p != succeed_n) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ continue; ~~~~~~~~~ p++; ~~~~ EXTRACT_NUMBER_AND_INCR (j, p); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ p += j; ~~~~~~~ /* If what's on the stack is where we are now, pop it. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!FAIL_STACK_EMPTY () ~~~~~~~~~~~~~~~~~~~~~~~~ && fail_stack.stack[fail_stack.avail - 1].pointer == p) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fail_stack.avail--; ~~~~~~~~~~~~~~~~~~~ continue; ~~~~~~~~~ case on_failure_jump: ~~~~~~~~~~~~~~~~~~~~~ case on_failure_keep_string_jump: ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ handle_on_failure_jump: ~~~~~~~~~~~~~~~~~~~~~~~ EXTRACT_NUMBER_AND_INCR (j, p); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* For some patterns, e.g., `(a?)?', `p+j' here points to the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ end of the pattern. We don't want to push such a point, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ since when we restore it above, entering the switch will ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ increment `p' past the end of the pattern. We don't need ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ to push such a point since we obviously won't find any more ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap entries beyond `pend'. Such a pattern can match ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the null string, though. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (p + j < pend) ~~~~~~~~~~~~~~~~~ { ~ if (!PUSH_PATTERN_OP (p + j, fail_stack)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ RESET_FAIL_STACK (); ~~~~~~~~~~~~~~~~~~~~ UNBIND_REGEX_MALLOC_CHECK (); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return -2; ~~~~~~~~~~ } ~ } ~ else ~~~~ bufp->can_be_null = 1; ~~~~~~~~~~~~~~~~~~~~~~ if (succeed_n_p) ~~~~~~~~~~~~~~~~ { ~ EXTRACT_NUMBER_AND_INCR (k, p); /* Skip the n. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ succeed_n_p = false; ~~~~~~~~~~~~~~~~~~~~ } ~ continue; ~~~~~~~~~ case succeed_n: ~~~~~~~~~~~~~~~ /* Get to the number of times to succeed. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ p += 2; ~~~~~~~ /* Increment p past the n for when k != 0. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ EXTRACT_NONNEGATIVE_AND_INCR (k, p); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (k == 0) ~~~~~~~~~~~ { ~ p -= 4; ~~~~~~~ succeed_n_p = true; /* Spaghetti code alert. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto handle_on_failure_jump; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ continue; ~~~~~~~~~ case set_number_at: ~~~~~~~~~~~~~~~~~~~ p += 4; ~~~~~~~ continue; ~~~~~~~~~ case start_memory: ~~~~~~~~~~~~~~~~~~ case stop_memory: ~~~~~~~~~~~~~~~~~ p += 4; ~~~~~~~ continue; ~~~~~~~~~ default: ~~~~~~~~ ABORT (); /* We have listed all the cases. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } /* switch *p++ */ ~~~~~~~~~~~~~~~~~~~ /* Getting here means we have found the possible starting ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ characters for one path of the pattern -- and that the empty ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ string does not match. We need not follow this path further. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Instead, look at the next alternative (remembered on the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ stack), or quit if no more. The test at the top of the loop ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ does these things. */ ~~~~~~~~~~~~~~~~~~~~~~ path_can_be_null = false; ~~~~~~~~~~~~~~~~~~~~~~~~~ p = pend; ~~~~~~~~~ } /* while p */ ~~~~~~~~~~~~~~~ /* Set `can_be_null' for the last path (also the first path, if the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ pattern is empty). */ ~~~~~~~~~~~~~~~~~~~~~~ bufp->can_be_null |= path_can_be_null; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ done: ~~~~~ RESET_FAIL_STACK (); ~~~~~~~~~~~~~~~~~~~~ UNBIND_REGEX_MALLOC_CHECK (); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return 0; ~~~~~~~~~ } /* re_compile_fastmap */ ~~~~~~~~~~~~~~~~~~~~~~~~~~ ~ /* Set REGS to hold NUM_REGS registers, storing them in STARTS and ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ENDS. Subsequent matches using PATTERN_BUFFER and REGS will use ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ this memory for recording register information. STARTS and ENDS ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ must be allocated using the malloc library routine, and must each ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ be at least NUM_REGS * sizeof (regoff_t) bytes long. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ If NUM_REGS == 0, then subsequent matches should allocate their own ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ register data. ~~~~~~~~~~~~~~ Unless this function is called, the first search or match using ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PATTERN_BUFFER will allocate its own register data, without ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ freeing the old data. */ ~~~~~~~~~~~~~~~~~~~~~~~~~ void ~~~~ re_set_registers (struct re_pattern_buffer *bufp, struct re_registers *regs, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int num_regs, regoff_t *starts, regoff_t *ends) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ if (num_regs) ~~~~~~~~~~~~~ { ~ bufp->regs_allocated = REGS_REALLOCATE; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regs->num_regs = num_regs; ~~~~~~~~~~~~~~~~~~~~~~~~~~ regs->start = starts; ~~~~~~~~~~~~~~~~~~~~~ regs->end = ends; ~~~~~~~~~~~~~~~~~ } ~ else ~~~~ { ~ bufp->regs_allocated = REGS_UNALLOCATED; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regs->num_regs = 0; ~~~~~~~~~~~~~~~~~~~ regs->start = regs->end = (regoff_t *) 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ } ~ ~ /* Searching routines. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Like re_search_2, below, but only one string is specified, and ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ doesn't let you say where to stop matching. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int ~~~ re_search (struct re_pattern_buffer *bufp, const char *string, int size, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int startpos, int range, struct re_registers *regs ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RE_LISP_CONTEXT_ARGS_DECL) ~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ return re_search_2 (bufp, NULL, 0, string, size, startpos, range, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regs, size RE_LISP_CONTEXT_ARGS); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* Using the compiled pattern in BUFP->buffer, first tries to match the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ virtual concatenation of STRING1 and STRING2, starting first at index ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ STARTPOS, then at STARTPOS + 1, and so on. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ STRING1 and STRING2 have length SIZE1 and SIZE2, respectively. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RANGE is how far to scan while trying to match. RANGE = 0 means try ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ only at STARTPOS; in general, the last start tried is STARTPOS + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RANGE. ~~~~~~ All sizes and positions refer to bytes (not chars); under Mule, the code ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ knows about the format of the text and will only check at positions ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ where a character starts. ~~~~~~~~~~~~~~~~~~~~~~~~~ With MULE, RANGE is a byte position, not a char position. The last ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ start tried is the character starting <= STARTPOS + RANGE. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ In REGS, return the indices of the virtual concatenation of STRING1 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ and STRING2 that matched the entire BUFP->buffer and its contained ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ subexpressions. ~~~~~~~~~~~~~~~ Do not consider matching one past the index STOP in the virtual ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ concatenation of STRING1 and STRING2. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ We return either the position in the strings at which the match was ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ found, -1 if no match, or -2 if error (such as failure ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ stack overflow). */ ~~~~~~~~~~~~~~~~~~~~ int ~~~ re_search_2 (struct re_pattern_buffer *bufp, const char *str1, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int size1, const char *str2, int size2, int startpos, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int range, struct re_registers *regs, int stop ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RE_LISP_CONTEXT_ARGS_DECL) ~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ int val; ~~~~~~~~ re_char *string1 = (re_char *) str1; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_char *string2 = (re_char *) str2; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ REGISTER char *fastmap = bufp->fastmap; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ REGISTER RE_TRANSLATE_TYPE translate = bufp->translate; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int total_size = size1 + size2; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int endpos = startpos + range; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef REGEX_BEGLINE_CHECK ~~~~~~~~~~~~~~~~~~~~~~~~~~ int anchored_at_begline = 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ re_char *d; ~~~~~~~~~~~ #ifdef emacs ~~~~~~~~~~~~ Internal_Format fmt = buffer_or_other_internal_format (lispobj); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef REL_ALLOC ~~~~~~~~~~~~~~~~ const Ibyte *orig_buftext = ~~~~~~~~~~~~~~~~~~~~~~~~~~~ BUFFERP (lispobj) ? ~~~~~~~~~~~~~~~~~~~ BYTE_BUF_BYTE_ADDRESS (XBUFFER (lispobj), ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ BYTE_BUF_BEG (XBUFFER (lispobj))) : ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 0; ~~ #endif ~~~~~~ #ifdef ERROR_CHECK_MALLOC ~~~~~~~~~~~~~~~~~~~~~~~~~ int depth; ~~~~~~~~~~ #endif ~~~~~~ #endif /* emacs */ ~~~~~~~~~~~~~~~~~~ int forward_search_p; ~~~~~~~~~~~~~~~~~~~~~ /* Check for out-of-range STARTPOS. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (startpos < 0 || startpos > total_size) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return -1; ~~~~~~~~~~ /* Fix up RANGE if it might eventually take us outside ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the virtual concatenation of STRING1 and STRING2. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (endpos < 0) ~~~~~~~~~~~~~~~ range = 0 - startpos; ~~~~~~~~~~~~~~~~~~~~~ else if (endpos > total_size) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ range = total_size - startpos; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ forward_search_p = range > 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (void) (forward_search_p); /* This is only used with assertions, silence the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ compiler warning when they're turned off. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* If the search isn't to be a backwards one, don't waste time in a ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ search for a pattern that must be anchored. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (bufp->used > 0 && (re_opcode_t) bufp->buffer[0] == begbuf && range > 0) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ if (startpos > 0) ~~~~~~~~~~~~~~~~~ return -1; ~~~~~~~~~~ else ~~~~ { ~ d = ((const unsigned char *) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (startpos >= size1 ? string2 - size1 : string1) + startpos); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ range = itext_ichar_len_fmt (d, fmt); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ } ~ #ifdef emacs ~~~~~~~~~~~~ /* In a forward search for something that starts with \=. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ don't keep searching past point. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (bufp->used > 0 && (re_opcode_t) bufp->buffer[0] == at_dot && range > 0) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ if (!BUFFERP (lispobj)) ~~~~~~~~~~~~~~~~~~~~~~~ return -1; ~~~~~~~~~~ range = (BYTE_BUF_PT (XBUFFER (lispobj)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - BYTE_BUF_BEGV (XBUFFER (lispobj)) - startpos); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (range < 0) ~~~~~~~~~~~~~~ return -1; ~~~~~~~~~~ } ~ #endif /* emacs */ ~~~~~~~~~~~~~~~~~~ #ifdef ERROR_CHECK_MALLOC ~~~~~~~~~~~~~~~~~~~~~~~~~ /* Do this after the above return()s. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ depth = bind_regex_malloc_disallowed (1); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ /* Update the fastmap now if not correct already. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ BEGIN_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~~~ if (fastmap && !bufp->fastmap_accurate) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (re_compile_fastmap (bufp RE_LISP_SHORT_CONTEXT_ARGS) == -2) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ END_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~ UNBIND_REGEX_MALLOC_CHECK (); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return -2; ~~~~~~~~~~ } ~ END_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~ RE_SEARCH_RELOCATE_MOVEABLE_DATA_POINTERS (); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef REGEX_BEGLINE_CHECK ~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ long i = 0; ~~~~~~~~~~~ while (i < bufp->used) ~~~~~~~~~~~~~~~~~~~~~~ { ~ if (bufp->buffer[i] == start_memory || ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->buffer[i] == stop_memory) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ i += 4; ~~~~~~~ else ~~~~ break; ~~~~~~ } ~ anchored_at_begline = i < bufp->used && bufp->buffer[i] == begline; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ #endif ~~~~~~ #ifdef emacs ~~~~~~~~~~~~ BEGIN_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~~~ /* Update the mirror syntax table if it's used and dirty. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ SYNTAX_CODE (BUFFER_MIRROR_SYNTAX_TABLE (lispbuf), 'a'); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ scache = setup_syntax_cache (scache, lispobj, lispbuf, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ offset_to_bytexpos (lispobj, startpos), ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 1); ~~~ END_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~ RE_SEARCH_RELOCATE_MOVEABLE_DATA_POINTERS (); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ /* Loop through the string, looking for a place to start matching. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (;;) ~~~~~~~~ { ~ #ifdef REGEX_BEGLINE_CHECK ~~~~~~~~~~~~~~~~~~~~~~~~~~ /* If the regex is anchored at the beginning of a line (i.e. with a ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ^), then we can speed things up by skipping to the next ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ beginning-of-line. However, to determine "beginning of line" we ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ need to look at the previous char, so can't do this check if at ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ beginning of either string. (Well, we could if at the beginning of ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the second string, but it would require additional code, and this ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ is just an optimization.) */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (anchored_at_begline && startpos > 0 && startpos != size1) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ if (range > 0) ~~~~~~~~~~~~~~ { ~ /* whose stupid idea was it anyway to make this ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ function take two strings to match?? */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int lim = 0; ~~~~~~~~~~~~ re_char *orig_d; ~~~~~~~~~~~~~~~~ re_char *stop_d; ~~~~~~~~~~~~~~~~ /* Compute limit as below in fastmap code, so we are guaranteed ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ to remain within a single string. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (startpos < size1 && startpos + range >= size1) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ lim = range - (size1 - startpos); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ d = ((const unsigned char *) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (startpos >= size1 ? string2 - size1 : string1) + startpos); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ orig_d = d; ~~~~~~~~~~~ stop_d = d + range - lim; ~~~~~~~~~~~~~~~~~~~~~~~~~ /* We want to find the next location (including the current ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ one) where the previous char is a newline, so back up one ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ and search forward for a newline. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEC_IBYTEPTR_FMT (d, fmt); /* Ok, since startpos != size1. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Written out as an if-else to avoid testing `translate' ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ inside the loop. */ ~~~~~~~~~~~~~~~~~~~~ if (TRANSLATE_P (translate)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ while (d < stop_d && ~~~~~~~~~~~~~~~~~~~~ RE_TRANSLATE_1 (itext_ichar_fmt (d, fmt, lispobj)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ != '\n') ~~~~~~~~ INC_IBYTEPTR_FMT (d, fmt); ~~~~~~~~~~~~~~~~~~~~~~~~~~ else ~~~~ while (d < stop_d && ~~~~~~~~~~~~~~~~~~~~ itext_ichar_ascii_fmt (d, fmt, lispobj) != '\n') ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ INC_IBYTEPTR_FMT (d, fmt); ~~~~~~~~~~~~~~~~~~~~~~~~~~ /* If we were stopped by a newline, skip forward over it. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Otherwise we will get in an infloop when our start position ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ was at begline. */ ~~~~~~~~~~~~~~~~~~ if (d < stop_d) ~~~~~~~~~~~~~~~ INC_IBYTEPTR_FMT (d, fmt); ~~~~~~~~~~~~~~~~~~~~~~~~~~ range -= d - orig_d; ~~~~~~~~~~~~~~~~~~~~ startpos += d - orig_d; ~~~~~~~~~~~~~~~~~~~~~~~ assert (!forward_search_p || range >= 0); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ else if (range < 0) ~~~~~~~~~~~~~~~~~~~ { ~ /* We're lazy, like in the fastmap code below */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Ichar c; ~~~~~~~~ d = ((const unsigned char *) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (startpos >= size1 ? string2 - size1 : string1) + startpos); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEC_IBYTEPTR_FMT (d, fmt); ~~~~~~~~~~~~~~~~~~~~~~~~~~ c = itext_ichar_fmt (d, fmt, lispobj); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ c = RE_TRANSLATE (c); ~~~~~~~~~~~~~~~~~~~~~ if (c != '\n') ~~~~~~~~~~~~~~ goto advance; ~~~~~~~~~~~~~ } ~ } ~ #endif /* REGEX_BEGLINE_CHECK */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* If a fastmap is supplied, skip quickly over characters that ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ cannot be the start of a match. If the pattern can match the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ null string, however, we don't need to skip characters; we want ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the first null string. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~ if (fastmap && startpos < total_size && !bufp->can_be_null) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ /* For the moment, fastmap always works as if buffer ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ is in default format, so convert chars in the search strings ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ into default format as we go along, if necessary. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ &&#### fastmap needs rethinking for 8-bit-fixed so ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ it's faster. We need it to reflect the raw ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 8-bit-fixed values. That isn't so hard if we assume ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ that the top 96 bytes represent a single 1-byte ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ charset. For 16-bit/32-bit stuff it's probably not ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ worth it to make the fastmap represent the raw, due to ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ its nature -- we'd have to use the LSB for the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap, and that causes lots of problems with Mule ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ chars, where it essentially wipes out the usefulness ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ of the fastmap entirely. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (range > 0) /* Searching forwards. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ int lim = 0; ~~~~~~~~~~~~ int irange = range; ~~~~~~~~~~~~~~~~~~~ if (startpos < size1 && startpos + range >= size1) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ lim = range - (size1 - startpos); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ d = ((const unsigned char *) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (startpos >= size1 ? string2 - size1 : string1) + startpos); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Written out as an if-else to avoid testing `translate' ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ inside the loop. */ ~~~~~~~~~~~~~~~~~~~~ if (TRANSLATE_P (translate)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ while (range > lim) ~~~~~~~~~~~~~~~~~~~ { ~ re_char *old_d = d; ~~~~~~~~~~~~~~~~~~~ #ifdef MULE ~~~~~~~~~~~ Ibyte tempch[MAX_ICHAR_LEN]; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Ichar buf_ch = ~~~~~~~~~~~~~~ RE_TRANSLATE_1 (itext_ichar_fmt (d, fmt, lispobj)); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ set_itext_ichar (tempch, buf_ch); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (fastmap[*tempch]) ~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ #else ~~~~~ if (fastmap[(unsigned char) RE_TRANSLATE_1 (*d)]) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ #endif /* MULE */ ~~~~~~~~~~~~~~~~~ INC_IBYTEPTR_FMT (d, fmt); ~~~~~~~~~~~~~~~~~~~~~~~~~~ range -= (d - old_d); ~~~~~~~~~~~~~~~~~~~~~ assert (!forward_search_p || range >= 0); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ } ~ #ifdef MULE ~~~~~~~~~~~ else if (fmt != FORMAT_DEFAULT) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ while (range > lim) ~~~~~~~~~~~~~~~~~~~ { ~ re_char *old_d = d; ~~~~~~~~~~~~~~~~~~~ Ibyte tempch[MAX_ICHAR_LEN]; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Ichar buf_ch = itext_ichar_fmt (d, fmt, lispobj); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ set_itext_ichar (tempch, buf_ch); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (fastmap[*tempch]) ~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ INC_IBYTEPTR_FMT (d, fmt); ~~~~~~~~~~~~~~~~~~~~~~~~~~ range -= (d - old_d); ~~~~~~~~~~~~~~~~~~~~~ assert (!forward_search_p || range >= 0); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ } ~ #endif /* MULE */ ~~~~~~~~~~~~~~~~~ else ~~~~ { ~ while (range > lim && !fastmap[*d]) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ re_char *old_d = d; ~~~~~~~~~~~~~~~~~~~ INC_IBYTEPTR (d); ~~~~~~~~~~~~~~~~~ range -= (d - old_d); ~~~~~~~~~~~~~~~~~~~~~ assert (!forward_search_p || range >= 0); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ } ~ startpos += irange - range; ~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ else /* Searching backwards. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ /* #### It's not clear why we don't just write a loop, like ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for the moving-forward case. Perhaps the writer got lazy, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ since backward searches aren't so common. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ d = ((const unsigned char *) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (startpos >= size1 ? string2 - size1 : string1) + startpos); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef MULE ~~~~~~~~~~~ { ~ Ibyte tempch[MAX_ICHAR_LEN]; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Ichar buf_ch = ~~~~~~~~~~~~~~ RE_TRANSLATE (itext_ichar_fmt (d, fmt, lispobj)); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ set_itext_ichar (tempch, buf_ch); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!fastmap[*tempch]) ~~~~~~~~~~~~~~~~~~~~~~ goto advance; ~~~~~~~~~~~~~ } ~ #else ~~~~~ if (!fastmap[(unsigned char) RE_TRANSLATE (*d)]) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto advance; ~~~~~~~~~~~~~ #endif /* MULE */ ~~~~~~~~~~~~~~~~~ } ~ } ~ /* If can't match the null string, and that's all we have left, fail. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (range >= 0 && startpos == total_size && fastmap ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ && !bufp->can_be_null) ~~~~~~~~~~~~~~~~~~~~~~ { ~ UNBIND_REGEX_MALLOC_CHECK (); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return -1; ~~~~~~~~~~ } ~ #ifdef emacs /* XEmacs added, w/removal of immediate_quit */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!no_quit_in_re_search) ~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ BEGIN_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~~~ QUIT; ~~~~~ END_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~ RE_SEARCH_RELOCATE_MOVEABLE_DATA_POINTERS (); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ #endif ~~~~~~ BEGIN_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~~~ val = re_match_2_internal (bufp, string1, size1, string2, size2, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ startpos, regs, stop ~~~~~~~~~~~~~~~~~~~~ RE_LISP_CONTEXT_ARGS); ~~~~~~~~~~~~~~~~~~~~~~ #ifndef REGEX_MALLOC ~~~~~~~~~~~~~~~~~~~~ ALLOCA_GARBAGE_COLLECT (); ~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ END_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~ RE_SEARCH_RELOCATE_MOVEABLE_DATA_POINTERS (); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (val >= 0) ~~~~~~~~~~~~~ { ~ UNBIND_REGEX_MALLOC_CHECK (); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return startpos; ~~~~~~~~~~~~~~~~ } ~ if (val == -2) ~~~~~~~~~~~~~~ { ~ UNBIND_REGEX_MALLOC_CHECK (); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return -2; ~~~~~~~~~~ } ~ RE_SEARCH_RELOCATE_MOVEABLE_DATA_POINTERS (); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ advance: ~~~~~~~~ if (!range) ~~~~~~~~~~~ break; ~~~~~~ else if (range > 0) ~~~~~~~~~~~~~~~~~~~ { ~ Bytecount d_size; ~~~~~~~~~~~~~~~~~ d = ((const unsigned char *) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (startpos >= size1 ? string2 - size1 : string1) + startpos); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ d_size = itext_ichar_len_fmt (d, fmt); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ range -= d_size; ~~~~~~~~~~~~~~~~ assert (!forward_search_p || range >= 0); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ startpos += d_size; ~~~~~~~~~~~~~~~~~~~ } ~ else ~~~~ { ~ Bytecount d_size; ~~~~~~~~~~~~~~~~~ /* Note startpos > size1 not >=. If we are on the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ string1/string2 boundary, we want to backup into string1. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ d = ((const unsigned char *) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (startpos > size1 ? string2 - size1 : string1) + startpos); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEC_IBYTEPTR_FMT (d, fmt); ~~~~~~~~~~~~~~~~~~~~~~~~~~ d_size = itext_ichar_len_fmt (d, fmt); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ range += d_size; ~~~~~~~~~~~~~~~~ assert (!forward_search_p || range >= 0); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ startpos -= d_size; ~~~~~~~~~~~~~~~~~~~ } ~ } ~ UNBIND_REGEX_MALLOC_CHECK (); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return -1; ~~~~~~~~~~ } /* re_search_2 */ ~~~~~~~~~~~~~~~~~~~ ~ /* Declarations and macros for re_match_2. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* This converts PTR, a pointer into one of the search strings `string1' ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ and `string2' into an offset from the beginning of that string. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define POINTER_TO_OFFSET(ptr) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (FIRST_STRING_P (ptr) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~ ? ((regoff_t) ((ptr) - string1)) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ : ((regoff_t) ((ptr) - string2 + size1))) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Macros for dealing with the split strings in re_match_2. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define MATCHING_IN_FIRST_STRING (dend == end_match_1) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Call before fetching a character with *d. This switches over to ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ string2 if necessary. */ ~~~~~~~~~~~~~~~~~~~~~~~~~ #define REGEX_PREFETCH() \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ while (d == dend) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { \ ~~~~~~~~~~~ /* End of string2 => fail. */ \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (dend == end_match_2) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto fail; \ ~~~~~~~~~~~~~~~~~~ /* End of string1 => advance to string2. */ \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ d = string2; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~ dend = end_match_2; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* Test if at very beginning or at very end of the virtual concatenation ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ of `string1' and `string2'. If only one string, it's `string2'. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define AT_STRINGS_BEG(d) ((d) == (size1 ? string1 : string2) || !size2) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define AT_STRINGS_END(d) ((d) == end2) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* XEmacs change: ~~~~~~~~~~~~~~~~~ If the given position straddles the string gap, return the equivalent ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ position that is before or after the gap, respectively; otherwise, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return the same position. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define POS_BEFORE_GAP_UNSAFE(d) ((d) == string2 ? end1 : (d)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define POS_AFTER_GAP_UNSAFE(d) ((d) == end1 ? string2 : (d)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Test if CH is a word-constituent character. (XEmacs change) */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define WORDCHAR_P(ch) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (SYNTAX (BUFFER_MIRROR_SYNTAX_TABLE (lispbuf), ch) == Sword) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Free everything we malloc. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef MATCH_MAY_ALLOCATE ~~~~~~~~~~~~~~~~~~~~~~~~~ #define FREE_VAR(var,type) if (var) REGEX_FREE (var, type); var = NULL ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define FREE_VARIABLES() \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ do { \ ~~~~~~~~~~~~~~ UNBIND_REGEX_MALLOC_CHECK (); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ REGEX_FREE_STACK (fail_stack.stack); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ FREE_VAR (regstart, re_char **); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ FREE_VAR (regend, re_char **); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ FREE_VAR (old_regstart, re_char **); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ FREE_VAR (old_regend, re_char **); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ FREE_VAR (best_regstart, re_char **); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ FREE_VAR (best_regend, re_char **); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ FREE_VAR (reg_info, register_info_type *); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ FREE_VAR (reg_dummy, re_char **); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ FREE_VAR (reg_info_dummy, register_info_type *); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } while (0) ~~~~~~~~~~~ #else /* not MATCH_MAY_ALLOCATE */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define FREE_VARIABLES() \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ do { \ ~~~~~~~~~~~ UNBIND_REGEX_MALLOC_CHECK (); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } while (0) ~~~~~~~~~~~ #endif /* MATCH_MAY_ALLOCATE */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* These values must meet several constraints. They must not be valid ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ register values, which means we can use numbers larger than MAX_REGNUM. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ They must differ by 1, because of NUM_FAILURE_ITEMS above. And the value ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for the lowest register must be larger than the value for the highest ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ register, so we do not try to actually save any registers when none are ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ active. */ ~~~~~~~~~~~ #define NO_HIGHEST_ACTIVE_REG (MAX_REGNUM + 1) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define NO_LOWEST_ACTIVE_REG (NO_HIGHEST_ACTIVE_REG + 1) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ~ /* Matching routines. */ ~~~~~~~~~~~~~~~~~~~~~~~~~ #ifndef emacs /* XEmacs never uses this. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* re_match is like re_match_2 except it takes only a single string. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int ~~~ re_match (struct re_pattern_buffer *bufp, const char *string, int size, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int pos, struct re_registers *regs ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RE_LISP_CONTEXT_ARGS_DECL) ~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ int result = re_match_2_internal (bufp, NULL, 0, (re_char *) string, size, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ pos, regs, size ~~~~~~~~~~~~~~~ RE_LISP_CONTEXT_ARGS); ~~~~~~~~~~~~~~~~~~~~~~ ALLOCA_GARBAGE_COLLECT (); ~~~~~~~~~~~~~~~~~~~~~~~~~~ return result; ~~~~~~~~~~~~~~ } ~ #endif /* not emacs */ ~~~~~~~~~~~~~~~~~~~~~~ /* re_match_2 matches the compiled pattern in BUFP against the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (virtual) concatenation of STRING1 and STRING2 (of length SIZE1 and ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ SIZE2, respectively). We start matching at POS, and stop matching ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ at STOP. ~~~~~~~~ If REGS is non-null and the `no_sub' field of BUFP is nonzero, we ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ store offsets for the substring each group matched in REGS. See the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ documentation for exactly how many groups we fill. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ We return -1 if no match, -2 if an internal error (such as the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ failure stack overflowing). Otherwise, we return the length of the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ matched substring. */ ~~~~~~~~~~~~~~~~~~~~~~ int ~~~ re_match_2 (struct re_pattern_buffer *bufp, const char *string1, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int size1, const char *string2, int size2, int pos, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ struct re_registers *regs, int stop ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RE_LISP_CONTEXT_ARGS_DECL) ~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ int result; ~~~~~~~~~~~ #ifdef emacs ~~~~~~~~~~~~ /* Update the mirror syntax table if it's dirty now, this would otherwise ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ cause a malloc() in charset_mule in re_match_2_internal() when checking ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ characters' syntax. */ ~~~~~~~~~~~~~~~~~~~~~~ SYNTAX_CODE (BUFFER_MIRROR_SYNTAX_TABLE (lispbuf), 'a'); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ scache = setup_syntax_cache (scache, lispobj, lispbuf, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ offset_to_bytexpos (lispobj, pos), ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 1); ~~~ #endif ~~~~~~ result = re_match_2_internal (bufp, (re_char *) string1, size1, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (re_char *) string2, size2, ~~~~~~~~~~~~~~~~~~~~~~~~~~~ pos, regs, stop ~~~~~~~~~~~~~~~ RE_LISP_CONTEXT_ARGS); ~~~~~~~~~~~~~~~~~~~~~~ ALLOCA_GARBAGE_COLLECT (); ~~~~~~~~~~~~~~~~~~~~~~~~~~ return result; ~~~~~~~~~~~~~~ } ~ /* This is a separate function so that we can force an alloca cleanup ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ afterwards. */ ~~~~~~~~~~~~~~~ static int ~~~~~~~~~~ re_match_2_internal (struct re_pattern_buffer *bufp, re_char *string1, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int size1, re_char *string2, int size2, int pos, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ struct re_registers *regs, int stop ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RE_LISP_CONTEXT_ARGS_MULE_DECL) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ /* General temporaries. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~ int mcnt; ~~~~~~~~~ re_char *p1; ~~~~~~~~~~~~ int should_succeed; /* XEmacs change */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Just past the end of the corresponding string. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_char *end1, *end2; ~~~~~~~~~~~~~~~~~~~~~ /* Pointers into string1 and string2, just past the last characters in ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ each to consider matching. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_char *end_match_1, *end_match_2; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Where we are in the data, and the end of the current string. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_char *d, *dend; ~~~~~~~~~~~~~~~~~~ /* Where we are in the pattern, and the end of the pattern. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ unsigned char *p; ~~~~~~~~~~~~~~~~~ re_char *pstart; ~~~~~~~~~~~~~~~~ REGISTER re_char *pend; ~~~~~~~~~~~~~~~~~~~~~~~ /* Mark the opcode just after a start_memory, so we can test for an ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ empty subpattern when we get to the stop_memory. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_char *just_past_start_mem = 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* We use this to map every character in the string. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RE_TRANSLATE_TYPE translate = bufp->translate; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Failure point stack. Each place that can handle a failure further ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ down the line pushes a failure point on this stack. It consists of ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ restart, regend, and reg_info for all registers corresponding to ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the subexpressions we're currently inside, plus the number of such ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ registers, and, finally, two char *'s. The first char * is where ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ to resume scanning the pattern; the second one is where to resume ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ scanning the strings. If the latter is zero, the failure point is ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ a ``dummy''; if a failure happens and the failure point is a dummy, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ it gets discarded and the next one is tried. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef MATCH_MAY_ALLOCATE /* otherwise, this is global. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fail_stack_type fail_stack; ~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ #ifdef DEBUG ~~~~~~~~~~~~ static int failure_id; ~~~~~~~~~~~~~~~~~~~~~~ int nfailure_points_pushed = 0, nfailure_points_popped = 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ #ifdef REGEX_REL_ALLOC ~~~~~~~~~~~~~~~~~~~~~~ /* This holds the pointer to the failure stack, when ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ it is allocated relocatably. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fail_stack_elt_t *failure_stack_ptr; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ /* We fill all the registers internally, independent of what we ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return, for use in backreferences. The number here includes ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ an element for register zero. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regnum_t num_regs = bufp->re_ngroups + 1; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* The currently active registers. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regnum_t lowest_active_reg = NO_LOWEST_ACTIVE_REG; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regnum_t highest_active_reg = NO_HIGHEST_ACTIVE_REG; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Information on the contents of registers. These are pointers into ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the input strings; they record just what was matched (on this ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ attempt) by a subexpression part of the pattern, that is, the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regnum-th regstart pointer points to where in the pattern we began ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ matching and the regnum-th regend points to right after where we ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ stopped matching the regnum-th subexpression. (The zeroth register ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ keeps track of what the whole pattern matches.) */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_char **regstart, **regend; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ /* If a group that's operated upon by a repetition operator fails to ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ match anything, then the register for its start will need to be ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ restored because it will have been set to wherever in the string we ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ are when we last see its open-group operator. Similarly for a ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ register's end. */ ~~~~~~~~~~~~~~~~~~~ #ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_char **old_regstart, **old_regend; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ /* The is_active field of reg_info helps us keep track of which (possibly ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ nested) subexpressions we are currently in. The matched_something ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ field of reg_info[reg_num] helps us tell whether or not we have ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ matched any of the pattern so far this time through the reg_num-th ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ subexpression. These two fields get reset each time through any ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ loop their register is in. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef MATCH_MAY_ALLOCATE /* otherwise, this is global. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ register_info_type *reg_info; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ /* The following record the register info as found in the above ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ variables when we find a match better than any we've seen before. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ This happens as we backtrack through the failure points, which in ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ turn happens only if we have not yet matched the entire string. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int best_regs_set = false; ~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_char **best_regstart, **best_regend; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ /* Logically, this is `best_regend[0]'. But we don't want to have to ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ allocate space for that if we're not allocating space for anything ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ else (see below). Also, we never need info about register 0 for ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ any of the other register vectors, and it seems rather a kludge to ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ treat `best_regend' differently than the rest. So we keep track of ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the end of the best match so far in a separate variable. We ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ initialize this to NULL so that when we backtrack the first time ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ and need to test it, it's not garbage. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_char *match_end = NULL; ~~~~~~~~~~~~~~~~~~~~~~~~~~ /* This helps SET_REGS_MATCHED avoid doing redundant work. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int set_regs_matched_done = 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Used when we pop values we don't care about. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_char **reg_dummy; ~~~~~~~~~~~~~~~~~~~~ register_info_type *reg_info_dummy; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ #ifdef DEBUG ~~~~~~~~~~~~ /* Counts the total number of registers pushed. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int num_regs_pushed = 0; ~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ /* 1 if this match ends in the same string (string1 or string2) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ as the best previous match. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_bool same_str_p; ~~~~~~~~~~~~~~~~~~~ /* 1 if this match is the best seen so far. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_bool best_match_p; ~~~~~~~~~~~~~~~~~~~~~ #ifdef emacs ~~~~~~~~~~~~ Internal_Format fmt = buffer_or_other_internal_format (lispobj); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef REL_ALLOC ~~~~~~~~~~~~~~~~ const Ibyte *orig_buftext = ~~~~~~~~~~~~~~~~~~~~~~~~~~~ BUFFERP (lispobj) ? ~~~~~~~~~~~~~~~~~~~ BYTE_BUF_BYTE_ADDRESS (XBUFFER (lispobj), ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ BYTE_BUF_BEG (XBUFFER (lispobj))) : ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 0; ~~ #endif ~~~~~~ #ifdef ERROR_CHECK_MALLOC ~~~~~~~~~~~~~~~~~~~~~~~~~ int depth = bind_regex_malloc_disallowed (1); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ #endif /* emacs */ ~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT1 ("\n\nEntering re_match_2.\n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ BEGIN_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~~~ INIT_FAIL_STACK (); ~~~~~~~~~~~~~~~~~~~ p = (unsigned char *) ALLOCA (bufp->used); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ END_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~ /* re_match_2_internal() modifies the compiled pattern (see the succeed_n, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ jump_n, set_number_at opcodes), make it re-entrant by working on a ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ copy. This should also give better locality of reference. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ memcpy (p, bufp->buffer, bufp->used); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ pstart = (re_char *) p; ~~~~~~~~~~~~~~~~~~~~~~~ pend = pstart + bufp->used; ~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef MATCH_MAY_ALLOCATE ~~~~~~~~~~~~~~~~~~~~~~~~~ /* Do not bother to initialize all the register variables if there are ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ no groups in the pattern, as it takes a fair amount of time. If ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ there are groups, we include space for register 0 (the whole ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ pattern), even though we never use it, since it simplifies the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ array indexing. We should fix this. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (bufp->re_ngroups) ~~~~~~~~~~~~~~~~~~~~~ { ~ BEGIN_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~~~ regstart = REGEX_TALLOC (num_regs, re_char *); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regend = REGEX_TALLOC (num_regs, re_char *); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ old_regstart = REGEX_TALLOC (num_regs, re_char *); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ old_regend = REGEX_TALLOC (num_regs, re_char *); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ best_regstart = REGEX_TALLOC (num_regs, re_char *); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ best_regend = REGEX_TALLOC (num_regs, re_char *); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ reg_info = REGEX_TALLOC (num_regs, register_info_type); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ reg_dummy = REGEX_TALLOC (num_regs, re_char *); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ reg_info_dummy = REGEX_TALLOC (num_regs, register_info_type); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ END_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~ if (!(regstart && regend && old_regstart && old_regend && reg_info ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ && best_regstart && best_regend && reg_dummy && reg_info_dummy)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ FREE_VARIABLES (); ~~~~~~~~~~~~~~~~~~ return -2; ~~~~~~~~~~ } ~ } ~ else ~~~~ { ~ /* We must initialize all our variables to NULL, so that ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `FREE_VARIABLES' doesn't try to free them. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regstart = regend = old_regstart = old_regend = best_regstart ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ = best_regend = reg_dummy = NULL; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ reg_info = reg_info_dummy = (register_info_type *) NULL; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ #endif /* MATCH_MAY_ALLOCATE */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #if defined (emacs) && defined (REL_ALLOC) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ /* If the allocations above (or the call to setup_syntax_cache() in ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_match_2) caused a rel-alloc relocation, then fix up the data ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ pointers */ ~~~~~~~~~~~ Bytecount offset = offset_post_relocation (lispobj, orig_buftext); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (offset) ~~~~~~~~~~~ { ~ string1 += offset; ~~~~~~~~~~~~~~~~~~ string2 += offset; ~~~~~~~~~~~~~~~~~~ } ~ } ~ #endif /* defined (emacs) && defined (REL_ALLOC) */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* The starting position is bogus. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (pos < 0 || pos > size1 + size2) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ FREE_VARIABLES (); ~~~~~~~~~~~~~~~~~~ return -1; ~~~~~~~~~~ } ~ /* Initialize subexpression text positions to our sentinel to mark ones that ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ no start_memory/stop_memory has been seen for. Also initialize the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ register information struct. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (mcnt = 1; mcnt < num_regs; mcnt++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ regstart[mcnt] = regend[mcnt] = old_regstart[mcnt] = old_regend[mcnt] ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ = best_regstart[mcnt] = best_regend[mcnt] = REG_UNSET_VALUE; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ REG_MATCH_NULL_STRING_P (reg_info[mcnt]) = MATCH_NULL_UNSET_VALUE; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ IS_ACTIVE (reg_info[mcnt]) = 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ MATCHED_SOMETHING (reg_info[mcnt]) = 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ EVER_MATCHED_SOMETHING (reg_info[mcnt]) = 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* We move `string1' into `string2' if the latter's empty -- but not if ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `string1' is null. */ ~~~~~~~~~~~~~~~~~~~~~~ if (size2 == 0 && string1 != NULL) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ string2 = string1; ~~~~~~~~~~~~~~~~~~ size2 = size1; ~~~~~~~~~~~~~~ string1 = 0; ~~~~~~~~~~~~ size1 = 0; ~~~~~~~~~~ } ~ end1 = string1 + size1; ~~~~~~~~~~~~~~~~~~~~~~~ end2 = string2 + size2; ~~~~~~~~~~~~~~~~~~~~~~~ /* Compute where to stop matching, within the two strings. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (stop <= size1) ~~~~~~~~~~~~~~~~~~ { ~ end_match_1 = string1 + stop; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ end_match_2 = string2; ~~~~~~~~~~~~~~~~~~~~~~ } ~ else ~~~~ { ~ end_match_1 = end1; ~~~~~~~~~~~~~~~~~~~ end_match_2 = string2 + stop - size1; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* `p' scans through the pattern as `d' scans through the data. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `dend' is the end of the input string that `d' points within. `d' ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ is advanced into the following input string whenever necessary, but ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ this happens before fetching; therefore, at the beginning of the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ loop, `d' can be pointing at the end of a string, but it cannot ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ equal `string2'. */ ~~~~~~~~~~~~~~~~~~~~ if (size1 > 0 && pos <= size1) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ d = string1 + pos; ~~~~~~~~~~~~~~~~~~ dend = end_match_1; ~~~~~~~~~~~~~~~~~~~ } ~ else ~~~~ { ~ d = string2 + pos - size1; ~~~~~~~~~~~~~~~~~~~~~~~~~~ dend = end_match_2; ~~~~~~~~~~~~~~~~~~~ } ~ DEBUG_MATCH_PRINT1 ("The compiled pattern is: \n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT_COMPILED_PATTERN (bufp, p, pend); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT1 ("The string to match is: `"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT_DOUBLE_STRING (d, string1, size1, string2, size2); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT1 ("'\n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* This loops over pattern commands. It exits by returning from the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ function if the match is complete, or it drops through if the match ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fails at this starting point in the input data. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (;;) ~~~~~~~~ { ~ DEBUG_MATCH_PRINT2 ("\n0x%zx: ", (Bytecount) p); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef emacs /* XEmacs added, w/removal of immediate_quit */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!no_quit_in_re_search) ~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ BEGIN_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~~~ QUIT; ~~~~~ END_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~ RE_MATCH_RELOCATE_MOVEABLE_DATA_POINTERS (); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ #endif ~~~~~~ if (p == pend) ~~~~~~~~~~~~~~ { /* End of pattern means we might have succeeded. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT1 ("end of pattern ... "); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* If we haven't matched the entire string, and we want the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ longest match, try backtracking. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (d != end_match_2) ~~~~~~~~~~~~~~~~~~~~~ { ~ same_str_p = (FIRST_STRING_P (match_end) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ == MATCHING_IN_FIRST_STRING); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* AIX compiler got confused when this was combined ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ with the previous declaration. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (same_str_p) ~~~~~~~~~~~~~~~ best_match_p = d > match_end; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ else ~~~~ best_match_p = !MATCHING_IN_FIRST_STRING; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT1 ("backtracking.\n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!FAIL_STACK_EMPTY ()) ~~~~~~~~~~~~~~~~~~~~~~~~~ { /* More failure points to try. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* If exceeds best match so far, save it. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!best_regs_set || best_match_p) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ best_regs_set = true; ~~~~~~~~~~~~~~~~~~~~~ match_end = d; ~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT1 ("\nSAVING match as best so far.\n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (mcnt = 1; mcnt < num_regs; mcnt++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ best_regstart[mcnt] = regstart[mcnt]; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ best_regend[mcnt] = regend[mcnt]; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ } ~ goto fail; ~~~~~~~~~~ } ~ /* If no failure points, don't restore garbage. And if ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ last match is real best match, don't restore second ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ best one. */ ~~~~~~~~~~~~ else if (best_regs_set && !best_match_p) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ restore_best_regs: ~~~~~~~~~~~~~~~~~~ /* Restore best match. It may happen that `dend == ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ end_match_1' while the restored d is in string2. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ For example, the pattern `x.*y.*z' against the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ strings `x-' and `y-z-', if the two strings are ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ not consecutive in memory. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT1 ("Restoring best registers.\n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ d = match_end; ~~~~~~~~~~~~~~ dend = ((d >= string1 && d <= end1) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ? end_match_1 : end_match_2); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (mcnt = 1; mcnt < num_regs; mcnt++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ regstart[mcnt] = best_regstart[mcnt]; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regend[mcnt] = best_regend[mcnt]; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ } ~ } /* d != end_match_2 */ ~~~~~~~~~~~~~~~~~~~~~~~~ succeed_label: ~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT1 ("Accepting match.\n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* If caller wants register contents data back, do it. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ int num_nonshy_regs = bufp->re_nsub + 1; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (regs && !bufp->no_sub) ~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ /* Have the register data arrays been allocated? */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (bufp->regs_allocated == REGS_UNALLOCATED) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { /* No. So allocate them with malloc. We need one ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ extra element beyond `num_regs' for the `-1' marker ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ GNU code uses. */ ~~~~~~~~~~~~~~~~~~ regs->num_regs = MAX (RE_NREGS, num_nonshy_regs + 1); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ BEGIN_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~~~ regs->start = TALLOC (regs->num_regs, regoff_t); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regs->end = TALLOC (regs->num_regs, regoff_t); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ END_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~ RE_MATCH_RELOCATE_MOVEABLE_DATA_POINTERS (); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (regs->start == NULL || regs->end == NULL) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ FREE_VARIABLES (); ~~~~~~~~~~~~~~~~~~ return -2; ~~~~~~~~~~ } ~ bufp->regs_allocated = REGS_REALLOCATE; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ else if (bufp->regs_allocated == REGS_REALLOCATE) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { /* Yes. If we need more elements than were already ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ allocated, reallocate them. If we need fewer, just ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ leave it alone. */ ~~~~~~~~~~~~~~~~~~~ if (regs->num_regs < num_nonshy_regs + 1) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ regs->num_regs = num_nonshy_regs + 1; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ BEGIN_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~~~ RETALLOC (regs->start, regs->num_regs, regoff_t); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RETALLOC (regs->end, regs->num_regs, regoff_t); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ END_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~ RE_MATCH_RELOCATE_MOVEABLE_DATA_POINTERS (); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (regs->start == NULL || regs->end == NULL) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ FREE_VARIABLES (); ~~~~~~~~~~~~~~~~~~ return -2; ~~~~~~~~~~ } ~ } ~ } ~ else ~~~~ { ~ /* The braces fend off a "empty body in an else-statement" ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ warning under GCC when assert expands to nothing. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ assert (bufp->regs_allocated == REGS_FIXED); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* Convert the pointer data in `regstart' and `regend' to ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ indices. Register zero has to be set differently, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ since we haven't kept track of any info for it. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (regs->num_regs > 0) ~~~~~~~~~~~~~~~~~~~~~~~ { ~ regs->start[0] = pos; ~~~~~~~~~~~~~~~~~~~~~ regs->end[0] = (MATCHING_IN_FIRST_STRING ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ? ((regoff_t) (d - string1)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ : ((regoff_t) (d - string2 + size1))); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* Map over the NUM_NONSHY_REGS non-shy internal registers. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Copy each into the corresponding external register. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ MCNT indexes external registers. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (mcnt = 1; mcnt < MIN (num_nonshy_regs, regs->num_regs); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ mcnt++) ~~~~~~~ { ~ int internal_reg = bufp->external_to_internal_register[mcnt]; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if ((int)0xDEADBEEF == internal_reg ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || REG_UNSET (regstart[internal_reg]) || ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ REG_UNSET (regend[internal_reg])) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regs->start[mcnt] = regs->end[mcnt] = -1; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ else ~~~~ { ~ regs->start[mcnt] = ~~~~~~~~~~~~~~~~~~~ (regoff_t) POINTER_TO_OFFSET (regstart[internal_reg]); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regs->end[mcnt] = ~~~~~~~~~~~~~~~~~ (regoff_t) POINTER_TO_OFFSET (regend[internal_reg]); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ } ~ } /* regs && !bufp->no_sub */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* If we have regs and the regs structure has more elements than ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ were in the pattern, set the extra elements starting with ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ NUM_NONSHY_REGS to -1. If we (re)allocated the registers, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ this is the case, because we always allocate enough to have ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ at least one -1 at the end. ~~~~~~~~~~~~~~~~~~~~~~~~~~~ We do this even when no_sub is set because some applications ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (XEmacs) reuse register structures which may contain stale ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ information, and permit attempts to access those registers. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ It would be possible to require the caller to do this, but we'd ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ have to change the API for this function to reflect that, and ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ audit all callers. Note: as of 2003-04-17 callers in XEmacs ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ do clear the registers, but it's safer to leave this code in ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ because of reallocation. ~~~~~~~~~~~~~~~~~~~~~~~~ */ ~~ if (regs && regs->num_regs > 0) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (mcnt = num_nonshy_regs; mcnt < regs->num_regs; mcnt++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regs->start[mcnt] = regs->end[mcnt] = -1; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ DEBUG_MATCH_PRINT4 ("%u failure points pushed, %u popped (%u remain).\n", ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ nfailure_points_pushed, nfailure_points_popped, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ nfailure_points_pushed - nfailure_points_popped); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT2 ("%u registers pushed.\n", num_regs_pushed); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ mcnt = d - pos - (MATCHING_IN_FIRST_STRING ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ? string1 ~~~~~~~~~ : string2 - size1); ~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT2 ("Returning %d from re_match_2.\n", mcnt); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ FREE_VARIABLES (); ~~~~~~~~~~~~~~~~~~ return mcnt; ~~~~~~~~~~~~ } ~ /* Otherwise match next pattern command. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ switch ((re_opcode_t) *p++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ /* Ignore these. Used to ignore the n of succeed_n's which ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ currently have n == 0. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~ case no_op: ~~~~~~~~~~~ DEBUG_MATCH_PRINT1 ("EXECUTING no_op.\n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case succeed: ~~~~~~~~~~~~~ DEBUG_MATCH_PRINT1 ("EXECUTING succeed.\n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto succeed_label; ~~~~~~~~~~~~~~~~~~~ /* Match exactly a string of length n in the pattern. The ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ following byte in the pattern defines n, and the n bytes after ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ that make up the string to match. (Under Mule, this will be in ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the default internal format.) */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case exactn: ~~~~~~~~~~~~ mcnt = *p++; ~~~~~~~~~~~~ DEBUG_MATCH_PRINT2 ("EXECUTING exactn %d.\n", mcnt); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* This is written out as an if-else so we don't waste time ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ testing `translate' inside the loop. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (TRANSLATE_P (translate)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ do ~~ { ~ #ifdef MULE ~~~~~~~~~~~ Bytecount pat_len; ~~~~~~~~~~~~~~~~~~ REGEX_PREFETCH (); ~~~~~~~~~~~~~~~~~~ if (RE_TRANSLATE_1 (itext_ichar_fmt (d, fmt, lispobj)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ != itext_ichar (p)) ~~~~~~~~~~~~~~~~~~~ goto fail; ~~~~~~~~~~ pat_len = itext_ichar_len (p); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ p += pat_len; ~~~~~~~~~~~~~ INC_IBYTEPTR_FMT (d, fmt); ~~~~~~~~~~~~~~~~~~~~~~~~~~ mcnt -= pat_len; ~~~~~~~~~~~~~~~~ #else /* not MULE */ ~~~~~~~~~~~~~~~~~~~~ REGEX_PREFETCH (); ~~~~~~~~~~~~~~~~~~ if ((unsigned char) RE_TRANSLATE_1 (*d++) != *p++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto fail; ~~~~~~~~~~ mcnt--; ~~~~~~~ #endif ~~~~~~ } ~ while (mcnt > 0); ~~~~~~~~~~~~~~~~~ } ~ else ~~~~ { ~ #ifdef MULE ~~~~~~~~~~~ /* If buffer format is default, then we can shortcut and just ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ compare the text directly, byte by byte. Otherwise, we ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ need to go character by character. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (fmt != FORMAT_DEFAULT) ~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ do ~~ { ~ Bytecount pat_len; ~~~~~~~~~~~~~~~~~~ REGEX_PREFETCH (); ~~~~~~~~~~~~~~~~~~ if (itext_ichar_fmt (d, fmt, lispobj) != ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ itext_ichar (p)) ~~~~~~~~~~~~~~~~ goto fail; ~~~~~~~~~~ pat_len = itext_ichar_len (p); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ p += pat_len; ~~~~~~~~~~~~~ INC_IBYTEPTR_FMT (d, fmt); ~~~~~~~~~~~~~~~~~~~~~~~~~~ mcnt -= pat_len; ~~~~~~~~~~~~~~~~ } ~ while (mcnt > 0); ~~~~~~~~~~~~~~~~~ } ~ else ~~~~ #endif ~~~~~~ { ~ do ~~ { ~ REGEX_PREFETCH (); ~~~~~~~~~~~~~~~~~~ if (*d++ != *p++) goto fail; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ mcnt--; ~~~~~~~ } ~ while (mcnt > 0); ~~~~~~~~~~~~~~~~~ } ~ } ~ SET_REGS_MATCHED (); ~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ /* Match any character except possibly a newline or a null. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case anychar: ~~~~~~~~~~~~~ DEBUG_MATCH_PRINT1 ("EXECUTING anychar.\n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ REGEX_PREFETCH (); ~~~~~~~~~~~~~~~~~~ if ((!(bufp->syntax & RE_DOT_NEWLINE) && ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RE_TRANSLATE (itext_ichar_fmt (d, fmt, lispobj)) == '\n') ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || (bufp->syntax & RE_DOT_NOT_NULL && ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RE_TRANSLATE (itext_ichar_fmt (d, fmt, lispobj)) == ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ '\000')) ~~~~~~~~ goto fail; ~~~~~~~~~~ SET_REGS_MATCHED (); ~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT2 (" Matched `%c'.\n", *d); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ INC_IBYTEPTR_FMT (d, fmt); /* XEmacs change */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case charset: ~~~~~~~~~~~~~ case charset_not: ~~~~~~~~~~~~~~~~~ { ~ REGISTER Ichar c; ~~~~~~~~~~~~~~~~~ re_bool not_p = (re_opcode_t) *(p - 1) == charset_not; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT2 ("EXECUTING charset%s.\n", not_p ? "_not" : ""); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ REGEX_PREFETCH (); ~~~~~~~~~~~~~~~~~~ c = itext_ichar_fmt (d, fmt, lispobj); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ c = RE_TRANSLATE (c); /* The character to match. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Cast to `unsigned int' instead of `unsigned char' in case the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bit list is a full 32 bytes long. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if ((unsigned int)c < (unsigned int) (*p * BYTEWIDTH) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ && p[1 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH))) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ not_p = !not_p; ~~~~~~~~~~~~~~~ p += 1 + *p; ~~~~~~~~~~~~ if (!not_p) goto fail; ~~~~~~~~~~~~~~~~~~~~~~ SET_REGS_MATCHED (); ~~~~~~~~~~~~~~~~~~~~ INC_IBYTEPTR_FMT (d, fmt); /* XEmacs change */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ } ~ #ifdef MULE ~~~~~~~~~~~ case charset_mule: ~~~~~~~~~~~~~~~~~~ case charset_mule_not: ~~~~~~~~~~~~~~~~~~~~~~ { ~ REGISTER Ichar c; ~~~~~~~~~~~~~~~~~ re_bool not_p = (re_opcode_t) *(p - 1) == charset_mule_not; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Bitbyte class_bits = *p++; ~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT2 ("EXECUTING charset_mule%s.\n", ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ not_p ? "_not" : ""); ~~~~~~~~~~~~~~~~~~~~~ REGEX_PREFETCH (); ~~~~~~~~~~~~~~~~~~ c = itext_ichar_fmt (d, fmt, lispobj); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ c = RE_TRANSLATE (c); /* The character to match. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if ((class_bits && ~~~~~~~~~~~~~~~~~~ ((class_bits & BIT_WORD && ISWORD (c)) /* = ALNUM */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || (class_bits & BIT_ALPHA && ISALPHA (c)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || (class_bits & BIT_SPACE && ISSPACE (c)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || (class_bits & BIT_PUNCT && ISPUNCT (c)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || (TRANSLATE_P (translate) ? ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (class_bits & (BIT_UPPER | BIT_LOWER) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ && !NOCASEP (lispbuf, c)) ~~~~~~~~~~~~~~~~~~~~~~~~~ : ((class_bits & BIT_UPPER && ISUPPER (c)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || (class_bits & BIT_LOWER && ISLOWER (c)))))) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || EQ (Qt, unified_range_table_lookup ((void *) p, c, Qnil))) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ not_p = !not_p; ~~~~~~~~~~~~~~~ } ~ p += unified_range_table_bytes_used ((void *) p); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!not_p) goto fail; ~~~~~~~~~~~~~~~~~~~~~~ SET_REGS_MATCHED (); ~~~~~~~~~~~~~~~~~~~~ INC_IBYTEPTR_FMT (d, fmt); ~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ } ~ #endif /* MULE */ ~~~~~~~~~~~~~~~~~ /* The beginning of a group is represented by start_memory. The ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ arguments are the register number in the next two bytes, and the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ number of groups inner to this one in the two bytes thereafter. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ The text matched within the group is recorded (in the internal ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ registers data structure) under the register number. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case start_memory: ~~~~~~~~~~~~~~~~~~ { ~ regnum_t regno; ~~~~~~~~~~~~~~~ /* Find out if this group can match the empty string. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ p1 = p; /* To send to group_match_null_string_p. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ EXTRACT_NONNEGATIVE_AND_INCR (regno, p); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT3 ("EXECUTING start_memory %d (%d):\n", ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regno, extract_number (p)); ~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (REG_MATCH_NULL_STRING_P (reg_info[regno]) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ == MATCH_NULL_UNSET_VALUE) ~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ REG_MATCH_NULL_STRING_P (reg_info[regno]) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ = group_match_null_string_p (&p1, pend, reg_info); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ DEBUG_MATCH_PRINT2 (" group CAN%s match null string\n", ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ REG_MATCH_NULL_STRING_P (reg_info[regno]) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ? "NOT" : ""); ~~~~~~~~~~~~~~ /* Save the position in the string where we were the last time ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ we were at this open-group operator in case the group is ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ operated upon by a repetition operator, e.g., with `(a*)*b' ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ against `ab'; then we want to ignore where we are now in the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ string in case this attempt to match fails. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ old_regstart[regno] = REG_MATCH_NULL_STRING_P (reg_info[regno]) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ? REG_UNSET (regstart[regno]) ? d : regstart[regno] ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ : regstart[regno]; ~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT2 (" old_regstart: %zd\n", ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ POINTER_TO_OFFSET (old_regstart[regno])); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regstart[regno] = d; ~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT2 (" regstart: %zd\n", ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ POINTER_TO_OFFSET (regstart[regno])); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ IS_ACTIVE (reg_info[regno]) = 1; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ MATCHED_SOMETHING (reg_info[regno]) = 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Clear this whenever we change the register activity status. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ set_regs_matched_done = 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~ /* This is the new highest active register. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ highest_active_reg = regno; ~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* If nothing was active before, this is the new lowest active ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ register. */ ~~~~~~~~~~~~~ if (lowest_active_reg == NO_LOWEST_ACTIVE_REG) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ lowest_active_reg = regno; ~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Move past the inner group count. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ p += 2; ~~~~~~~ just_past_start_mem = p; ~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ } ~ /* The stop_memory opcode represents the end of a group. Its ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ arguments are the same as start_memory's: the register ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ number, and the number of inner groups. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case stop_memory: ~~~~~~~~~~~~~~~~~ { ~ regnum_t regno, inner_groups; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ EXTRACT_NONNEGATIVE_AND_INCR (regno, p); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ EXTRACT_NONNEGATIVE_AND_INCR (inner_groups, p); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT3 ("EXECUTING stop_memory %d (%d):\n", ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regno, inner_groups); ~~~~~~~~~~~~~~~~~~~~~ /* We need to save the string position the last time we were at ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ this close-group operator in case the group is operated ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ upon by a repetition operator, e.g., with `((a*)*(b*)*)*' ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ against `aba'; then we want to ignore where we are now in ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the string in case this attempt to match fails. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ old_regend[regno] = REG_MATCH_NULL_STRING_P (reg_info[regno]) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ? REG_UNSET (regend[regno]) ? d : regend[regno] ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ : regend[regno]; ~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT2 (" old_regend: %zd\n", ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ POINTER_TO_OFFSET (old_regend[regno])); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regend[regno] = d; ~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT2 (" regend: %zd\n", ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ POINTER_TO_OFFSET (regend[regno])); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* This register isn't active anymore. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ IS_ACTIVE (reg_info[regno]) = 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Clear this whenever we change the register activity status. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ set_regs_matched_done = 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~ /* If this was the only register active, nothing is active ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ anymore. */ ~~~~~~~~~~~~ if (lowest_active_reg == highest_active_reg) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ lowest_active_reg = NO_LOWEST_ACTIVE_REG; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ highest_active_reg = NO_HIGHEST_ACTIVE_REG; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ else ~~~~ { ~ /* We must scan for the new highest active register, since it ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ isn't necessarily one less than now: consider ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (a(b)c(d(e)f)g). When group 3 ends, after the f), the new ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ highest active register is 1. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regnum_t r = regno - 1; ~~~~~~~~~~~~~~~~~~~~~~~ while (r > 0 && !IS_ACTIVE (reg_info[r])) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ r--; ~~~~ /* If we end up at register zero, that means that we saved ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the registers as the result of an `on_failure_jump', not ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ a `start_memory', and we jumped to past the innermost ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `stop_memory'. For example, in ((.)*) we save registers ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 1 and 2 as a result of the *, but when we pop back to the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ second ), we are at the stop_memory 1. Thus, nothing is ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ active. */ ~~~~~~~~~~~ if (r == 0) ~~~~~~~~~~~ { ~ lowest_active_reg = NO_LOWEST_ACTIVE_REG; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ highest_active_reg = NO_HIGHEST_ACTIVE_REG; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ else ~~~~ { ~ highest_active_reg = r; ~~~~~~~~~~~~~~~~~~~~~~~ /* 98/9/21 jhod: We've also gotta set lowest_active_reg, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ don't we? */ ~~~~~~~~~~~~ r = 1; ~~~~~~ while (r < highest_active_reg && !IS_ACTIVE(reg_info[r])) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ r++; ~~~~ lowest_active_reg = r; ~~~~~~~~~~~~~~~~~~~~~~ } ~ } ~ /* If just failed to match something this time around with a ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ group that's operated on by a repetition operator, try to ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ force exit from the ``loop'', and restore the register ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ information for this group that we had before trying this ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ last match. */ ~~~~~~~~~~~~~~~ if ((!MATCHED_SOMETHING (reg_info[regno]) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || just_past_start_mem == p - 4) && p < pend) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ re_bool is_a_jump_n = false; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ p1 = p; ~~~~~~~ mcnt = 0; ~~~~~~~~~ switch ((re_opcode_t) *p1++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ case jump_n: ~~~~~~~~~~~~ is_a_jump_n = true; ~~~~~~~~~~~~~~~~~~~ case pop_failure_jump: ~~~~~~~~~~~~~~~~~~~~~~ case maybe_pop_jump: ~~~~~~~~~~~~~~~~~~~~ case jump: ~~~~~~~~~~ case dummy_failure_jump: ~~~~~~~~~~~~~~~~~~~~~~~~ EXTRACT_NUMBER_AND_INCR (mcnt, p1); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (is_a_jump_n) ~~~~~~~~~~~~~~~~ p1 += 2; ~~~~~~~~ break; ~~~~~~ default: ~~~~~~~~ /* do nothing */ ; ~~~~~~~~~~~~~~~~~~ } ~ p1 += mcnt; ~~~~~~~~~~~ /* If the next operation is a jump backwards in the pattern ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ to an on_failure_jump right before the start_memory ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ corresponding to this stop_memory, exit from the loop ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ by forcing a failure after pushing on the stack the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ on_failure_jump's jump in the pattern, and d. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (mcnt < 0 && (re_opcode_t) *p1 == on_failure_jump ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ && (re_opcode_t) p1[3] == start_memory && ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regno == extract_nonnegative (p1 + 4)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ /* If this group ever matched anything, then restore ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ what its registers were before trying this last ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ failed match, e.g., with `(a*)*b' against `ab' for ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regstart[1], and, e.g., with `((a*)*(b*)*)*' against ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `aba' for regend[3]. ~~~~~~~~~~~~~~~~~~~~ Also restore the registers for inner groups for, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ e.g., `((a*)(b*))*' against `aba' (register 3 would ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ otherwise get trashed). */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (EVER_MATCHED_SOMETHING (reg_info[regno])) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ int r; ~~~~~~ EVER_MATCHED_SOMETHING (reg_info[regno]) = 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Restore this and inner groups' (if any) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ registers. */ ~~~~~~~~~~~~~~ for (r = regno; r < regno + inner_groups; r++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ regstart[r] = old_regstart[r]; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* xx why this test? */ ~~~~~~~~~~~~~~~~~~~~~~~~ if (old_regend[r] >= regstart[r]) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regend[r] = old_regend[r]; ~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ } ~ p1++; ~~~~~ EXTRACT_NUMBER_AND_INCR (mcnt, p1); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PUSH_FAILURE_POINT (p1 + mcnt, d, -2); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto fail; ~~~~~~~~~~ } ~ } ~ /* We used to move past the register number and inner group count ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ here, when registers were just one byte; that's no longer ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ necessary with EXTRACT_NUMBER_AND_INCR(), above. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ } ~ /* \ has been turned into a `duplicate' command which is ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ followed by the numeric value of as the register number. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (Already passed through external-to-internal-register mapping, so ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ it refers to the actual group number, not the non-shy-only ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ numbering used in the external world.) */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case duplicate: ~~~~~~~~~~~~~~~ { ~ REGISTER re_char *d2, *dend2; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Get which register to match against. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regnum_t regno; ~~~~~~~~~~~~~~~ EXTRACT_NONNEGATIVE_AND_INCR (regno, p); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT2 ("EXECUTING duplicate %d.\n", regno); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Can't back reference a group which we've never matched. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (REG_UNSET (regstart[regno]) || REG_UNSET (regend[regno])) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto fail; ~~~~~~~~~~ /* Where in input to try to start matching. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ d2 = regstart[regno]; ~~~~~~~~~~~~~~~~~~~~~ /* Where to stop matching; if both the place to start and ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the place to stop matching are in the same string, then ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ set to the place to stop, otherwise, for now have to use ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the end of the first string. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ dend2 = ((FIRST_STRING_P (regstart[regno]) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ == FIRST_STRING_P (regend[regno])) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ? regend[regno] : end_match_1); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (;;) ~~~~~~~~ { ~ /* If necessary, advance to next segment in register ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ contents. */ ~~~~~~~~~~~~~ while (d2 == dend2) ~~~~~~~~~~~~~~~~~~~ { ~ if (dend2 == end_match_2) break; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (dend2 == regend[regno]) break; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* End of string1 => advance to string2. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ d2 = string2; ~~~~~~~~~~~~~ dend2 = regend[regno]; ~~~~~~~~~~~~~~~~~~~~~~ } ~ /* At end of register contents => success */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (d2 == dend2) break; ~~~~~~~~~~~~~~~~~~~~~~~ /* If necessary, advance to next segment in data. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ REGEX_PREFETCH (); ~~~~~~~~~~~~~~~~~~ /* How many characters left in this segment to match. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ mcnt = dend - d; ~~~~~~~~~~~~~~~~ /* Want how many consecutive characters we can match in ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ one shot, so, if necessary, adjust the count. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (mcnt > dend2 - d2) ~~~~~~~~~~~~~~~~~~~~~~ mcnt = dend2 - d2; ~~~~~~~~~~~~~~~~~~ /* Compare that many; failure if mismatch, else move ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ past them. */ ~~~~~~~~~~~~~~ if (TRANSLATE_P (translate) ~~~~~~~~~~~~~~~~~~~~~~~~~~~ ? bcmp_translate (d, d2, mcnt, translate ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef emacs ~~~~~~~~~~~~ , fmt, lispobj ~~~~~~~~~~~~~~ #endif ~~~~~~ ) ~ : memcmp (d, d2, mcnt)) ~~~~~~~~~~~~~~~~~~~~~~~ goto fail; ~~~~~~~~~~ d += mcnt, d2 += mcnt; ~~~~~~~~~~~~~~~~~~~~~~ /* Do this because we've match some characters. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ SET_REGS_MATCHED (); ~~~~~~~~~~~~~~~~~~~~ } ~ } ~ break; ~~~~~~ /* begline matches the empty string at the beginning of the string ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (unless `not_bol' is set in `bufp'), and, if ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `newline_anchor' is set, after newlines. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case begline: ~~~~~~~~~~~~~ DEBUG_MATCH_PRINT1 ("EXECUTING begline.\n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (AT_STRINGS_BEG (d)) ~~~~~~~~~~~~~~~~~~~~~~~ { ~ if (!bufp->not_bol) break; ~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ else ~~~~ { ~ re_char *d2 = d; ~~~~~~~~~~~~~~~~ DEC_IBYTEPTR (d2); ~~~~~~~~~~~~~~~~~~ if (itext_ichar_ascii_fmt (d2, fmt, lispobj) == '\n' && ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->newline_anchor) ~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ } ~ /* In all other cases, we fail. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto fail; ~~~~~~~~~~ /* endline is the dual of begline. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case endline: ~~~~~~~~~~~~~ DEBUG_MATCH_PRINT1 ("EXECUTING endline.\n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (AT_STRINGS_END (d)) ~~~~~~~~~~~~~~~~~~~~~~~ { ~ if (!bufp->not_eol) break; ~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* We have to ``prefetch'' the next character. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ else if ((d == end1 ? ~~~~~~~~~~~~~~~~~~~~~ itext_ichar_ascii_fmt (string2, fmt, lispobj) : ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ itext_ichar_ascii_fmt (d, fmt, lispobj)) == '\n' ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ && bufp->newline_anchor) ~~~~~~~~~~~~~~~~~~~~~~~~ { ~ break; ~~~~~~ } ~ goto fail; ~~~~~~~~~~ /* Match at the very beginning of the data. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case begbuf: ~~~~~~~~~~~~ DEBUG_MATCH_PRINT1 ("EXECUTING begbuf.\n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (AT_STRINGS_BEG (d)) ~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ goto fail; ~~~~~~~~~~ /* Match at the very end of the data. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case endbuf: ~~~~~~~~~~~~ DEBUG_MATCH_PRINT1 ("EXECUTING endbuf.\n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (AT_STRINGS_END (d)) ~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ goto fail; ~~~~~~~~~~ /* on_failure_keep_string_jump is used to optimize `.*\n'. It ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ pushes NULL as the value for the string on the stack. Then ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `pop_failure_point' will keep the current value for the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ string, instead of restoring it. To see why, consider ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ matching `foo\nbar' against `.*\n'. The .* matches the foo; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ then the . fails against the \n. But the next thing we want ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ to do is match the \n against the \n; if we restored the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ string value, we would be back at the foo. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Because this is used only in specific cases, we don't need to ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ check all the things that `on_failure_jump' does, to make ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ sure the right things get saved on the stack. Hence we don't ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ share its code. The only reason to push anything on the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ stack at all is that otherwise we would have to change ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `anychar's code to do something besides goto fail in this ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case; that seems worse than this. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case on_failure_keep_string_jump: ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT1 ("EXECUTING on_failure_keep_string_jump"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ EXTRACT_NUMBER_AND_INCR (mcnt, p); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT3 (" %d (to 0x%zx):\n", mcnt, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (Bytecount) (p + mcnt)); ~~~~~~~~~~~~~~~~~~~~~~~~ PUSH_FAILURE_POINT (p + mcnt, (unsigned char *) 0, -2); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ /* Uses of on_failure_jump: ~~~~~~~~~~~~~~~~~~~~~~~~~~~ Each alternative starts with an on_failure_jump that points ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ to the beginning of the next alternative. Each alternative ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ except the last ends with a jump that in effect jumps past ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the rest of the alternatives. (They really jump to the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ending jump of the following alternative, because tensioning ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ these jumps is a hassle.) ~~~~~~~~~~~~~~~~~~~~~~~~~ Repeats start with an on_failure_jump that points past both ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the repetition text and either the following jump or ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ pop_failure_jump back to this on_failure_jump. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case on_failure_jump: ~~~~~~~~~~~~~~~~~~~~~ on_failure: ~~~~~~~~~~~ DEBUG_MATCH_PRINT1 ("EXECUTING on_failure_jump"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ EXTRACT_NUMBER_AND_INCR (mcnt, p); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT3 (" %d (to 0x%zx)", mcnt, (Bytecount) (p + mcnt)); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* If this on_failure_jump comes right before a group (i.e., ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the original * applied to a group), save the information ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for that group and all inner ones, so that if we fail back ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ to this point, the group's information will be correct. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ For example, in \(a*\)*\1, we need the preceding group, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ and in \(\(a*\)b*\)\2, we need the inner group. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* We can't use `p' to check ahead because we push ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ a failure point to `p + mcnt' after we do this. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ p1 = p; ~~~~~~~ /* We need to skip no_op's before we look for the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ start_memory in case this on_failure_jump is happening as ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the result of a completed succeed_n, as in \(a\)\{1,3\}b\1 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ against aba. */ ~~~~~~~~~~~~~~~~ while (p1 < pend && (re_opcode_t) *p1 == no_op) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ p1++; ~~~~~ if (p1 < pend && (re_opcode_t) *p1 == start_memory) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ /* We have a new highest active register now. This will ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ get reset at the start_memory we are about to get to, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ but we will have saved all the registers relevant to ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ this repetition op, as described above. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ highest_active_reg = *(p1 + 1) + *(p1 + 2); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (lowest_active_reg == NO_LOWEST_ACTIVE_REG) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ lowest_active_reg = *(p1 + 1); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ DEBUG_MATCH_PRINT1 (":\n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~ PUSH_FAILURE_POINT (p + mcnt, d, -2); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ /* A smart repeat ends with `maybe_pop_jump'. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ We change it to either `pop_failure_jump' or `jump'. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case maybe_pop_jump: ~~~~~~~~~~~~~~~~~~~~ EXTRACT_NUMBER_AND_INCR (mcnt, p); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT2 ("EXECUTING maybe_pop_jump %d.\n", mcnt); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ REGISTER const unsigned char *p2 = p; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Compare the beginning of the repeat with what in the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ pattern follows its end. If we can establish that there ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ is nothing that they would both match, i.e., that we ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ would have to backtrack because of (as in, e.g., `a*a') ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ then we can change to pop_failure_jump, because we'll ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ never have to backtrack. ~~~~~~~~~~~~~~~~~~~~~~~~ This is not true in the case of alternatives: in ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `(a|ab)*' we do need to backtrack to the `ab' alternative ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (e.g., if the string was `ab'). But instead of trying to ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ detect that here, the alternative has put on a dummy ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ failure point which is what we will end up popping. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Skip over open/close-group commands. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ If what follows this loop is a ...+ construct, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ look at what begins its body, since we will have to ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ match at least one of that. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ while (1) ~~~~~~~~~ { ~ if (p2 + 2 < pend ~~~~~~~~~~~~~~~~~ && ((re_opcode_t) *p2 == stop_memory ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || (re_opcode_t) *p2 == start_memory)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ p2 += 3; ~~~~~~~~ else if (p2 + 6 < pend ~~~~~~~~~~~~~~~~~~~~~~ && (re_opcode_t) *p2 == dummy_failure_jump) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ p2 += 6; ~~~~~~~~ else ~~~~ break; ~~~~~~ } ~ p1 = p + mcnt; ~~~~~~~~~~~~~~ /* p1[0] ... p1[2] are the `on_failure_jump' corresponding ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ to the `maybe_finalize_jump' of this case. Examine what ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ follows. */ ~~~~~~~~~~~~ /* If we're at the end of the pattern, we can change. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (p2 == pend) ~~~~~~~~~~~~~~~ { ~ /* Consider what happens when matching ":\(.*\)" ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ against ":/". I don't really understand this code ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ yet. */ ~~~~~~~~ ((unsigned char *)p)[-3] = (re_char) pop_failure_jump; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT1 ~~~~~~~~~~~~~~~~~~ (" End of pattern: change to `pop_failure_jump'.\n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ else if ((re_opcode_t) *p2 == exactn ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || (bufp->newline_anchor && (re_opcode_t) *p2 == endline)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ REGISTER unsigned char c ~~~~~~~~~~~~~~~~~~~~~~~~ = *p2 == (unsigned char) endline ? '\n' : p2[2]; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if ((re_opcode_t) p1[3] == exactn && p1[5] != c) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ ((unsigned char *)p)[-3] ~~~~~~~~~~~~~~~~~~~~~~~~ = (unsigned char) pop_failure_jump; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT3 (" %c != %c => pop_failure_jump.\n", ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ c, p1[5]); ~~~~~~~~~~ } ~ else if ((re_opcode_t) p1[3] == charset ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || (re_opcode_t) p1[3] == charset_not) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ int not_p = (re_opcode_t) p1[3] == charset_not; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (c < (unsigned char) (p1[4] * BYTEWIDTH) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ && p1[5 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH))) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ not_p = !not_p; ~~~~~~~~~~~~~~~ /* `not_p' is equal to 1 if c would match, which means ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ that we can't change to pop_failure_jump. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!not_p) ~~~~~~~~~~~ { ~ ((unsigned char *)p)[-3] ~~~~~~~~~~~~~~~~~~~~~~~~ = (unsigned char) pop_failure_jump; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT1 (" No match => pop_failure_jump.\n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ } ~ } ~ else if ((re_opcode_t) *p2 == charset) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ #ifdef DEBUG ~~~~~~~~~~~~ REGISTER unsigned char c ~~~~~~~~~~~~~~~~~~~~~~~~ = *p2 == (unsigned char) endline ? '\n' : p2[2]; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ if ((re_opcode_t) p1[3] == exactn ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ && ! ((int) p2[1] * BYTEWIDTH > (int) p1[5] ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ && (p2[2 + p1[5] / BYTEWIDTH] ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ & (1 << (p1[5] % BYTEWIDTH))))) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ unsigned char *p3 = (unsigned char *)p; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ p3[-3] = (unsigned char) pop_failure_jump; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT3 (" %c != %c => pop_failure_jump.\n", ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ c, p1[5]); ~~~~~~~~~~ } ~ else if ((re_opcode_t) p1[3] == charset_not) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ int idx; ~~~~~~~~ /* We win if the charset_not inside the loop ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ lists every character listed in the charset after. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (idx = 0; idx < (int) p2[1]; idx++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (! (p2[2 + idx] == 0 ~~~~~~~~~~~~~~~~~~~~~~~ || (idx < (int) p1[4] ~~~~~~~~~~~~~~~~~~~~~ && ((p2[2 + idx] & ~ p1[5 + idx]) == 0)))) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ if (idx == p2[1]) ~~~~~~~~~~~~~~~~~ { ~ unsigned char *p3 = (unsigned char *) p; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ p3[-3] = (unsigned char) pop_failure_jump; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT1 (" No match => pop_failure_jump.\n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ } ~ else if ((re_opcode_t) p1[3] == charset) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ int idx; ~~~~~~~~ /* We win if the charset inside the loop ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ has no overlap with the one after the loop. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (idx = 0; ~~~~~~~~~~~~~ idx < (int) p2[1] && idx < (int) p1[4]; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ idx++) ~~~~~~ if ((p2[2 + idx] & p1[5 + idx]) != 0) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ if (idx == p2[1] || idx == p1[4]) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ unsigned char *p3 = (unsigned char *)p; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ p3[-3] = (unsigned char) pop_failure_jump; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT1 (" No match => pop_failure_jump.\n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ } ~ } ~ } ~ p -= 2; /* Point at relative address again. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if ((re_opcode_t) p[-1] != pop_failure_jump) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ p[-1] = (unsigned char) jump; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT1 (" Match => jump.\n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto unconditional_jump; ~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* Note fall through. */ ~~~~~~~~~~~~~~~~~~~~~~~~~ /* The end of a simple repeat has a pop_failure_jump back to ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ its matching on_failure_jump, where the latter will push a ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ failure point. The pop_failure_jump takes off failure ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ points put on by this pop_failure_jump's matching ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ on_failure_jump; we got through the pattern to here from the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ matching on_failure_jump, so didn't fail. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case pop_failure_jump: ~~~~~~~~~~~~~~~~~~~~~~ { ~ /* We need to pass separate storage for the lowest and ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ highest registers, even though we don't care about the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ actual values. Otherwise, we will restore only one ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ register from the stack, since lowest will == highest in ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `pop_failure_point'. */ ~~~~~~~~~~~~~~~~~~~~~~~~ int dummy_low_reg, dummy_high_reg; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ unsigned char *pdummy; ~~~~~~~~~~~~~~~~~~~~~~ re_char *sdummy = NULL; ~~~~~~~~~~~~~~~~~~~~~~~ USED (sdummy); /* Silence warning. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT1 ("EXECUTING pop_failure_jump.\n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ POP_FAILURE_POINT (sdummy, pdummy, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regex.c:774:55: note: in definition of macro 'DEBUG_FAIL_PRINT2' if (debug_regexps & RE_DEBUG_FAILURE_POINT) printf (x1, x2) ^~ regex.c:6767:13: note: in expansion of macro 'POP_FAILURE_POINT' POP_FAILURE_POINT (sdummy, pdummy, ^~~~~~~~~~~~~~~~~ regex.c:1920:23: warning: format '%zx' expects argument of type 'size_t', but argument 2 has type 'Bytecount {aka long int}' [-Wformat=] DEBUG_FAIL_PRINT2 (" info: 0x%zx\n", \ ^ * (Bytecount *) ®_info[this_reg]); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regex.c:774:55: note: in definition of macro 'DEBUG_FAIL_PRINT2' if (debug_regexps & RE_DEBUG_FAILURE_POINT) printf (x1, x2) ^~ regex.c:6767:13: note: in expansion of macro 'POP_FAILURE_POINT' POP_FAILURE_POINT (sdummy, pdummy, ^~~~~~~~~~~~~~~~~ regex.c:1922:23: warning: format '%zx' expects argument of type 'size_t', but argument 2 has type 'long int' [-Wformat=] DEBUG_FAIL_PRINT2 (" end: 0x%zx\n", \ ^ (Bytecount) regend[this_reg]); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regex.c:774:55: note: in definition of macro 'DEBUG_FAIL_PRINT2' if (debug_regexps & RE_DEBUG_FAILURE_POINT) printf (x1, x2) ^~ regex.c:6767:13: note: in expansion of macro 'POP_FAILURE_POINT' POP_FAILURE_POINT (sdummy, pdummy, ^~~~~~~~~~~~~~~~~ regex.c:1924:23: warning: format '%zx' expects argument of type 'size_t', but argument 2 has type 'long int' [-Wformat=] DEBUG_FAIL_PRINT2 (" start: 0x%zx\n", \ ^ (Bytecount) regstart[this_reg]); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regex.c:774:55: note: in definition of macro 'DEBUG_FAIL_PRINT2' if (debug_regexps & RE_DEBUG_FAILURE_POINT) printf (x1, x2) ^~ regex.c:6767:13: note: in expansion of macro 'POP_FAILURE_POINT' POP_FAILURE_POINT (sdummy, pdummy, ^~~~~~~~~~~~~~~~~ regex.c:6781:31: warning: format '%zx' expects argument of type 'size_t', but argument 2 has type 'long int' [-Wformat=] DEBUG_MATCH_PRINT2 ("(to 0x%zx).\n", (Bytecount) p); ^ ~~~~~~~~~~~~~ regex.c:789:50: note: in definition of macro 'DEBUG_MATCH_PRINT2' if (debug_regexps & RE_DEBUG_MATCHING) printf (x1, x2) ^~ regex.c:1731:26: warning: format '%zd' expects argument of type 'signed size_t', but argument 2 has type 'long int' [-Wformat=] DEBUG_FAIL_PRINT2 (" Before push, next avail: %zd\n", \ ^ (Bytecount) (fail_stack).avail); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~ regex.c:774:55: note: in definition of macro 'DEBUG_FAIL_PRINT2' if (debug_regexps & RE_DEBUG_FAILURE_POINT) printf (x1, x2) ^~ regex.c:6801:11: note: in expansion of macro 'PUSH_FAILURE_POINT' PUSH_FAILURE_POINT ((unsigned char *) 0, (unsigned char *) 0, -2); ^~~~~~~~~~~~~~~~~~ regex.c:1733:26: warning: format '%zd' expects argument of type 'signed size_t', but argument 2 has type 'long int' [-Wformat=] DEBUG_FAIL_PRINT2 (" size: %zd\n", \ ^ (Bytecount) (fail_stack).size); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~ regex.c:774:55: note: in definition of macro 'DEBUG_FAIL_PRINT2' if (debug_regexps & RE_DEBUG_FAILURE_POINT) printf (x1, x2) ^~ regex.c:6801:11: note: in expansion of macro 'PUSH_FAILURE_POINT' PUSH_FAILURE_POINT ((unsigned char *) 0, (unsigned char *) 0, -2); ^~~~~~~~~~~~~~~~~~ regex.c:1737:26: warning: format '%zd' expects argument of type 'signed size_t', but argument 2 has type 'long int' [-Wformat=] DEBUG_FAIL_PRINT2 (" available: %zd\n", \ ^ regex.c:774:55: note: in definition of macro 'DEBUG_FAIL_PRINT2' if (debug_regexps & RE_DEBUG_FAILURE_POINT) printf (x1, x2) ^~ regex.c:6801:11: note: in expansion of macro 'PUSH_FAILURE_POINT' PUSH_FAILURE_POINT ((unsigned char *) 0, (unsigned char *) 0, -2); ^~~~~~~~~~~~~~~~~~ regex.c:1756:23: warning: format '%zd' expects argument of type 'signed size_t', but argument 2 has type 'long int' [-Wformat=] DEBUG_FAIL_PRINT2 ("\n Doubled stack; size now: %zd\n", \ ^ (Bytecount) (fail_stack).size); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~ regex.c:774:55: note: in definition of macro 'DEBUG_FAIL_PRINT2' if (debug_regexps & RE_DEBUG_FAILURE_POINT) printf (x1, x2) ^~ regex.c:6801:11: note: in expansion of macro 'PUSH_FAILURE_POINT' PUSH_FAILURE_POINT ((unsigned char *) 0, (unsigned char *) 0, -2); ^~~~~~~~~~~~~~~~~~ regex.c:1758:23: warning: format '%zd' expects argument of type 'signed size_t', but argument 2 has type 'long int' [-Wformat=] DEBUG_FAIL_PRINT2 (" slots available: %zd\n", \ ^ regex.c:774:55: note: in definition of macro 'DEBUG_FAIL_PRINT2' if (debug_regexps & RE_DEBUG_FAILURE_POINT) printf (x1, x2) ^~ regex.c:6801:11: note: in expansion of macro 'PUSH_FAILURE_POINT' PUSH_FAILURE_POINT ((unsigned char *) 0, (unsigned char *) 0, -2); ^~~~~~~~~~~~~~~~~~ regex.c:1777:23: warning: format '%zx' expects argument of type 'size_t', but argument 2 has type 'long int' [-Wformat=] DEBUG_FAIL_PRINT2 (" start: 0x%zx\n", \ ^ (Bytecount) regstart[this_reg]); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regex.c:774:55: note: in definition of macro 'DEBUG_FAIL_PRINT2' if (debug_regexps & RE_DEBUG_FAILURE_POINT) printf (x1, x2) ^~ regex.c:6801:11: note: in expansion of macro 'PUSH_FAILURE_POINT' PUSH_FAILURE_POINT ((unsigned char *) 0, (unsigned char *) 0, -2); ^~~~~~~~~~~~~~~~~~ regex.c:1779:23: warning: format '%zx' expects argument of type 'size_t', but argument 2 has type 'long int' [-Wformat=] DEBUG_FAIL_PRINT2 (" end: 0x%zx\n", \ ^ (Bytecount) regend[this_reg]); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regex.c:774:55: note: in definition of macro 'DEBUG_FAIL_PRINT2' if (debug_regexps & RE_DEBUG_FAILURE_POINT) printf (x1, x2) ^~ regex.c:6801:11: note: in expansion of macro 'PUSH_FAILURE_POINT' PUSH_FAILURE_POINT ((unsigned char *) 0, (unsigned char *) 0, -2); ^~~~~~~~~~~~~~~~~~ regex.c:1781:23: warning: format '%zx' expects argument of type 'size_t', but argument 2 has type 'long int' [-Wformat=] DEBUG_FAIL_PRINT2 (" info: 0x%zx\n ", \ ^ * (long *) (®_info[this_reg])); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regex.c:774:55: note: in definition of macro 'DEBUG_FAIL_PRINT2' if (debug_regexps & RE_DEBUG_FAILURE_POINT) printf (x1, x2) ^~ regex.c:6801:11: note: in expansion of macro 'PUSH_FAILURE_POINT' PUSH_FAILURE_POINT ((unsigned char *) 0, (unsigned char *) 0, -2); ^~~~~~~~~~~~~~~~~~ regex.c:1814:26: warning: format '%zx' expects argument of type 'size_t', but argument 2 has type 'long int' [-Wformat=] DEBUG_FAIL_PRINT2 (" Pushing pattern 0x%zx: \n", \ ^ regex.c:774:55: note: in definition of macro 'DEBUG_FAIL_PRINT2' if (debug_regexps & RE_DEBUG_FAILURE_POINT) printf (x1, x2) ^~ regex.c:6801:11: note: in expansion of macro 'PUSH_FAILURE_POINT' PUSH_FAILURE_POINT ((unsigned char *) 0, (unsigned char *) 0, -2); ^~~~~~~~~~~~~~~~~~ regex.c:1817:26: warning: format '%zx' expects argument of type 'size_t', but argument 2 has type 'long int' [-Wformat=] DEBUG_FAIL_PRINT2 (" Pushing string 0x%zx: `", \ ^ regex.c:774:55: note: in definition of macro 'DEBUG_FAIL_PRINT2' if (debug_regexps & RE_DEBUG_FAILURE_POINT) printf (x1, x2) ^~ regex.c:6801:11: note: in expansion of macro 'PUSH_FAILURE_POINT' PUSH_FAILURE_POINT ((unsigned char *) 0, (unsigned char *) 0, -2); ^~~~~~~~~~~~~~~~~~ regex.c:1731:26: warning: format '%zd' expects argument of type 'signed size_t', but argument 2 has type 'long int' [-Wformat=] DEBUG_FAIL_PRINT2 (" Before push, next avail: %zd\n", \ ^ (Bytecount) (fail_stack).avail); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~ regex.c:774:55: note: in definition of macro 'DEBUG_FAIL_PRINT2' if (debug_regexps & RE_DEBUG_FAILURE_POINT) printf (x1, x2) ^~ regex.c:6814:11: note: in expansion of macro 'PUSH_FAILURE_POINT' PUSH_FAILURE_POINT ((re_char *) 0, (re_char *) 0, -2); ^~~~~~~~~~~~~~~~~~ regex.c:1733:26: warning: format '%zd' expects argument of type 'signed size_t', but argument 2 has type 'long int' [-Wformat=] DEBUG_FAIL_PRINT2 (" size: %zd\n", \ ^ (Bytecount) (fail_stack).size); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~ regex.c:774:55: note: in definition of macro 'DEBUG_FAIL_PRINT2' if (debug_regexps & RE_DEBUG_FAILURE_POINT) printf (x1, x2) ^~ regex.c:6814:11: note: in expansion of macro 'PUSH_FAILURE_POINT' PUSH_FAILURE_POINT ((re_char *) 0, (re_char *) 0, -2); ^~~~~~~~~~~~~~~~~~ regex.c:1737:26: warning: format '%zd' expects argument of type 'signed size_t', but argument 2 has type 'long int' [-Wformat=] DEBUG_FAIL_PRINT2 (" available: %zd\n", \ ^ regex.c:774:55: note: in definition of macro 'DEBUG_FAIL_PRINT2' if (debug_regexps & RE_DEBUG_FAILURE_POINT) printf (x1, x2) ^~ regex.c:6814:11: note: in expansion of macro 'PUSH_FAILURE_POINT' PUSH_FAILURE_POINT ((re_char *) 0, (re_char *) 0, -2); ^~~~~~~~~~~~~~~~~~ regex.c:1756:23: warning: format '%zd' expects argument of type 'signed size_t', but argument 2 has type 'long int' [-Wformat=] DEBUG_FAIL_PRINT2 ("\n Doubled stack; size now: %zd\n", \ ^ (Bytecount) (fail_stack).size); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~ regex.c:774:55: note: in definition of macro 'DEBUG_FAIL_PRINT2' if (debug_regexps & RE_DEBUG_FAILURE_POINT) printf (x1, x2) ^~ regex.c:6814:11: note: in expansion of macro 'PUSH_FAILURE_POINT' PUSH_FAILURE_POINT ((re_char *) 0, (re_char *) 0, -2); ^~~~~~~~~~~~~~~~~~ regex.c:1758:23: warning: format '%zd' expects argument of type 'signed size_t', but argument 2 has type 'long int' [-Wformat=] DEBUG_FAIL_PRINT2 (" slots available: %zd\n", \ ^ regex.c:774:55: note: in definition of macro 'DEBUG_FAIL_PRINT2' if (debug_regexps & RE_DEBUG_FAILURE_POINT) printf (x1, x2) ^~ regex.c:6814:11: note: in expansion of macro 'PUSH_FAILURE_POINT' PUSH_FAILURE_POINT ((re_char *) 0, (re_char *) 0, -2); ^~~~~~~~~~~~~~~~~~ regex.c:1777:23: warning: format '%zx' expects argument of type 'size_t', but argument 2 has type 'long int' [-Wformat=] DEBUG_FAIL_PRINT2 (" start: 0x%zx\n", \ ^ (Bytecount) regstart[this_reg]); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regex.c:774:55: note: in definition of macro 'DEBUG_FAIL_PRINT2' if (debug_regexps & RE_DEBUG_FAILURE_POINT) printf (x1, x2) ^~ regex.c:6814:11: note: in expansion of macro 'PUSH_FAILURE_POINT' PUSH_FAILURE_POINT ((re_char *) 0, (re_char *) 0, -2); ^~~~~~~~~~~~~~~~~~ regex.c:1779:23: warning: format '%zx' expects argument of type 'size_t', but argument 2 has type 'long int' [-Wformat=] DEBUG_FAIL_PRINT2 (" end: 0x%zx\n", \ ^ (Bytecount) regend[this_reg]); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regex.c:774:55: note: in definition of macro 'DEBUG_FAIL_PRINT2' if (debug_regexps & RE_DEBUG_FAILURE_POINT) printf (x1, x2) ^~ regex.c:6814:11: note: in expansion of macro 'PUSH_FAILURE_POINT' PUSH_FAILURE_POINT ((re_char *) 0, (re_char *) 0, -2); ^~~~~~~~~~~~~~~~~~ regex.c:1781:23: warning: format '%zx' expects argument of type 'size_t', but argument 2 has type 'long int' [-Wformat=] DEBUG_FAIL_PRINT2 (" info: 0x%zx\n ", \ ^ * (long *) (®_info[this_reg])); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regex.c:774:55: note: in definition of macro 'DEBUG_FAIL_PRINT2' if (debug_regexps & RE_DEBUG_FAILURE_POINT) printf (x1, x2) ^~ regex.c:6814:11: note: in expansion of macro 'PUSH_FAILURE_POINT' PUSH_FAILURE_POINT ((re_char *) 0, (re_char *) 0, -2); ^~~~~~~~~~~~~~~~~~ regex.c:1814:26: warning: format '%zx' expects argument of type 'size_t', but argument 2 has type 'long int' [-Wformat=] DEBUG_FAIL_PRINT2 (" Pushing pattern 0x%zx: \n", \ ^ regex.c:774:55: note: in definition of macro 'DEBUG_FAIL_PRINT2' if (debug_regexps & RE_DEBUG_FAILURE_POINT) printf (x1, x2) ^~ regex.c:6814:11: note: in expansion of macro 'PUSH_FAILURE_POINT' PUSH_FAILURE_POINT ((re_char *) 0, (re_char *) 0, -2); ^~~~~~~~~~~~~~~~~~ regex.c:1817:26: warning: format '%zx' expects argument of type 'size_t', but argument 2 has type 'long int' [-Wformat=] DEBUG_FAIL_PRINT2 (" Pushing string 0x%zx: `", \ ^ regex.c:774:55: note: in definition of macro 'DEBUG_FAIL_PRINT2' if (debug_regexps & RE_DEBUG_FAILURE_POINT) printf (x1, x2) ^~ regex.c:6814:11: note: in expansion of macro 'PUSH_FAILURE_POINT' PUSH_FAILURE_POINT ((re_char *) 0, (re_char *) 0, -2); ^~~~~~~~~~~~~~~~~~ regex.c:6828:36: warning: format '%zx' expects argument of type 'size_t', but argument 2 has type 'long int' [-Wformat=] DEBUG_MATCH_PRINT3 (" Setting 0x%zx to %d.\n", (Bytecount) p, ^ ~~~~~~~~~~~~~ regex.c:791:50: note: in definition of macro 'DEBUG_MATCH_PRINT3' if (debug_regexps & RE_DEBUG_MATCHING) printf (x1, x2, x3) ^~ regex.c:6834:35: warning: format '%zx' expects argument of type 'size_t', but argument 2 has type 'long int' [-Wformat=] DEBUG_MATCH_PRINT2 (" Setting two bytes from 0x%zx to no_op.\n", ^ (Bytecount) (p+2)); ~~~~~~~~~~~~~~~~~ regex.c:789:50: note: in definition of macro 'DEBUG_MATCH_PRINT2' if (debug_regexps & RE_DEBUG_MATCHING) printf (x1, x2) ^~ regex.c:6868:33: warning: format '%zx' expects argument of type 'size_t', but argument 2 has type 'long int' [-Wformat=] DEBUG_MATCH_PRINT3 (" Setting 0x%zx to %d.\n", (Bytecount) p2, ^ ~~~~~~~~~~~~~ regex.c:791:50: note: in definition of macro 'DEBUG_MATCH_PRINT3' if (debug_regexps & RE_DEBUG_MATCHING) printf (x1, x2, x3) ^~ --- sequence.o --- gcc -c -Wall -Wno-switch -Wundef -Wsign-compare -Wno-char-subscripts -Wpacked -Wpointer-arith -Wshadow -Wmissing-declarations -Wmissing-prototypes -Wstrict-prototypes -Wdeclaration-after-statement -g -O2 -Dunix -no-pie -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -DTERMINFO -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -Demacs -I. -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/src -DHAVE_CONFIG_H -I/usr/pkg/include -I/usr/X11R7/include -I/usr/X11R7/include sequence.c --- regex.o --- regex.c:1877:26: warning: format '%zd' expects argument of type 'signed size_t', but argument 2 has type 'long int' [-Wformat=] DEBUG_FAIL_PRINT2 (" Before pop, next avail: %zd\n", \ ^ (Bytecount) fail_stack.avail); \ ~~~~~~~~~~~~~~~~~~~~~~~~ regex.c:774:55: note: in definition of macro 'DEBUG_FAIL_PRINT2' if (debug_regexps & RE_DEBUG_FAILURE_POINT) printf (x1, x2) ^~ regex.c:7173:11: note: in expansion of macro 'POP_FAILURE_POINT' POP_FAILURE_POINT (d, p, ^~~~~~~~~~~~~~~~~ regex.c:1879:26: warning: format '%zd' expects argument of type 'signed size_t', but argument 2 has type 'long int' [-Wformat=] DEBUG_FAIL_PRINT2 (" size: %zd\n", \ ^ (Bytecount) fail_stack.size); \ ~~~~~~~~~~~~~~~~~~~~~~~~ regex.c:774:55: note: in definition of macro 'DEBUG_FAIL_PRINT2' if (debug_regexps & RE_DEBUG_FAILURE_POINT) printf (x1, x2) ^~ regex.c:7173:11: note: in expansion of macro 'POP_FAILURE_POINT' POP_FAILURE_POINT (d, p, ^~~~~~~~~~~~~~~~~ regex.c:1901:26: warning: format '%zx' expects argument of type 'size_t', but argument 2 has type 'long int' [-Wformat=] DEBUG_FAIL_PRINT2 (" Popping string 0x%zx: `", (Bytecount) str); \ ^ ~~~~~~~~~~~~~~~~~~~ DEBUG_FAIL_PRINT_DOUBLE_STRING (str, string1, size1, \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ string2, size2); \ ~~~~~~~~~~~~~~~~~~~~ DEBUG_FAIL_PRINT1 ("'\n"); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_FAIL_PRINT2 (" Popping pattern 0x%zx: ", (Bytecount) pat); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_FAIL_PRINT_COMPILED_PATTERN (bufp, pat, pend); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_FAIL_PRINT2 (" Popping high active reg: %d\n", high_reg); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_FAIL_PRINT2 (" Popping low active reg: %d\n", low_reg); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } \ ~~~~~~~~~~~ \ ~ for (this_reg = high_reg; this_reg >= low_reg; this_reg--) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { \ ~~~~~~~~~~~ reg_info[this_reg].word = POP_FAILURE_ELT (); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regend[this_reg] = POP_FAILURE_RELOCATABLE (); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regstart[this_reg] = POP_FAILURE_RELOCATABLE (); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ \ ~ if (DEBUG_RUNTIME_FLAGS & RE_DEBUG_FAILURE_POINT) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { \ ~~~~~~~~~~ DEBUG_FAIL_PRINT2 (" Popping reg: %d\n", this_reg); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_FAIL_PRINT2 (" info: 0x%zx\n", \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ * (Bytecount *) ®_info[this_reg]); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_FAIL_PRINT2 (" end: 0x%zx\n", \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (Bytecount) regend[this_reg]); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_FAIL_PRINT2 (" start: 0x%zx\n", \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (Bytecount) regstart[this_reg]); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } \ ~~~~~~~~~~ } \ ~~~~~~~~~~~ \ ~ set_regs_matched_done = 0; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_STATEMENT (nfailure_points_popped++); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } while (0) /* POP_FAILURE_POINT */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ~ /* Structure for per-register (a.k.a. per-group) information. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Other register information, such as the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ starting and ending positions (which are addresses), and the list of ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ inner groups (which is a bits list) are maintained in separate ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ variables. ~~~~~~~~~~ We are making a (strictly speaking) nonportable assumption here: that ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the compiler will pack our bit fields into something that fits into ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the type of `word', i.e., is something that fits into one item on the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ failure stack. */ ~~~~~~~~~~~~~~~~~~ typedef union ~~~~~~~~~~~~~ { ~ fail_stack_elt_t word; ~~~~~~~~~~~~~~~~~~~~~~ struct ~~~~~~ { ~ /* This field is one if this group can match the empty string, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ zero if not. If not yet determined, `MATCH_NULL_UNSET_VALUE'. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define MATCH_NULL_UNSET_VALUE 3 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ unsigned int match_null_string_p : 2; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ unsigned int is_active : 1; ~~~~~~~~~~~~~~~~~~~~~~~~~~~ unsigned int matched_something : 1; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ unsigned int ever_matched_something : 1; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } bits; ~~~~~~~ } register_info_type; ~~~~~~~~~~~~~~~~~~~~~ #define REG_MATCH_NULL_STRING_P(R) ((R).bits.match_null_string_p) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define IS_ACTIVE(R) ((R).bits.is_active) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define MATCHED_SOMETHING(R) ((R).bits.matched_something) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define EVER_MATCHED_SOMETHING(R) ((R).bits.ever_matched_something) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Call this when have matched a real character; it sets `matched' flags ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for the subexpressions which we are currently inside. Also records ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ that those subexprs have matched. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define SET_REGS_MATCHED() \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ do \ ~~~~~~~~~~~~ { \ ~~~~~~~~~~~ if (!set_regs_matched_done) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { \ ~~~~~~~~~~ int r; \ ~~~~~~~~~~~~~~ set_regs_matched_done = 1; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (r = lowest_active_reg; r <= highest_active_reg; r++) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { \ ~~~~~~~~~~ MATCHED_SOMETHING (reg_info[r]) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ = EVER_MATCHED_SOMETHING (reg_info[r]) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ = 1; \ ~~~~~~~~~~~~ } \ ~~~~~~~~~~ } \ ~~~~~~~~~~ } \ ~~~~~~~~~~~ while (0) ~~~~~~~~~ ~ /* Subroutine declarations and macros for regex_compile. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Fetch the next character in the uncompiled pattern---translating it ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if necessary. */ ~~~~~~~~~~~~~~~~~ #define PATFETCH(c) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~ do { \ ~~~~~~~~~~~~~~ PATFETCH_RAW (c); \ ~~~~~~~~~~~~~~~~~~~~~~~~~ c = RE_TRANSLATE (c); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } while (0) ~~~~~~~~~~~ /* Fetch the next character in the uncompiled pattern, with no ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ translation. */ ~~~~~~~~~~~~~~~~ #define PATFETCH_RAW(c) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ do {if (p == pend) return REG_EEND; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ assert (p < pend); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~ c = itext_ichar (p); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ INC_IBYTEPTR (p); \ ~~~~~~~~~~~~~~~~~~~~~~~~~ } while (0) ~~~~~~~~~~~ /* Go backwards one character in the pattern. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define PATUNFETCH DEC_IBYTEPTR (p) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* If `translate' is non-null, return translate[D], else just D. We ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ cast the subscript to translate because some data is declared as ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `char *', to avoid warnings when a string constant is passed. But ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ when we use a character as a subscript we must make it unsigned. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define RE_TRANSLATE(d) \ ~~~~~~~~~~~~~~~~~~~~~~~~~ (TRANSLATE_P (translate) ? RE_TRANSLATE_1 (d) : (d)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Macros for outputting the compiled pattern into `buffer'. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* If the buffer isn't allocated when it comes in, use this. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define INIT_BUF_SIZE 32 ~~~~~~~~~~~~~~~~~~~~~~~~~ /* Make sure we have at least N more bytes of space in buffer. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define GET_BUFFER_SPACE(n) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ while (buf_end - bufp->buffer + (n) > (ptrdiff_t) bufp->allocated) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ EXTEND_BUFFER () ~~~~~~~~~~~~~~~~ /* Make sure we have one more byte of buffer space and then add C to it. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define BUF_PUSH(c) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~ do { \ ~~~~~~~~~~~~~~ GET_BUFFER_SPACE (1); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ *buf_end++ = (unsigned char) (c); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } while (0) ~~~~~~~~~~~ /* Ensure we have two more bytes of buffer space and then append C1 and C2. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define BUF_PUSH_2(c1, c2) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ do { \ ~~~~~~~~~~~~~~ GET_BUFFER_SPACE (2); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ *buf_end++ = (unsigned char) (c1); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ *buf_end++ = (unsigned char) (c2); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } while (0) ~~~~~~~~~~~ /* As with BUF_PUSH_2, except for three bytes. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define BUF_PUSH_3(c1, c2, c3) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ do { \ ~~~~~~~~~~~~~~ GET_BUFFER_SPACE (3); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ *buf_end++ = (unsigned char) (c1); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ *buf_end++ = (unsigned char) (c2); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ *buf_end++ = (unsigned char) (c3); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } while (0) ~~~~~~~~~~~ /* Store a jump with opcode OP at LOC to location TO. We store a ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ relative address offset by the three bytes the jump itself occupies. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define STORE_JUMP(op, loc, to) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ store_op1 (op, loc, (to) - (loc) - 3) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Likewise, for a two-argument jump. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define STORE_JUMP2(op, loc, to, arg) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ store_op2 (op, loc, (to) - (loc) - 3, arg) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Like `STORE_JUMP', but for inserting. Assume `buf_end' is the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ buffer end. */ ~~~~~~~~~~~~~~~ #define INSERT_JUMP(op, loc, to) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ insert_op1 (op, loc, (to) - (loc) - 3, buf_end) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Like `STORE_JUMP2', but for inserting. Assume `buf_end' is the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ buffer end. */ ~~~~~~~~~~~~~~~ #define INSERT_JUMP2(op, loc, to, arg) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ insert_op2 (op, loc, (to) - (loc) - 3, arg, buf_end) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Extend the buffer by twice its current size via realloc and ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ reset the pointers that pointed into the old block to point to the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ correct places in the new one. If extending the buffer results in it ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ being larger than RE_MAX_BUF_SIZE, then flag memory exhausted. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define EXTEND_BUFFER() \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ do { \ ~~~~~~~~~~~~~~~ re_char *old_buffer = bufp->buffer; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (bufp->allocated == RE_MAX_BUF_SIZE) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return REG_ESIZE; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->allocated <<= 1; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (bufp->allocated > RE_MAX_BUF_SIZE) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->allocated = RE_MAX_BUF_SIZE; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->buffer = \ ~~~~~~~~~~~~~~~~~~~~~~~ (unsigned char *) xrealloc (bufp->buffer, bufp->allocated); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (bufp->buffer == NULL) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return REG_ESPACE; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~ /* If the buffer moved, move all the pointers into it. */ \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (old_buffer != bufp->buffer) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { \ ~~~~~~~~~~~~ buf_end = (buf_end - old_buffer) + bufp->buffer; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ begalt = (begalt - old_buffer) + bufp->buffer; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (fixup_alt_jump) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~ fixup_alt_jump = (fixup_alt_jump - old_buffer) + bufp->buffer; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (laststart) \ ~~~~~~~~~~~~~~~~~~~~~~~ laststart = (laststart - old_buffer) + bufp->buffer; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (pending_exact) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~ pending_exact = (pending_exact - old_buffer) + bufp->buffer; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } \ ~~~~~~~~~~~~ } while (0) ~~~~~~~~~~~ #define INIT_REG_TRANSLATE_SIZE 5 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Macros for the compile stack. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Since offsets can go either forwards or backwards, this type needs to be ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ able to hold values from -(RE_MAX_BUF_SIZE - 1) to RE_MAX_BUF_SIZE - 1. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ typedef int pattern_offset_t; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ typedef struct ~~~~~~~~~~~~~~ { ~ pattern_offset_t begalt_offset; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ pattern_offset_t fixup_alt_jump; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ pattern_offset_t inner_group_offset; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ pattern_offset_t laststart_offset; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regnum_t regnum; ~~~~~~~~~~~~~~~~ } compile_stack_elt_t; ~~~~~~~~~~~~~~~~~~~~~~ typedef struct ~~~~~~~~~~~~~~ { ~ compile_stack_elt_t *stack; ~~~~~~~~~~~~~~~~~~~~~~~~~~~ int size; ~~~~~~~~~ int avail; /* Offset of next open position. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } compile_stack_type; ~~~~~~~~~~~~~~~~~~~~~ #define INIT_COMPILE_STACK_SIZE 32 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define COMPILE_STACK_EMPTY (compile_stack.avail == 0) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define COMPILE_STACK_FULL (compile_stack.avail == compile_stack.size) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* The next available element. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define COMPILE_STACK_TOP (compile_stack.stack[compile_stack.avail]) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Set the bit for character C in a bit vector. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define SET_LIST_BIT(c) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (buf_end[((unsigned char) (c)) / BYTEWIDTH] \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ |= 1 << (((unsigned char) c) % BYTEWIDTH)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef MULE ~~~~~~~~~~~ /* Set the "bit" for character C in a range table. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define SET_RANGETAB_BIT(c) put_range_table (rtab, c, c, Qt) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ #ifdef emacs ~~~~~~~~~~~~ /* Parse the longest number we can, but don't produce a bignum, that can't ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ correspond to anything we're interested in and would needlessly complicate ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ code. Also avoid the silent overflow issues of the non-emacs code below. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ If the string at P is not exhausted, leave P pointing at the next ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (probable-)non-digit byte encountered. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define GET_UNSIGNED_NUMBER(num) do \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Ibyte *_gus_numend = NULL; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Lisp_Object _gus_numno; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* most-positive-fixnum on 32 bit XEmacs is 10 decimal digits, \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ nine will keep us in fixnum territory no matter our \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ architecture */ \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Bytecount limit = min (pend - p, 9); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ \ ~ /* Require that any digits are ASCII. We already require that \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the user type ASCII in order to type {,(,|, etc, and there is \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the potential for security holes in the future if we allow \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ non-ASCII digits to specify groups in regexps and other \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ code that parses regexps is not aware of this. */ \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ _gus_numno = parse_integer (p, &_gus_numend, limit, 10, 1, \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Vdigit_fixnum_ascii); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (FIXNUMP (_gus_numno) && XREALFIXNUM (_gus_numno) >= 0) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ num = XREALFIXNUM (_gus_numno); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ p = _gus_numend; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } while (0) ~~~~~~~~~~~ #else ~~~~~ /* Get the next unsigned number in the uncompiled pattern. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define GET_UNSIGNED_NUMBER(num) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { if (p != pend) \ ~~~~~~~~~~~~~~~~~~~~~~~~ { \ ~~~~~~~~~~~ int _gun_do_unfetch = 1; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PATFETCH (c); \ ~~~~~~~~~~~~~~~~~~~~~~ while (ISDIGIT (c)) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~ { \ ~~~~~~~~~~~ if (num < 0) \ ~~~~~~~~~~~~~~~~~~~~ num = 0; \ ~~~~~~~~~~~~~~~~ num = num * 10 + c - '0'; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (p == pend) \ ~~~~~~~~~~~~~~~~~~~~~~ { \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ _gun_do_unfetch = 0; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; \ ~~~~~~~~~~~~~~~ } \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PATFETCH (c); \ ~~~~~~~~~~~~~~~~~~~~ } \ ~~~~~~~~~~~ if (_gun_do_unfetch) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Make sure P points to the next non-digit character. */ \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PATUNFETCH; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ #endif ~~~~~~ /* Map a string to the char class it names (if any). BEG points to the string ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ to be parsed and LIMIT is the length, in bytes, of that string. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ XEmacs; this only handles the NAME part of the [:NAME:] specification of a ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ character class name. The GNU emacs version of this function attempts to ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ handle the string from [: onwards, and is called re_wctype_parse. Our ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ approach means the function doesn't need to be called with every character ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ class encountered. ~~~~~~~~~~~~~~~~~~ LENGTH would be a Bytecount if this function didn't need to be compiled ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ also for executables that don't include lisp.h ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Return RECC_ERROR if STRP doesn't match a known character class. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_wctype_t ~~~~~~~~~~~ re_wctype (const unsigned char *beg, int limit) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ /* Sort tests in the length=five case by frequency the classes to minimize ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ number of times we fail the comparison. The frequencies of character class ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ names used in Emacs sources as of 2016-07-27: ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ $ find \( -name \*.c -o -name \*.el \) -exec grep -h '\[:[a-z]*:]' {} + | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ sed 's/]/]\n/g' |grep -o '\[:[a-z]*:]' |sort |uniq -c |sort -nr ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 213 [:alnum:] ~~~~~~~~~~~~~ 104 [:alpha:] ~~~~~~~~~~~~~ 62 [:space:] ~~~~~~~~~~~~ 39 [:digit:] ~~~~~~~~~~~~ 36 [:blank:] ~~~~~~~~~~~~ 26 [:word:] ~~~~~~~~~~~ 26 [:upper:] ~~~~~~~~~~~~ 21 [:lower:] ~~~~~~~~~~~~ 10 [:xdigit:] ~~~~~~~~~~~~~ 10 [:punct:] ~~~~~~~~~~~~ 10 [:ascii:] ~~~~~~~~~~~~ 4 [:nonascii:] ~~~~~~~~~~~~~~ 4 [:graph:] ~~~~~~~~~~~ 2 [:print:] ~~~~~~~~~~~ 2 [:cntrl:] ~~~~~~~~~~~ 1 [:ff:] ~~~~~~~~ If you update this list, consider also updating chain of or'ed conditions ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ in execute_charset function. XEmacs; our equivalent is the condition ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ checking class_bits in the charset_mule and charset_mule_not opcodes. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ ~~ switch (limit) { ~~~~~~~~~~~~~~~~ case 4: ~~~~~~~ if (!memcmp (beg, "word", 4)) return RECC_WORD; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case 5: ~~~~~~~ if (!memcmp (beg, "alnum", 5)) return RECC_ALNUM; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!memcmp (beg, "alpha", 5)) return RECC_ALPHA; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!memcmp (beg, "space", 5)) return RECC_SPACE; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!memcmp (beg, "digit", 5)) return RECC_DIGIT; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!memcmp (beg, "blank", 5)) return RECC_BLANK; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!memcmp (beg, "upper", 5)) return RECC_UPPER; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!memcmp (beg, "lower", 5)) return RECC_LOWER; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!memcmp (beg, "punct", 5)) return RECC_PUNCT; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!memcmp (beg, "ascii", 5)) return RECC_ASCII; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!memcmp (beg, "graph", 5)) return RECC_GRAPH; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!memcmp (beg, "print", 5)) return RECC_PRINT; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!memcmp (beg, "cntrl", 5)) return RECC_CNTRL; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case 6: ~~~~~~~ if (!memcmp (beg, "xdigit", 6)) return RECC_XDIGIT; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case 7: ~~~~~~~ if (!memcmp (beg, "unibyte", 7)) return RECC_UNIBYTE; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case 8: ~~~~~~~ if (!memcmp (beg, "nonascii", 8)) return RECC_NONASCII; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case 9: ~~~~~~~ if (!memcmp (beg, "multibyte", 9)) return RECC_MULTIBYTE; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ } ~ return RECC_ERROR; ~~~~~~~~~~~~~~~~~~ } ~ /* True if CH is in the char class CC. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int ~~~ re_iswctype (int ch, re_wctype_t cc ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RE_ISWCTYPE_ARG_DECL) ~~~~~~~~~~~~~~~~~~~~~ { ~ switch (cc) ~~~~~~~~~~~ { ~ case RECC_ALNUM: return ISALNUM (ch) != 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_ALPHA: return ISALPHA (ch) != 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_BLANK: return ISBLANK (ch) != 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_CNTRL: return ISCNTRL (ch) != 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_DIGIT: return ISDIGIT (ch) != 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_GRAPH: return ISGRAPH (ch) != 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_PRINT: return ISPRINT (ch) != 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_PUNCT: return ISPUNCT (ch) != 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_SPACE: return ISSPACE (ch) != 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef emacs ~~~~~~~~~~~~ case RECC_UPPER: ~~~~~~~~~~~~~~~~ return NILP (lispbuf->case_fold_search) ? ISUPPER (ch) != 0 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ : !NOCASEP (lispbuf, ch); ~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_LOWER: ~~~~~~~~~~~~~~~~ return NILP (lispbuf->case_fold_search) ? ISLOWER (ch) != 0 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ : !NOCASEP (lispbuf, ch); ~~~~~~~~~~~~~~~~~~~~~~~~~ #else ~~~~~ case RECC_UPPER: return ISUPPER (ch) != 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_LOWER: return ISLOWER (ch) != 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ case RECC_XDIGIT: return ISXDIGIT (ch) != 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_ASCII: return ISASCII (ch) != 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_NONASCII: case RECC_MULTIBYTE: return !ISASCII (ch); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_UNIBYTE: return ISUNIBYTE (ch) != 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_WORD: return ISWORD (ch) != 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_ERROR: return false; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ assert (0); ~~~~~~~~~~~ return -1; ~~~~~~~~~~ } ~ #ifdef MULE ~~~~~~~~~~~ static re_bool ~~~~~~~~~~~~~~ re_wctype_can_match_non_ascii (re_wctype_t cc) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ switch (cc) ~~~~~~~~~~~ { ~ case RECC_ASCII: ~~~~~~~~~~~~~~~~ case RECC_UNIBYTE: ~~~~~~~~~~~~~~~~~~ case RECC_CNTRL: ~~~~~~~~~~~~~~~~ case RECC_DIGIT: ~~~~~~~~~~~~~~~~ case RECC_XDIGIT: ~~~~~~~~~~~~~~~~~ case RECC_BLANK: ~~~~~~~~~~~~~~~~ return false; ~~~~~~~~~~~~~ default: ~~~~~~~~ return true; ~~~~~~~~~~~~ } ~ } ~ #endif /* MULE */ ~~~~~~~~~~~~~~~~~ #ifdef emacs ~~~~~~~~~~~~ /* Return a bit-pattern to use in the range-table bits to match multibyte ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ chars of class CC. */ ~~~~~~~~~~~~~~~~~~~~~~ static unsigned char ~~~~~~~~~~~~~~~~~~~~ re_wctype_to_bit (re_wctype_t cc) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ switch (cc) ~~~~~~~~~~~ { ~ case RECC_PRINT: case RECC_GRAPH: ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_ALPHA: return BIT_ALPHA; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_ALNUM: case RECC_WORD: return BIT_WORD; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_LOWER: return BIT_LOWER; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_UPPER: return BIT_UPPER; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_PUNCT: return BIT_PUNCT; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_SPACE: return BIT_SPACE; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_MULTIBYTE: case RECC_NONASCII: ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_ASCII: case RECC_DIGIT: case RECC_XDIGIT: case RECC_CNTRL: ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_BLANK: case RECC_UNIBYTE: case RECC_ERROR: return 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ default: ~~~~~~~~ ABORT (); ~~~~~~~~~ return 0; ~~~~~~~~~ } ~ } ~ #endif /* emacs */ ~~~~~~~~~~~~~~~~~~ ~ static void store_op1 (re_opcode_t op, unsigned char *loc, int arg); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ static void store_op2 (re_opcode_t op, unsigned char *loc, int arg1, int arg2); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ static void insert_op1 (re_opcode_t op, unsigned char *loc, int arg, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ unsigned char *end); ~~~~~~~~~~~~~~~~~~~~ static void insert_op2 (re_opcode_t op, unsigned char *loc, int arg1, int arg2, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ unsigned char *end); ~~~~~~~~~~~~~~~~~~~~ static re_bool at_begline_loc_p (re_char *pattern, re_char *p, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ reg_syntax_t syntax); ~~~~~~~~~~~~~~~~~~~~~ static re_bool at_endline_loc_p (re_char *p, re_char *pend, int syntax); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ static re_bool group_in_compile_stack (compile_stack_type compile_stack, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regnum_t regnum); ~~~~~~~~~~~~~~~~~ static reg_errcode_t compile_range (re_char **p_ptr, re_char *pend, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RE_TRANSLATE_TYPE translate, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ reg_syntax_t syntax, ~~~~~~~~~~~~~~~~~~~~ unsigned char *b); ~~~~~~~~~~~~~~~~~~ #ifdef MULE ~~~~~~~~~~~ static reg_errcode_t compile_extended_range (re_char **p_ptr, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_char *pend, ~~~~~~~~~~~~~~ RE_TRANSLATE_TYPE translate, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ reg_syntax_t syntax, ~~~~~~~~~~~~~~~~~~~~ Lisp_Object rtab); ~~~~~~~~~~~~~~~~~~ #endif /* MULE */ ~~~~~~~~~~~~~~~~~ #ifdef emacs ~~~~~~~~~~~~ reg_errcode_t compile_char_class (re_wctype_t cc, Lisp_Object rtab, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Bitbyte *flags_out); ~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ static re_bool group_match_null_string_p (re_char **p, re_char *end, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ register_info_type *reg_info); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ static re_bool alt_match_null_string_p (re_char *p, re_char *end, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ register_info_type *reg_info); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ static re_bool common_op_match_null_string_p (re_char **p, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_char *end, ~~~~~~~~~~~~~ register_info_type *reg_info); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ static int bcmp_translate (re_char *s1, re_char *s2, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ REGISTER int len, RE_TRANSLATE_TYPE translate ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef emacs ~~~~~~~~~~~~ , Internal_Format fmt, Lisp_Object lispobj ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ ); ~~ static int re_match_2_internal (struct re_pattern_buffer *bufp, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_char *string1, int size1, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_char *string2, int size2, int pos, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ struct re_registers *regs, int stop ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RE_LISP_CONTEXT_ARGS_DECL); ~~~~~~~~~~~~~~~~~~~~~~~~~~~ ~ #ifndef MATCH_MAY_ALLOCATE ~~~~~~~~~~~~~~~~~~~~~~~~~~ /* If we cannot allocate large objects within re_match_2_internal, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ we make the fail stack and register vectors global. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ The fail stack, we grow to the maximum size when a regexp ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ is compiled. ~~~~~~~~~~~~ The register vectors, we adjust in size each time we ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ compile a regexp, according to the number of registers it needs. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ static fail_stack_type fail_stack; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Size with which the following vectors are currently allocated. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ That is so we can make them bigger as needed, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ but never make them smaller. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ static int regs_allocated_size; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ static re_char ** regstart, ** regend; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ static re_char ** old_regstart, ** old_regend; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ static re_char **best_regstart, **best_regend; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ static register_info_type *reg_info; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ static re_char **reg_dummy; ~~~~~~~~~~~~~~~~~~~~~~~~~~~ static register_info_type *reg_info_dummy; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Make the register vectors big enough for NUM_REGS registers, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ but don't make them smaller. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ static ~~~~~~ regex_grow_registers (int num_regs) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ if (num_regs > regs_allocated_size) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ RETALLOC (regstart, num_regs, re_char *); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RETALLOC (regend, num_regs, re_char *); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RETALLOC (old_regstart, num_regs, re_char *); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RETALLOC (old_regend, num_regs, re_char *); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RETALLOC (best_regstart, num_regs, re_char *); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RETALLOC (best_regend, num_regs, re_char *); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RETALLOC (reg_info, num_regs, register_info_type); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RETALLOC (reg_dummy, num_regs, re_char *); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RETALLOC (reg_info_dummy, num_regs, register_info_type); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regs_allocated_size = num_regs; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ } ~ #endif /* not MATCH_MAY_ALLOCATE */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ~ /* `regex_compile' compiles PATTERN (of length SIZE) according to SYNTAX. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Returns one of error codes defined in `regex.h', or zero for success. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Assumes the `allocated' (and perhaps `buffer') and `translate' ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fields are set in BUFP on entry. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ If it succeeds, results are put in BUFP (if it returns an error, the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ contents of BUFP are undefined): ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `buffer' is the compiled pattern; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `syntax' is set to SYNTAX; ~~~~~~~~~~~~~~~~~~~~~~~~~~ `used' is set to the length of the compiled pattern; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `fastmap_accurate' is zero; ~~~~~~~~~~~~~~~~~~~~~~~~~~~ `re_ngroups' is the number of groups/subexpressions (including shy ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ groups) in PATTERN; ~~~~~~~~~~~~~~~~~~~ `re_nsub' is the number of non-shy groups in PATTERN; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `not_bol' and `not_eol' are zero; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ The `fastmap' and `newline_anchor' fields are neither ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ examined nor set. */ ~~~~~~~~~~~~~~~~~~~~~ /* Return, freeing storage we allocated. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define FREE_STACK_RETURN(value) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ do \ ~~~~~~~~~~ { \ ~~~~~~~~~ xfree (compile_stack.stack); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return value; \ ~~~~~~~~~~~~~~~~~~~~ } while (0) ~~~~~~~~~~~ static reg_errcode_t ~~~~~~~~~~~~~~~~~~~~ regex_compile (re_char *pattern, int size, reg_syntax_t syntax, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ struct re_pattern_buffer *bufp) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ /* We fetch characters from PATTERN here. We declare these as int ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (or possibly long) so that chars above 127 can be used as ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ array indices. The macros that fetch a character from the pattern ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ make sure to coerce to unsigned char before assigning, so we won't ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ get bitten by negative numbers here. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* XEmacs change: used to be unsigned char. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ REGISTER EMACS_INT c, c1; ~~~~~~~~~~~~~~~~~~~~~~~~~ /* A random temporary spot in PATTERN. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_char *p1; ~~~~~~~~~~~~ /* Points to the end of the buffer, where we should append. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ REGISTER unsigned char *buf_end; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Keeps track of unclosed groups. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ compile_stack_type compile_stack; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Points to the current (ending) position in the pattern. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_char *p = pattern; ~~~~~~~~~~~~~~~~~~~~~ re_char *pend = pattern + size; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* How to translate the characters in the pattern. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RE_TRANSLATE_TYPE translate = bufp->translate; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Address of the count-byte of the most recently inserted `exactn' ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ command. This makes it possible to tell if a new exact-match ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ character can be added to that command or if the character requires ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ a new `exactn' command. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~ unsigned char *pending_exact = 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Address of start of the most recently finished expression. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ This tells, e.g., postfix * where to find the start of its ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ operand. Reset at the beginning of groups and alternatives. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ unsigned char *laststart = 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Address of beginning of regexp, or inside of last group. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ unsigned char *begalt; ~~~~~~~~~~~~~~~~~~~~~~ /* Place in the uncompiled pattern (i.e., the {) to ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ which to go back if the interval is invalid. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_char *beg_interval; ~~~~~~~~~~~~~~~~~~~~~~ /* Address of the place where a forward jump should go to the end of ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the containing expression. Each alternative of an `or' -- except the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ last -- ends with a forward jump of this sort. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ unsigned char *fixup_alt_jump = 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Counts open-groups as they are encountered. Remembered for the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ matching close-group on the compile stack, so the same register ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ number is put in the stop_memory as the start_memory. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regnum_t regnum = 0; ~~~~~~~~~~~~~~~~~~~~ #ifdef DEBUG ~~~~~~~~~~~~ if (debug_regexps & RE_DEBUG_COMPILATION) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ int debug_count; ~~~~~~~~~~~~~~~~ DEBUG_PRINT1 ("\nCompiling pattern: "); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (debug_count = 0; debug_count < size; debug_count++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ putchar (pattern[debug_count]); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ putchar ('\n'); ~~~~~~~~~~~~~~~ } ~ #endif /* DEBUG */ ~~~~~~~~~~~~~~~~~~ /* Initialize the compile stack. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ compile_stack.stack = TALLOC (INIT_COMPILE_STACK_SIZE, compile_stack_elt_t); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (compile_stack.stack == NULL) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return REG_ESPACE; ~~~~~~~~~~~~~~~~~~ compile_stack.size = INIT_COMPILE_STACK_SIZE; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ compile_stack.avail = 0; ~~~~~~~~~~~~~~~~~~~~~~~~ /* Initialize the pattern buffer. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->syntax = syntax; ~~~~~~~~~~~~~~~~~~~~~~ bufp->fastmap_accurate = 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->not_bol = bufp->not_eol = 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Set `used' to zero, so that if we return an error, the pattern ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ printer (for debugging) will think there's no pattern. We reset it ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ at the end. */ ~~~~~~~~~~~~~~~ bufp->used = 0; ~~~~~~~~~~~~~~~ /* Always count groups, whether or not bufp->no_sub is set. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->re_nsub = 0; ~~~~~~~~~~~~~~~~~~ bufp->re_ngroups = 0; ~~~~~~~~~~~~~~~~~~~~~ bufp->warned_about_incompatible_back_references = 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (bufp->external_to_internal_register == 0) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ bufp->external_to_internal_register_size = INIT_REG_TRANSLATE_SIZE; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RETALLOC (bufp->external_to_internal_register, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->external_to_internal_register_size, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int); ~~~~~ } ~ { ~ int i; ~~~~~~ bufp->external_to_internal_register[0] = 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (i = 1; i < bufp->external_to_internal_register_size; i++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->external_to_internal_register[i] = (int) 0xDEADBEEF; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ #if !defined (emacs) && !defined (SYNTAX_TABLE) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Initialize the syntax table. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ init_syntax_once (); ~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ if (bufp->allocated == 0) ~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ if (bufp->buffer) ~~~~~~~~~~~~~~~~~ { /* If zero allocated, but buffer is non-null, try to realloc ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ enough space. This loses if buffer's address is bogus, but ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ that is the user's responsibility. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RETALLOC (bufp->buffer, INIT_BUF_SIZE, unsigned char); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ else ~~~~ { /* Caller did not allocate a buffer. Do it for them. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->buffer = TALLOC (INIT_BUF_SIZE, unsigned char); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ if (!bufp->buffer) FREE_STACK_RETURN (REG_ESPACE); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->allocated = INIT_BUF_SIZE; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ begalt = buf_end = bufp->buffer; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Loop through the uncompiled pattern until we're at the end. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ while (p != pend) ~~~~~~~~~~~~~~~~~ { ~ PATFETCH (c); ~~~~~~~~~~~~~ switch (c) ~~~~~~~~~~ { ~ case '^': ~~~~~~~~~ { ~ if ( /* If at start of pattern, it's an operator. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ p == pattern + 1 ~~~~~~~~~~~~~~~~ /* If context independent, it's an operator. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || syntax & RE_CONTEXT_INDEP_ANCHORS ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Otherwise, depends on what's come before. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || at_begline_loc_p (pattern, p, syntax)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ BUF_PUSH (begline); ~~~~~~~~~~~~~~~~~~~ else ~~~~ goto normal_char; ~~~~~~~~~~~~~~~~~ } ~ break; ~~~~~~ case '$': ~~~~~~~~~ { ~ if ( /* If at end of pattern, it's an operator. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ p == pend ~~~~~~~~~ /* If context independent, it's an operator. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || syntax & RE_CONTEXT_INDEP_ANCHORS ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Otherwise, depends on what's next. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || at_endline_loc_p (p, pend, syntax)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ BUF_PUSH (endline); ~~~~~~~~~~~~~~~~~~~ else ~~~~ goto normal_char; ~~~~~~~~~~~~~~~~~ } ~ break; ~~~~~~ case '+': ~~~~~~~~~ case '?': ~~~~~~~~~ if ((syntax & RE_BK_PLUS_QM) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || (syntax & RE_LIMITED_OPS)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto normal_char; ~~~~~~~~~~~~~~~~~ handle_plus: ~~~~~~~~~~~~ case '*': ~~~~~~~~~ /* If there is no previous pattern... */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!laststart) ~~~~~~~~~~~~~~~ { ~ if (syntax & RE_CONTEXT_INVALID_OPS) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ FREE_STACK_RETURN (REG_BADRPT); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ else if (!(syntax & RE_CONTEXT_INDEP_OPS)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto normal_char; ~~~~~~~~~~~~~~~~~ } ~ { ~ /* true means zero/many matches are allowed. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_bool zero_times_ok = c != '+'; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_bool many_times_ok = c != '?'; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* true means match shortest string possible. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_bool minimal = false; ~~~~~~~~~~~~~~~~~~~~~~~~ /* If there is a sequence of repetition chars, collapse it ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ down to just one (the right one). We can't combine ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ interval operators with these because of, e.g., `a{2}*', ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ which should only match an even number of `a's. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ while (p != pend) ~~~~~~~~~~~~~~~~~ { ~ PATFETCH (c); ~~~~~~~~~~~~~ if (c == '*' || (!(syntax & RE_BK_PLUS_QM) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ && (c == '+' || c == '?'))) ~~~~~~~~~~~~~~~~~~~~~~~~~~~ ; ~ else if (syntax & RE_BK_PLUS_QM && c == '\\') ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ if (p == pend) FREE_STACK_RETURN (REG_EESCAPE); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PATFETCH (c1); ~~~~~~~~~~~~~~ if (!(c1 == '+' || c1 == '?')) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ PATUNFETCH; ~~~~~~~~~~~ PATUNFETCH; ~~~~~~~~~~~ break; ~~~~~~ } ~ c = c1; ~~~~~~~ } ~ else ~~~~ { ~ PATUNFETCH; ~~~~~~~~~~~ break; ~~~~~~ } ~ /* If we get here, we found another repeat character. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!(syntax & RE_NO_MINIMAL_MATCHING)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ /* "*?" and "+?" and "??" are okay (and mean match ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ minimally), but other sequences (such as "*??" and ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ "+++") are rejected (reserved for future use). */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (minimal || c != '?') ~~~~~~~~~~~~~~~~~~~~~~~~ FREE_STACK_RETURN (REG_BADRPT); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ minimal = true; ~~~~~~~~~~~~~~~ } ~ else ~~~~ { ~ zero_times_ok |= c != '+'; ~~~~~~~~~~~~~~~~~~~~~~~~~~ many_times_ok |= c != '?'; ~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ } ~ /* Star, etc. applied to an empty pattern is equivalent ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ to an empty pattern. */ ~~~~~~~~~~~~~~~~~~~~~~~~ if (!laststart) ~~~~~~~~~~~~~~~ break; ~~~~~~ /* Now we know whether zero matches is allowed ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ and whether two or more matches is allowed ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ and whether we want minimal or maximal matching. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (minimal) ~~~~~~~~~~~~ { ~ if (!many_times_ok) ~~~~~~~~~~~~~~~~~~~ { ~ /* "a??" becomes: ~~~~~~~~~~~~~~~~~ 0: /on_failure_jump to 6 ~~~~~~~~~~~~~~~~~~~~~~~~ 3: /jump to 9 ~~~~~~~~~~~~~ 6: /exactn/1/A ~~~~~~~~~~~~~~ 9: end of pattern. ~~~~~~~~~~~~~~~~~~ */ ~~ GET_BUFFER_SPACE (6); ~~~~~~~~~~~~~~~~~~~~~ INSERT_JUMP (jump, laststart, buf_end + 3); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ buf_end += 3; ~~~~~~~~~~~~~ INSERT_JUMP (on_failure_jump, laststart, laststart + 6); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ buf_end += 3; ~~~~~~~~~~~~~ } ~ else if (zero_times_ok) ~~~~~~~~~~~~~~~~~~~~~~~ { ~ /* "a*?" becomes: ~~~~~~~~~~~~~~~~~ 0: /jump to 6 ~~~~~~~~~~~~~ 3: /exactn/1/A ~~~~~~~~~~~~~~ 6: /on_failure_jump to 3 ~~~~~~~~~~~~~~~~~~~~~~~~ 9: end of pattern. ~~~~~~~~~~~~~~~~~~ */ ~~ GET_BUFFER_SPACE (6); ~~~~~~~~~~~~~~~~~~~~~ INSERT_JUMP (jump, laststart, buf_end + 3); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ buf_end += 3; ~~~~~~~~~~~~~ STORE_JUMP (on_failure_jump, buf_end, laststart + 3); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ buf_end += 3; ~~~~~~~~~~~~~ } ~ else ~~~~ { ~ /* "a+?" becomes: ~~~~~~~~~~~~~~~~~ 0: /exactn/1/A ~~~~~~~~~~~~~~ 3: /on_failure_jump to 0 ~~~~~~~~~~~~~~~~~~~~~~~~ 6: end of pattern. ~~~~~~~~~~~~~~~~~~ */ ~~ GET_BUFFER_SPACE (3); ~~~~~~~~~~~~~~~~~~~~~ STORE_JUMP (on_failure_jump, buf_end, laststart); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ buf_end += 3; ~~~~~~~~~~~~~ } ~ } ~ else ~~~~ { ~ /* Are we optimizing this jump? */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_bool keep_string_p = false; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (many_times_ok) ~~~~~~~~~~~~~~~~~~ { /* More than one repetition is allowed, so put in ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ at the end a backward relative jump from ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `buf_end' to before the next jump we're going ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ to put in below (which jumps from laststart to ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ after this jump). ~~~~~~~~~~~~~~~~~ But if we are at the `*' in the exact sequence `.*\n', ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ insert an unconditional jump backwards to the ., ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ instead of the beginning of the loop. This way we only ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ push a failure point once, instead of every time ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ through the loop. */ ~~~~~~~~~~~~~~~~~~~~~ assert (p - 1 > pattern); ~~~~~~~~~~~~~~~~~~~~~~~~~ /* Allocate the space for the jump. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ GET_BUFFER_SPACE (3); ~~~~~~~~~~~~~~~~~~~~~ /* We know we are not at the first character of the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ pattern, because laststart was nonzero. And we've ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ already incremented `p', by the way, to be the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ character after the `*'. Do we have to do something ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ analogous here for null bytes, because of ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RE_DOT_NOT_NULL? */ ~~~~~~~~~~~~~~~~~~~ if (*(p - 2) == '.' ~~~~~~~~~~~~~~~~~~~ && zero_times_ok ~~~~~~~~~~~~~~~~ && p < pend && *p == '\n' ~~~~~~~~~~~~~~~~~~~~~~~~~ && !(syntax & RE_DOT_NEWLINE)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { /* We have .*\n. */ ~~~~~~~~~~~~~~~~~~~~~~ STORE_JUMP (jump, buf_end, laststart); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ keep_string_p = true; ~~~~~~~~~~~~~~~~~~~~~ } ~ else ~~~~ /* Anything else. */ ~~~~~~~~~~~~~~~~~~~~~ STORE_JUMP (maybe_pop_jump, buf_end, laststart - 3); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* We've added more stuff to the buffer. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ buf_end += 3; ~~~~~~~~~~~~~ } ~ /* On failure, jump from laststart to buf_end + 3, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ which will be the end of the buffer after this jump ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ is inserted. */ ~~~~~~~~~~~~~~~~ GET_BUFFER_SPACE (3); ~~~~~~~~~~~~~~~~~~~~~ INSERT_JUMP (keep_string_p ? on_failure_keep_string_jump ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ : on_failure_jump, ~~~~~~~~~~~~~~~~~~ laststart, buf_end + 3); ~~~~~~~~~~~~~~~~~~~~~~~~ buf_end += 3; ~~~~~~~~~~~~~ if (!zero_times_ok) ~~~~~~~~~~~~~~~~~~~ { ~ /* At least one repetition is required, so insert a ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `dummy_failure_jump' before the initial ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `on_failure_jump' instruction of the loop. This ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ effects a skip over that instruction the first time ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ we hit that loop. */ ~~~~~~~~~~~~~~~~~~~~~ GET_BUFFER_SPACE (3); ~~~~~~~~~~~~~~~~~~~~~ INSERT_JUMP (dummy_failure_jump, laststart, laststart + 6); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ buf_end += 3; ~~~~~~~~~~~~~ } ~ } ~ pending_exact = 0; ~~~~~~~~~~~~~~~~~~ } ~ break; ~~~~~~ case '.': ~~~~~~~~~ laststart = buf_end; ~~~~~~~~~~~~~~~~~~~~ BUF_PUSH (anychar); ~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ #ifdef MULE ~~~~~~~~~~~ #define MAYBE_START_OVER_WITH_EXTENDED(ch) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (ch >= 0x80) do \ ~~~~~~~~~~~~~~~~~~~~~~ { \ ~~~~~~~ goto start_over_with_extended; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } while (0) ~~~~~~~~~~~ #else ~~~~~ #define MAYBE_START_OVER_WITH_EXTENDED(ch) (void)(ch) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ case '[': ~~~~~~~~~ { ~ /* XEmacs change: this whole section */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_bool had_char_class = false; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (p == pend) FREE_STACK_RETURN (REG_EBRACK); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Ensure that we have enough space to push a charset: the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ opcode, the length count, and the bitset; 34 bytes in all. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ GET_BUFFER_SPACE (34); ~~~~~~~~~~~~~~~~~~~~~~ laststart = buf_end; ~~~~~~~~~~~~~~~~~~~~ /* We test `*p == '^' twice, instead of using an if ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ statement, so we only need one BUF_PUSH. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ BUF_PUSH (*p == '^' ? charset_not : charset); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (*p == '^') ~~~~~~~~~~~~~~ p++; ~~~~ /* Remember the first position in the bracket expression. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ p1 = p; ~~~~~~~ /* Push the number of bytes in the bitmap. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ BUF_PUSH ((1 << BYTEWIDTH) / BYTEWIDTH); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Clear the whole map. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~ memset (buf_end, 0, (1 << BYTEWIDTH) / BYTEWIDTH); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* charset_not matches newline according to a syntax bit. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if ((re_opcode_t) buf_end[-2] == charset_not ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ && (syntax & RE_HAT_LISTS_NOT_NEWLINE)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ SET_LIST_BIT ('\n'); ~~~~~~~~~~~~~~~~~~~~ /* Read in characters and ranges, setting map bits. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (;;) ~~~~~~~~ { ~ if (p == pend) FREE_STACK_RETURN (REG_EBRACK); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PATFETCH (c); ~~~~~~~~~~~~~ /* Frumble-bumble, we may have found some extended chars. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Need to start over, process everything using the general ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ extended-char mechanism, and need to use charset_mule and ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ charset_mule_not instead of charset and charset_not. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ MAYBE_START_OVER_WITH_EXTENDED (c); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* \ might escape characters inside [...] and [^...]. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if ((syntax & RE_BACKSLASH_ESCAPE_IN_LISTS) && c == '\\') ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ if (p == pend) FREE_STACK_RETURN (REG_EESCAPE); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PATFETCH (c1); ~~~~~~~~~~~~~~ MAYBE_START_OVER_WITH_EXTENDED (c1); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ SET_LIST_BIT (c1); ~~~~~~~~~~~~~~~~~~ continue; ~~~~~~~~~ } ~ /* Could be the end of the bracket expression. If it's ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ not (i.e., when the bracket expression is `[]' so ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ far), the ']' character bit gets set way below. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (c == ']' && p != p1 + 1) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ /* Look ahead to see if it's a range when the last thing ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ was a character class. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~ if (had_char_class && c == '-' && *p != ']') ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ FREE_STACK_RETURN (REG_ERANGE); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Look ahead to see if it's a range when the last thing ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ was a character: if this is a hyphen not at the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ beginning or the end of a list, then it's the range ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ operator. */ ~~~~~~~~~~~~~ if (c == '-' ~~~~~~~~~~~~ && !(p - 2 >= pattern && p[-2] == '[') ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ && !(p - 3 >= pattern && p[-3] == '[' && p[-2] == '^') ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ && *p != ']') ~~~~~~~~~~~~~ { ~ reg_errcode_t ret; ~~~~~~~~~~~~~~~~~~ MAYBE_START_OVER_WITH_EXTENDED (*(unsigned char *)p); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ret = compile_range (&p, pend, translate, syntax, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ buf_end); ~~~~~~~~~ if (ret != REG_NOERROR) FREE_STACK_RETURN (ret); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ else if (p[0] == '-' && p[1] != ']') ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { /* This handles ranges made up of characters only. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ reg_errcode_t ret; ~~~~~~~~~~~~~~~~~~ /* Move past the `-'. */ ~~~~~~~~~~~~~~~~~~~~~~~~~ PATFETCH (c1); ~~~~~~~~~~~~~~ MAYBE_START_OVER_WITH_EXTENDED (*(unsigned char *)p); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ret = compile_range (&p, pend, translate, syntax, buf_end); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (ret != REG_NOERROR) FREE_STACK_RETURN (ret); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* See if we're at the beginning of a possible character ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ class. */ ~~~~~~~~~~ else if (syntax & RE_CHAR_CLASSES && c == '[' && *p == ':') ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ re_char *str = p + 1; ~~~~~~~~~~~~~~~~~~~~~ PATFETCH (c); ~~~~~~~~~~~~~ c1 = 0; ~~~~~~~ /* If pattern is `[[:'. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (p == pend) FREE_STACK_RETURN (REG_EBRACK); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (;;) ~~~~~~~~ { ~ PATFETCH (c); ~~~~~~~~~~~~~ if ((c == ':' && *p == ']') || p == pend) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ break; ~~~~~~ } ~ c1++; ~~~~~ } ~ /* If isn't a word bracketed by `[:' and `:]': ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ undo the ending character, the letters, and leave ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the leading `:' and `[' (but set bits for them). */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (c == ':' && *p == ']') ~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ re_wctype_t cc = re_wctype (str, c1); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int ch; ~~~~~~~ if (cc == RECC_ERROR) ~~~~~~~~~~~~~~~~~~~~~ FREE_STACK_RETURN (REG_ECTYPE); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Throw away the ] at the end of the character ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ class. */ ~~~~~~~~~~ PATFETCH (c); ~~~~~~~~~~~~~ if (p == pend) FREE_STACK_RETURN (REG_EBRACK); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef MULE ~~~~~~~~~~~ if (re_wctype_can_match_non_ascii (cc)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ goto start_over_with_extended; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ #endif /* MULE */ ~~~~~~~~~~~~~~~~~ for (ch = 0; ch < (1 << BYTEWIDTH); ++ch) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ if (re_iswctype (ch, cc ~~~~~~~~~~~~~~~~~~~~~~~ RE_ISWCTYPE_ARG (current_buffer))) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ SET_LIST_BIT (ch); ~~~~~~~~~~~~~~~~~~ } ~ } ~ had_char_class = true; ~~~~~~~~~~~~~~~~~~~~~~ } ~ else ~~~~ { ~ c1++; ~~~~~ while (c1--) ~~~~~~~~~~~~ PATUNFETCH; ~~~~~~~~~~~ SET_LIST_BIT ('['); ~~~~~~~~~~~~~~~~~~~ SET_LIST_BIT (':'); ~~~~~~~~~~~~~~~~~~~ had_char_class = false; ~~~~~~~~~~~~~~~~~~~~~~~ } ~ } ~ else ~~~~ { ~ had_char_class = false; ~~~~~~~~~~~~~~~~~~~~~~~ SET_LIST_BIT (c); ~~~~~~~~~~~~~~~~~ } ~ } ~ /* Discard any (non)matching list bytes that are all 0 at the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ end of the map. Decrease the map-length byte too. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ while ((int) buf_end[-1] > 0 && buf_end[buf_end[-1] - 1] == 0) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ buf_end[-1]--; ~~~~~~~~~~~~~~ buf_end += buf_end[-1]; ~~~~~~~~~~~~~~~~~~~~~~~ } ~ break; ~~~~~~ #ifdef MULE ~~~~~~~~~~~ start_over_with_extended: ~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ REGISTER Lisp_Object rtab = Qnil; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Bitbyte flags = 0; ~~~~~~~~~~~~~~~~~~ int bytes_needed = sizeof (flags); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_bool had_char_class = false; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* There are extended chars here, which means we need to use the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ unified range-table format. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (buf_end[-2] == charset) ~~~~~~~~~~~~~~~~~~~~~~~~~~~ buf_end[-2] = charset_mule; ~~~~~~~~~~~~~~~~~~~~~~~~~~~ else ~~~~ buf_end[-2] = charset_mule_not; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ buf_end--; ~~~~~~~~~~ p = p1; /* go back to the beginning of the charset, after ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ a possible ^. */ ~~~~~~~~~~~~~~~~ rtab = Vthe_lisp_rangetab; ~~~~~~~~~~~~~~~~~~~~~~~~~~ Fclear_range_table (rtab); ~~~~~~~~~~~~~~~~~~~~~~~~~~ /* charset_not matches newline according to a syntax bit. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if ((re_opcode_t) buf_end[-1] == charset_mule_not ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ && (syntax & RE_HAT_LISTS_NOT_NEWLINE)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ SET_RANGETAB_BIT ('\n'); ~~~~~~~~~~~~~~~~~~~~~~~~ /* Read in characters and ranges, setting map bits. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (;;) ~~~~~~~~ { ~ if (p == pend) FREE_STACK_RETURN (REG_EBRACK); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PATFETCH (c); ~~~~~~~~~~~~~ /* \ might escape characters inside [...] and [^...]. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if ((syntax & RE_BACKSLASH_ESCAPE_IN_LISTS) && c == '\\') ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ if (p == pend) FREE_STACK_RETURN (REG_EESCAPE); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PATFETCH (c1); ~~~~~~~~~~~~~~ SET_RANGETAB_BIT (c1); ~~~~~~~~~~~~~~~~~~~~~~ continue; ~~~~~~~~~ } ~ /* Could be the end of the bracket expression. If it's ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ not (i.e., when the bracket expression is `[]' so ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ far), the ']' character bit gets set way below. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (c == ']' && p != p1 + 1) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ /* Look ahead to see if it's a range when the last thing ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ was a character class. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~ if (had_char_class && c == '-' && *p != ']') ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ FREE_STACK_RETURN (REG_ERANGE); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Look ahead to see if it's a range when the last thing ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ was a character: if this is a hyphen not at the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ beginning or the end of a list, then it's the range ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ operator. */ ~~~~~~~~~~~~~ if (c == '-' ~~~~~~~~~~~~ && !(p - 2 >= pattern && p[-2] == '[') ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ && !(p - 3 >= pattern && p[-3] == '[' && p[-2] == '^') ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ && *p != ']') ~~~~~~~~~~~~~ { ~ reg_errcode_t ret; ~~~~~~~~~~~~~~~~~~ ret = compile_extended_range (&p, pend, translate, syntax, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ rtab); ~~~~~~ if (ret != REG_NOERROR) FREE_STACK_RETURN (ret); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ else if (p[0] == '-' && p[1] != ']') ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { /* This handles ranges made up of characters only. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ reg_errcode_t ret; ~~~~~~~~~~~~~~~~~~ /* Move past the `-'. */ ~~~~~~~~~~~~~~~~~~~~~~~~~ PATFETCH (c1); ~~~~~~~~~~~~~~ ret = compile_extended_range (&p, pend, translate, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ syntax, rtab); ~~~~~~~~~~~~~~ if (ret != REG_NOERROR) FREE_STACK_RETURN (ret); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* See if we're at the beginning of a possible character ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ class. */ ~~~~~~~~~~ else if (syntax & RE_CHAR_CLASSES && c == '[' && *p == ':') ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ re_char *str = p + 1; ~~~~~~~~~~~~~~~~~~~~~ PATFETCH (c); ~~~~~~~~~~~~~ c1 = 0; ~~~~~~~ /* If pattern is `[[:'. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (p == pend) FREE_STACK_RETURN (REG_EBRACK); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (;;) ~~~~~~~~ { ~ PATFETCH (c); ~~~~~~~~~~~~~ if ((c == ':' && *p == ']') || p == pend) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ break; ~~~~~~ } ~ c1++; ~~~~~ } ~ /* If isn't a word bracketed by `[:' and `:]': ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ undo the ending character, the letters, and leave ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the leading `:' and `[' (but set bits for them). */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (c == ':' && *p == ']') ~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ re_wctype_t cc = re_wctype (str, c1); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ reg_errcode_t ret = REG_NOERROR; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (cc == RECC_ERROR) ~~~~~~~~~~~~~~~~~~~~~ FREE_STACK_RETURN (REG_ECTYPE); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Throw away the ] at the end of the character ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ class. */ ~~~~~~~~~~ PATFETCH (c); ~~~~~~~~~~~~~ if (p == pend) FREE_STACK_RETURN (REG_EBRACK); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ret = compile_char_class (cc, rtab, &flags); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (ret != REG_NOERROR) FREE_STACK_RETURN (ret); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ had_char_class = true; ~~~~~~~~~~~~~~~~~~~~~~ } ~ else ~~~~ { ~ c1++; ~~~~~ while (c1--) ~~~~~~~~~~~~ PATUNFETCH; ~~~~~~~~~~~ SET_RANGETAB_BIT ('['); ~~~~~~~~~~~~~~~~~~~~~~~ SET_RANGETAB_BIT (':'); ~~~~~~~~~~~~~~~~~~~~~~~ had_char_class = false; ~~~~~~~~~~~~~~~~~~~~~~~ } ~ } ~ else ~~~~ { ~ had_char_class = false; ~~~~~~~~~~~~~~~~~~~~~~~ SET_RANGETAB_BIT (c); ~~~~~~~~~~~~~~~~~~~~~ } ~ } ~ bytes_needed += unified_range_table_bytes_needed (rtab); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ GET_BUFFER_SPACE (bytes_needed); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ *buf_end++ = flags; ~~~~~~~~~~~~~~~~~~~ unified_range_table_copy_data (rtab, buf_end); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ buf_end += unified_range_table_bytes_used (buf_end); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ } ~ #endif /* MULE */ ~~~~~~~~~~~~~~~~~ case '(': ~~~~~~~~~ if (syntax & RE_NO_BK_PARENS) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto handle_open; ~~~~~~~~~~~~~~~~~ else ~~~~ goto normal_char; ~~~~~~~~~~~~~~~~~ case ')': ~~~~~~~~~ if (syntax & RE_NO_BK_PARENS) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto handle_close; ~~~~~~~~~~~~~~~~~~ else ~~~~ goto normal_char; ~~~~~~~~~~~~~~~~~ case '\n': ~~~~~~~~~~ if (syntax & RE_NEWLINE_ALT) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto handle_alt; ~~~~~~~~~~~~~~~~ else ~~~~ goto normal_char; ~~~~~~~~~~~~~~~~~ case '|': ~~~~~~~~~ if (syntax & RE_NO_BK_VBAR) ~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto handle_alt; ~~~~~~~~~~~~~~~~ else ~~~~ goto normal_char; ~~~~~~~~~~~~~~~~~ case '{': ~~~~~~~~~ if (syntax & RE_INTERVALS && syntax & RE_NO_BK_BRACES) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto handle_interval; ~~~~~~~~~~~~~~~~~~~~~ else ~~~~ goto normal_char; ~~~~~~~~~~~~~~~~~ case '\\': ~~~~~~~~~~ if (p == pend) FREE_STACK_RETURN (REG_EESCAPE); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Do not translate the character after the \, so that we can ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ distinguish, e.g., \B from \b, even if we normally would ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ translate, e.g., B to b. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PATFETCH_RAW (c); ~~~~~~~~~~~~~~~~~ switch (c) ~~~~~~~~~~ { ~ case '(': ~~~~~~~~~ if (syntax & RE_NO_BK_PARENS) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto normal_backslash; ~~~~~~~~~~~~~~~~~~~~~~ handle_open: ~~~~~~~~~~~~ { ~ regnum_t r = 0; ~~~~~~~~~~~~~~~ re_bool shy = 0, named_nonshy = 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!(syntax & RE_NO_SHY_GROUPS) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ && p != pend && itext_ichar_eql (p, '?')) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ INC_IBYTEPTR (p); /* Gobble up the '?'. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PATFETCH (c); /* Fetch the next character, which may be a ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ digit. */ ~~~~~~~~~ switch (c) ~~~~~~~~~~ { ~ case ':': /* shy groups */ ~~~~~~~~~~~~~~~~~~~~~~~~~~ shy = 1; ~~~~~~~~ break; ~~~~~~ case '1': case '2': case '3': case '4': ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case '5': case '6': case '7': case '8': case '9': ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PATUNFETCH; ~~~~~~~~~~~ GET_UNSIGNED_NUMBER (r); ~~~~~~~~~~~~~~~~~~~~~~~~ if (itext_ichar_eql (p, ':')) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ named_nonshy = 1; ~~~~~~~~~~~~~~~~~ INC_IBYTEPTR (p); /* Gobble up the ':'. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ /* Otherwise, fall through and error. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* An explicitly specified regnum must start with ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ non-0. */ ~~~~~~~~~ case '0': ~~~~~~~~~ default: ~~~~~~~~ FREE_STACK_RETURN (REG_BADPAT); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ } ~ ++regnum; ~~~~~~~~~ bufp->re_ngroups++; ~~~~~~~~~~~~~~~~~~~ if (bufp->re_ngroups > MAX_REGNUM) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ FREE_STACK_RETURN (REG_ESUBREG); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ if (!shy) ~~~~~~~~~ { ~ if (named_nonshy) ~~~~~~~~~~~~~~~~~ { ~ if (r < bufp->external_to_internal_register_size) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ if (group_in_compile_stack ~~~~~~~~~~~~~~~~~~~~~~~~~~ (compile_stack, ~~~~~~~~~~~~~~~ bufp->external_to_internal_register[r])) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ /* GNU errors in this context, which is ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ inconsistent; it otherwise has no problem ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ with named non-shy groups overriding ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ previously-assigned group numbers. I choose ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ to error here for consistency with GNU for ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ those writing code that should target ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ both. */ ~~~~~~~~ FREE_STACK_RETURN (REG_ESUBREG); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ } ~ if (r > bufp->re_nsub) ~~~~~~~~~~~~~~~~~~~~~~ { ~ bufp->re_nsub = r; ~~~~~~~~~~~~~~~~~~ } ~ } ~ else ~~~~ { ~ r = ++(bufp->re_nsub); ~~~~~~~~~~~~~~~~~~~~~~ } ~ while (bufp->external_to_internal_register_size <= ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->re_nsub) ~~~~~~~~~~~~~~ { ~ int i; ~~~~~~ int old_size = ~~~~~~~~~~~~~~ bufp->external_to_internal_register_size; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->external_to_internal_register_size ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ += max (old_size + 5, bufp->re_nsub + 5); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RETALLOC (bufp->external_to_internal_register, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->external_to_internal_register_size, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int); ~~~~~ for (i = old_size; ~~~~~~~~~~~~~~~~~~ i < bufp->external_to_internal_register_size; i++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->external_to_internal_register[i] = ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (int) 0xDEADBEEF; ~~~~~~~~~~~~~~~~~ } ~ /* This is explicitly [r] rather than [bufp->re_nsub] for ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the case that the named nonshy group references an ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ unused register number less than bufp->re_nsub. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->external_to_internal_register[r] = ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->re_ngroups; ~~~~~~~~~~~~~~~~~ } ~ if (COMPILE_STACK_FULL) ~~~~~~~~~~~~~~~~~~~~~~~ { ~ RETALLOC (compile_stack.stack, compile_stack.size << 1, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ compile_stack_elt_t); ~~~~~~~~~~~~~~~~~~~~~ if (compile_stack.stack == NULL) return REG_ESPACE; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ compile_stack.size <<= 1; ~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* These are the values to restore when we hit end of this ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ group. They are all relative offsets, so that if the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ whole pattern moves because of realloc, they will still ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ be valid. */ ~~~~~~~~~~~~~ COMPILE_STACK_TOP.begalt_offset = begalt - bufp->buffer; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ COMPILE_STACK_TOP.fixup_alt_jump ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ = fixup_alt_jump ? fixup_alt_jump - bufp->buffer + 1 : 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ COMPILE_STACK_TOP.laststart_offset = buf_end - bufp->buffer; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ COMPILE_STACK_TOP.regnum = bufp->re_ngroups; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ COMPILE_STACK_TOP.inner_group_offset ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ = buf_end - bufp->buffer + 3; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* We will eventually replace the 0 with the number of ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ groups inner to this one, using inner_group_offset, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ above. */ ~~~~~~~~~ GET_BUFFER_SPACE (5); ~~~~~~~~~~~~~~~~~~~~~ store_op2 (start_memory, buf_end, bufp->re_ngroups, 0); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ buf_end += 5; ~~~~~~~~~~~~~ compile_stack.avail++; ~~~~~~~~~~~~~~~~~~~~~~ fixup_alt_jump = 0; ~~~~~~~~~~~~~~~~~~~ laststart = 0; ~~~~~~~~~~~~~~ begalt = buf_end; ~~~~~~~~~~~~~~~~~ pending_exact = 0; ~~~~~~~~~~~~~~~~~~ } ~ break; ~~~~~~ case ')': ~~~~~~~~~ if (syntax & RE_NO_BK_PARENS) goto normal_backslash; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (COMPILE_STACK_EMPTY) ~~~~~~~~~~~~~~~~~~~~~~~~ { ~ if (syntax & RE_UNMATCHED_RIGHT_PAREN_ORD) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto normal_backslash; ~~~~~~~~~~~~~~~~~~~~~~ else ~~~~ FREE_STACK_RETURN (REG_ERPAREN); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ handle_close: ~~~~~~~~~~~~~ if (fixup_alt_jump) ~~~~~~~~~~~~~~~~~~~ { /* Push a dummy failure point at the end of the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ alternative for a possible future ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `pop_failure_jump' to pop. See comments at ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `push_dummy_failure' in `re_match_2'. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ BUF_PUSH (push_dummy_failure); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* We allocated space for this jump when we assigned ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ to `fixup_alt_jump', in the `handle_alt' case below. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ STORE_JUMP (jump_past_alt, fixup_alt_jump, buf_end - 1); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* See similar code for backslashed left paren above. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (COMPILE_STACK_EMPTY) ~~~~~~~~~~~~~~~~~~~~~~~~ { ~ if (syntax & RE_UNMATCHED_RIGHT_PAREN_ORD) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto normal_char; ~~~~~~~~~~~~~~~~~ else ~~~~ FREE_STACK_RETURN (REG_ERPAREN); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* Since we just checked for an empty stack above, this ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ``can't happen''. */ ~~~~~~~~~~~~~~~~~~~~~ assert (compile_stack.avail != 0); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ /* We don't just want to restore into `regnum', because ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ later groups should continue to be numbered higher, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ as in `(ab)c(de)' -- the second group is #2. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regnum_t this_group_regnum; ~~~~~~~~~~~~~~~~~~~~~~~~~~~ unsigned char *inner_group_loc; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ compile_stack.avail--; ~~~~~~~~~~~~~~~~~~~~~~ begalt = bufp->buffer + COMPILE_STACK_TOP.begalt_offset; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fixup_alt_jump ~~~~~~~~~~~~~~ = COMPILE_STACK_TOP.fixup_alt_jump ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ? bufp->buffer + COMPILE_STACK_TOP.fixup_alt_jump - 1 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ : 0; ~~~~ laststart = bufp->buffer + COMPILE_STACK_TOP.laststart_offset; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ this_group_regnum = COMPILE_STACK_TOP.regnum; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ pending_exact = 0; ~~~~~~~~~~~~~~~~~~ /* We're at the end of the group, so now we know how many ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ groups were inside this one. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ inner_group_loc ~~~~~~~~~~~~~~~ = bufp->buffer + COMPILE_STACK_TOP.inner_group_offset; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ STORE_NUMBER (inner_group_loc, regnum - this_group_regnum); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ GET_BUFFER_SPACE (5); ~~~~~~~~~~~~~~~~~~~~~ store_op2 (stop_memory, buf_end, this_group_regnum, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regnum - this_group_regnum); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ buf_end += 5; ~~~~~~~~~~~~~ } ~ break; ~~~~~~ case '|': /* `\|'. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~ if (syntax & RE_LIMITED_OPS || syntax & RE_NO_BK_VBAR) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto normal_backslash; ~~~~~~~~~~~~~~~~~~~~~~ handle_alt: ~~~~~~~~~~~ if (syntax & RE_LIMITED_OPS) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto normal_char; ~~~~~~~~~~~~~~~~~ /* Insert before the previous alternative a jump which ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ jumps to this alternative if the former fails. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ GET_BUFFER_SPACE (3); ~~~~~~~~~~~~~~~~~~~~~ INSERT_JUMP (on_failure_jump, begalt, buf_end + 6); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ pending_exact = 0; ~~~~~~~~~~~~~~~~~~ buf_end += 3; ~~~~~~~~~~~~~ /* The alternative before this one has a jump after it ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ which gets executed if it gets matched. Adjust that ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ jump so it will jump to this alternative's analogous ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ jump (put in below, which in turn will jump to the next ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (if any) alternative's such jump, etc.). The last such ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ jump jumps to the correct final destination. A picture: ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ _____ _____ ~~~~~~~~~~~ | | | | ~~~~~~~~~~~ | v | v ~~~~~~~~~~~ a | b | c ~~~~~~~~~~~ If we are at `b', then fixup_alt_jump right now points to a ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ three-byte space after `a'. We'll put in the jump, set ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fixup_alt_jump to right after `b', and leave behind three ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bytes which we'll fill in when we get to after `c'. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (fixup_alt_jump) ~~~~~~~~~~~~~~~~~~~ STORE_JUMP (jump_past_alt, fixup_alt_jump, buf_end); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Mark and leave space for a jump after this alternative, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ to be filled in later either by next alternative or ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ when know we're at the end of a series of alternatives. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fixup_alt_jump = buf_end; ~~~~~~~~~~~~~~~~~~~~~~~~~ GET_BUFFER_SPACE (3); ~~~~~~~~~~~~~~~~~~~~~ buf_end += 3; ~~~~~~~~~~~~~ laststart = 0; ~~~~~~~~~~~~~~ begalt = buf_end; ~~~~~~~~~~~~~~~~~ break; ~~~~~~ case '{': ~~~~~~~~~ /* If \{ is a literal. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!(syntax & RE_INTERVALS) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* If we're at `\{' and it's not the open-interval ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ operator. */ ~~~~~~~~~~~~~ || ((syntax & RE_INTERVALS) && (syntax & RE_NO_BK_BRACES)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || (p - 2 == pattern && p == pend)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto normal_backslash; ~~~~~~~~~~~~~~~~~~~~~~ #define BAD_INTERVAL(errnum) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ do \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (syntax & RE_NO_BK_BRACES) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto unfetch_interval; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ else \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ FREE_STACK_RETURN (errnum); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } while (0) ~~~~~~~~~~~ handle_interval: ~~~~~~~~~~~~~~~~ { ~ /* If got here, then the syntax allows intervals. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* At least (most) this many matches must be made. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int lower_bound = 0, upper_bound = -1; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ beg_interval = p - 1; ~~~~~~~~~~~~~~~~~~~~~ if (p == pend || itext_ichar_eql (p, '+')) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ BAD_INTERVAL (REG_EBRACE); ~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ GET_UNSIGNED_NUMBER (lower_bound); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (p == pend) BAD_INTERVAL (REG_EBRACE); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PATFETCH (c); ~~~~~~~~~~~~~ if (c == ',') ~~~~~~~~~~~~~ { ~ if (p == pend || itext_ichar_eql (p, '+')) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ BAD_INTERVAL (REG_EBRACE); ~~~~~~~~~~~~~~~~~~~~~~~~~~ GET_UNSIGNED_NUMBER (upper_bound); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (upper_bound < 0) upper_bound = RE_DUP_MAX; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (p == pend) BAD_INTERVAL (REG_EBRACE); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PATFETCH (c); ~~~~~~~~~~~~~ } ~ else ~~~~ { ~ /* Interval such as `{1}' => match exactly once. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ upper_bound = lower_bound; ~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ if (lower_bound > upper_bound) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ BAD_INTERVAL (REG_EBRACE); ~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ if (upper_bound > RE_DUP_MAX) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ BAD_INTERVAL (REG_ESIZEBR); ~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ if (!(syntax & RE_NO_BK_BRACES)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ if (c != '\\') ~~~~~~~~~~~~~~ FREE_STACK_RETURN (REG_BADBR); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (p == pend) ~~~~~~~~~~~~~~ FREE_STACK_RETURN (REG_EESCAPE); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PATFETCH (c); ~~~~~~~~~~~~~ } ~ if (c != '}') ~~~~~~~~~~~~~ { ~ BAD_INTERVAL (REG_EBRACE); ~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* We just parsed a valid interval. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* It's invalid to have no preceding RE. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!laststart) ~~~~~~~~~~~~~~~ { ~ if (syntax & RE_CONTEXT_INVALID_OPS) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ FREE_STACK_RETURN (REG_BADRPT); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ else if (syntax & RE_CONTEXT_INDEP_OPS) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ laststart = buf_end; ~~~~~~~~~~~~~~~~~~~~ else ~~~~ goto unfetch_interval; ~~~~~~~~~~~~~~~~~~~~~~ } ~ /* If the upper bound is zero, don't want to succeed at ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ all; jump from `laststart' to `b + 3', which will be ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the end of the buffer after we insert the jump. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (upper_bound == 0) ~~~~~~~~~~~~~~~~~~~~~ { ~ GET_BUFFER_SPACE (3); ~~~~~~~~~~~~~~~~~~~~~ INSERT_JUMP (jump, laststart, buf_end + 3); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ buf_end += 3; ~~~~~~~~~~~~~ } ~ /* Otherwise, we have a nontrivial interval. When ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ we're all done, the pattern will look like: ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ set_number_at ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ set_number_at ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ succeed_n ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ~~~~~~~~~~~~~~ jump_n ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (The upper bound and `jump_n' are omitted if ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `upper_bound' is 1, though.) */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ else ~~~~ { /* If the upper bound is > 1, we need to insert ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ more at the end of the loop. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int nbytes = 10 + (upper_bound > 1) * 10; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ GET_BUFFER_SPACE (nbytes); ~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Initialize lower bound of the `succeed_n', even ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ though it will be set during matching by its ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ attendant `set_number_at' (inserted next), ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ because `re_compile_fastmap' needs to know. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Jump to the `jump_n' we might insert below. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ INSERT_JUMP2 (succeed_n, laststart, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ buf_end + 5 + (upper_bound > 1) * 5, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ lower_bound); ~~~~~~~~~~~~~ buf_end += 5; ~~~~~~~~~~~~~ /* Code to initialize the lower bound. Insert ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ before the `succeed_n'. The `5' is the last two ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bytes of this `set_number_at', plus 3 bytes of ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the following `succeed_n'. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ insert_op2 (set_number_at, laststart, 5, lower_bound, buf_end); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ buf_end += 5; ~~~~~~~~~~~~~ if (upper_bound > 1) ~~~~~~~~~~~~~~~~~~~~ { /* More than one repetition is allowed, so ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ append a backward jump to the `succeed_n' ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ that starts this interval. ~~~~~~~~~~~~~~~~~~~~~~~~~~ When we've reached this during matching, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ we'll have matched the interval once, so ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ jump back only `upper_bound - 1' times. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ STORE_JUMP2 (jump_n, buf_end, laststart + 5, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ upper_bound - 1); ~~~~~~~~~~~~~~~~~ buf_end += 5; ~~~~~~~~~~~~~ /* The location we want to set is the second ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ parameter of the `jump_n'; that is `b-2' as ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ an absolute address. `laststart' will be ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the `set_number_at' we're about to insert; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `laststart+3' the number to set, the source ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for the relative address. But we are ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ inserting into the middle of the pattern -- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ so everything is getting moved up by 5. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Conclusion: (b - 2) - (laststart + 3) + 5, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ i.e., b - laststart. ~~~~~~~~~~~~~~~~~~~~ We insert this at the beginning of the loop ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ so that if we fail during matching, we'll ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ reinitialize the bounds. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ insert_op2 (set_number_at, laststart, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ buf_end - laststart, ~~~~~~~~~~~~~~~~~~~~ upper_bound - 1, buf_end); ~~~~~~~~~~~~~~~~~~~~~~~~~~ buf_end += 5; ~~~~~~~~~~~~~ } ~ } ~ pending_exact = 0; ~~~~~~~~~~~~~~~~~~ beg_interval = NULL; ~~~~~~~~~~~~~~~~~~~~ } ~ break; ~~~~~~ #undef BAD_INTERVAL ~~~~~~~~~~~~~~~~~~~ unfetch_interval: ~~~~~~~~~~~~~~~~~ /* If an invalid interval, match the characters as literals. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ assert (beg_interval); ~~~~~~~~~~~~~~~~~~~~~~ p = beg_interval; ~~~~~~~~~~~~~~~~~ beg_interval = NULL; ~~~~~~~~~~~~~~~~~~~~ /* normal_char and normal_backslash need `c'. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PATFETCH (c); ~~~~~~~~~~~~~ if (!(syntax & RE_NO_BK_BRACES)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ if (p > pattern && p[-1] == '\\') ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto normal_backslash; ~~~~~~~~~~~~~~~~~~~~~~ } ~ goto normal_char; ~~~~~~~~~~~~~~~~~ #ifdef emacs ~~~~~~~~~~~~ /* There is no way to specify the before_dot and after_dot ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ operators. rms says this is ok. --karl */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case '=': ~~~~~~~~~ BUF_PUSH (at_dot); ~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case 's': ~~~~~~~~~ laststart = buf_end; ~~~~~~~~~~~~~~~~~~~~ PATFETCH (c); ~~~~~~~~~~~~~ /* XEmacs addition */ ~~~~~~~~~~~~~~~~~~~~~ if (c >= 0x80 || syntax_spec_code[c] == 0377) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ FREE_STACK_RETURN (REG_ESYNTAX); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ BUF_PUSH_2 (syntaxspec, syntax_spec_code[c]); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case 'S': ~~~~~~~~~ laststart = buf_end; ~~~~~~~~~~~~~~~~~~~~ PATFETCH (c); ~~~~~~~~~~~~~ /* XEmacs addition */ ~~~~~~~~~~~~~~~~~~~~~ if (c >= 0x80 || syntax_spec_code[c] == 0377) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ FREE_STACK_RETURN (REG_ESYNTAX); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ BUF_PUSH_2 (notsyntaxspec, syntax_spec_code[c]); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ #ifdef MULE ~~~~~~~~~~~ /* 97.2.17 jhod merged in to XEmacs from mule-2.3 */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case 'c': ~~~~~~~~~ laststart = buf_end; ~~~~~~~~~~~~~~~~~~~~ PATFETCH_RAW (c); ~~~~~~~~~~~~~~~~~ if (c < 32 || c > 127) ~~~~~~~~~~~~~~~~~~~~~~ FREE_STACK_RETURN (REG_ECATEGORY); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ BUF_PUSH_2 (categoryspec, c); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case 'C': ~~~~~~~~~ laststart = buf_end; ~~~~~~~~~~~~~~~~~~~~ PATFETCH_RAW (c); ~~~~~~~~~~~~~~~~~ if (c < 32 || c > 127) ~~~~~~~~~~~~~~~~~~~~~~ FREE_STACK_RETURN (REG_ECATEGORY); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ BUF_PUSH_2 (notcategoryspec, c); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ /* end of category patch */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif /* MULE */ ~~~~~~~~~~~~~~~~~ #endif /* emacs */ ~~~~~~~~~~~~~~~~~~ case 'w': ~~~~~~~~~ laststart = buf_end; ~~~~~~~~~~~~~~~~~~~~ BUF_PUSH (wordchar); ~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case 'W': ~~~~~~~~~ laststart = buf_end; ~~~~~~~~~~~~~~~~~~~~ BUF_PUSH (notwordchar); ~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case '<': ~~~~~~~~~ BUF_PUSH (wordbeg); ~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case '>': ~~~~~~~~~ BUF_PUSH (wordend); ~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case 'b': ~~~~~~~~~ BUF_PUSH (wordbound); ~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case 'B': ~~~~~~~~~ BUF_PUSH (notwordbound); ~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case '`': ~~~~~~~~~ BUF_PUSH (begbuf); ~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case '\'': ~~~~~~~~~~ BUF_PUSH (endbuf); ~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case '1': case '2': case '3': case '4': case '5': ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case '6': case '7': case '8': case '9': ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ regnum_t reg = -1, regint; ~~~~~~~~~~~~~~~~~~~~~~~~~~ if (syntax & RE_NO_BK_REFS) ~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto normal_char; ~~~~~~~~~~~~~~~~~ PATUNFETCH; ~~~~~~~~~~~ GET_UNSIGNED_NUMBER (reg); ~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Progressively divide down the backreference until we find ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ one that corresponds to an existing register. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ while (reg > 10 && ~~~~~~~~~~~~~~~~~~ (syntax & RE_NO_MULTI_DIGIT_BK_REFS ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || reg > bufp->re_nsub ~~~~~~~~~~~~~~~~~~~~~~ || (bufp->external_to_internal_register[reg] ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ == (int) 0xDEADBEEF))) ~~~~~~~~~~~~~~~~~~~~~~ { ~ PATUNFETCH; ~~~~~~~~~~~ reg /= 10; ~~~~~~~~~~ } ~ if (reg > bufp->re_nsub ~~~~~~~~~~~~~~~~~~~~~~~ || (bufp->external_to_internal_register[reg] ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ == (int) 0xDEADBEEF)) ~~~~~~~~~~~~~~~~~~~~~ { ~ /* \N with one digit with a non-existing group has always ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ been a syntax error. ~~~~~~~~~~~~~~~~~~~~ GNU as of Fr 27 Mär 2020 16:24:07 GMT do not accept ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ multidigit backreferences; if they did there would be ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ an argument for this not being an error for those ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ backreferences that are less than some known named ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ backreference. As it is currently we should error, this ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ will give those writing code for XEmacs better ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ feedback. */ ~~~~~~~~~~~~ FREE_STACK_RETURN (REG_ESUBREG); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ regint = bufp->external_to_internal_register[reg]; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Can't back reference to a subexpression if inside of it. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (group_in_compile_stack (compile_stack, regint)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ /* Check REG, not REGINT. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ while (reg > 10) ~~~~~~~~~~~~~~~~ { ~ PATUNFETCH; ~~~~~~~~~~~ reg = reg / 10; ~~~~~~~~~~~~~~~ } ~ goto normal_char; ~~~~~~~~~~~~~~~~~ } ~ #ifdef emacs ~~~~~~~~~~~~ if (reg > 9 && ~~~~~~~~~~~~~~ bufp->warned_about_incompatible_back_references == 0) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ bufp->warned_about_incompatible_back_references = 1; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ warn_when_safe (intern ("regex"), Qinfo, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ "Back reference \\%d now has new " ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ "semantics in %s", reg, pattern); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ #endif ~~~~~~ laststart = buf_end; ~~~~~~~~~~~~~~~~~~~~ GET_BUFFER_SPACE (3); ~~~~~~~~~~~~~~~~~~~~~ store_op1 (duplicate, buf_end, regint); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ buf_end += 3; ~~~~~~~~~~~~~ } ~ break; ~~~~~~ case '+': ~~~~~~~~~ case '?': ~~~~~~~~~ if (syntax & RE_BK_PLUS_QM) ~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto handle_plus; ~~~~~~~~~~~~~~~~~ else ~~~~ goto normal_backslash; ~~~~~~~~~~~~~~~~~~~~~~ default: ~~~~~~~~ normal_backslash: ~~~~~~~~~~~~~~~~~ /* You might think it would be useful for \ to mean ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ not to translate; but if we don't translate it, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ it will never match anything. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ c = RE_TRANSLATE (c); ~~~~~~~~~~~~~~~~~~~~~ goto normal_char; ~~~~~~~~~~~~~~~~~ } ~ break; ~~~~~~ default: ~~~~~~~~ /* Expects the character in `c'. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* `p' points to the location after where `c' came from. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ normal_char: ~~~~~~~~~~~~ { ~ /* The following conditional synced to GNU Emacs 22.1. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* If no exactn currently being built. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!pending_exact ~~~~~~~~~~~~~~~~~~ /* If last exactn not at current position. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || pending_exact + *pending_exact + 1 != buf_end ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* We have only one byte following the exactn for the count. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || *pending_exact >= (1 << BYTEWIDTH) - MAX_ICHAR_LEN ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* If followed by a repetition operator. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ If the lookahead fails because of end of pattern, any ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ trailing backslash will get caught later. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || (p != pend && (*p == '*' || *p == '^')) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || ((syntax & RE_BK_PLUS_QM) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ? p + 1 < pend && *p == '\\' && (p[1] == '+' || p[1] == '?') ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ : p != pend && (*p == '+' || *p == '?')) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || ((syntax & RE_INTERVALS) ~~~~~~~~~~~~~~~~~~~~~~~~~~~ && ((syntax & RE_NO_BK_BRACES) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ? p != pend && *p == '{' ~~~~~~~~~~~~~~~~~~~~~~~~ : p + 1 < pend && (p[0] == '\\' && p[1] == '{')))) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ /* Start building a new exactn. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ laststart = buf_end; ~~~~~~~~~~~~~~~~~~~~ BUF_PUSH_2 (exactn, 0); ~~~~~~~~~~~~~~~~~~~~~~~ pending_exact = buf_end - 1; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ #ifndef MULE ~~~~~~~~~~~~ BUF_PUSH (c); ~~~~~~~~~~~~~ (*pending_exact)++; ~~~~~~~~~~~~~~~~~~~ #else ~~~~~ { ~ Bytecount bt_count; ~~~~~~~~~~~~~~~~~~~ Ibyte tmp_buf[MAX_ICHAR_LEN]; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int i; ~~~~~~ bt_count = set_itext_ichar (tmp_buf, c); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (i = 0; i < bt_count; i++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ BUF_PUSH (tmp_buf[i]); ~~~~~~~~~~~~~~~~~~~~~~ (*pending_exact)++; ~~~~~~~~~~~~~~~~~~~ } ~ } ~ #endif ~~~~~~ break; ~~~~~~ } ~ } /* switch (c) */ ~~~~~~~~~~~~~~~~~~ } /* while p != pend */ ~~~~~~~~~~~~~~~~~~~~~~~ /* Through the pattern now. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (fixup_alt_jump) ~~~~~~~~~~~~~~~~~~~ STORE_JUMP (jump_past_alt, fixup_alt_jump, buf_end); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!COMPILE_STACK_EMPTY) ~~~~~~~~~~~~~~~~~~~~~~~~~ FREE_STACK_RETURN (REG_EPAREN); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* If we don't want backtracking, force success ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the first time we reach the end of the compiled pattern. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (syntax & RE_NO_POSIX_BACKTRACKING) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ BUF_PUSH (succeed); ~~~~~~~~~~~~~~~~~~~ xfree (compile_stack.stack); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* We have succeeded; set the length of the buffer. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->used = buf_end - bufp->buffer; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef DEBUG ~~~~~~~~~~~~ if (debug_regexps & RE_DEBUG_COMPILATION) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ DEBUG_PRINT1 ("\nCompiled pattern: \n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ print_compiled_pattern (bufp); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ #endif /* DEBUG */ ~~~~~~~~~~~~~~~~~~ #ifndef MATCH_MAY_ALLOCATE ~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Initialize the failure stack to the largest possible stack. This ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ isn't necessary unless we're trying to avoid calling alloca in ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the search and match routines. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ int num_regs = bufp->re_ngroups + 1; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Since DOUBLE_FAIL_STACK refuses to double only if the current size ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ is strictly greater than re_max_failures, the largest possible stack ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ is 2 * re_max_failures failure points. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (fail_stack.size < (2 * re_max_failures * MAX_FAILURE_ITEMS)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ fail_stack.size = (2 * re_max_failures * MAX_FAILURE_ITEMS); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (! fail_stack.stack) ~~~~~~~~~~~~~~~~~~~~~~~ fail_stack.stack ~~~~~~~~~~~~~~~~ = (fail_stack_elt_t *) xmalloc (fail_stack.size ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ * sizeof (fail_stack_elt_t)); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ else ~~~~ fail_stack.stack ~~~~~~~~~~~~~~~~ = (fail_stack_elt_t *) xrealloc (fail_stack.stack, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (fail_stack.size ~~~~~~~~~~~~~~~~ * sizeof (fail_stack_elt_t))); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ regex_grow_registers (num_regs); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ #endif /* not MATCH_MAY_ALLOCATE */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return REG_NOERROR; ~~~~~~~~~~~~~~~~~~~ } /* regex_compile */ ~~~~~~~~~~~~~~~~~~~~~ ~ /* Subroutines for `regex_compile'. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Store OP at LOC followed by two-byte integer parameter ARG. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ static void ~~~~~~~~~~~ store_op1 (re_opcode_t op, unsigned char *loc, int arg) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ *loc = (unsigned char) op; ~~~~~~~~~~~~~~~~~~~~~~~~~~ STORE_NUMBER (loc + 1, arg); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* Like `store_op1', but for two two-byte parameters ARG1 and ARG2. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ static void ~~~~~~~~~~~ store_op2 (re_opcode_t op, unsigned char *loc, int arg1, int arg2) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ *loc = (unsigned char) op; ~~~~~~~~~~~~~~~~~~~~~~~~~~ STORE_NUMBER (loc + 1, arg1); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ STORE_NUMBER (loc + 3, arg2); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* Copy the bytes from LOC to END to open up three bytes of space at LOC ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for OP followed by two-byte integer parameter ARG. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ static void ~~~~~~~~~~~ insert_op1 (re_opcode_t op, unsigned char *loc, int arg, unsigned char *end) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ REGISTER unsigned char *pfrom = end; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ REGISTER unsigned char *pto = end + 3; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ while (pfrom != loc) ~~~~~~~~~~~~~~~~~~~~ *--pto = *--pfrom; ~~~~~~~~~~~~~~~~~~ store_op1 (op, loc, arg); ~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* Like `insert_op1', but for two two-byte parameters ARG1 and ARG2. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ static void ~~~~~~~~~~~ insert_op2 (re_opcode_t op, unsigned char *loc, int arg1, int arg2, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ unsigned char *end) ~~~~~~~~~~~~~~~~~~~ { ~ REGISTER unsigned char *pfrom = end; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ REGISTER unsigned char *pto = end + 5; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ while (pfrom != loc) ~~~~~~~~~~~~~~~~~~~~ *--pto = *--pfrom; ~~~~~~~~~~~~~~~~~~ store_op2 (op, loc, arg1, arg2); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* P points to just after a ^ in PATTERN. Return true if that ^ comes ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ after an alternative or a begin-subexpression. We assume there is at ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ least one character before the ^. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ static re_bool ~~~~~~~~~~~~~~ at_begline_loc_p (re_char *pattern, re_char *p, reg_syntax_t syntax) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ re_char *prev = p - 2; ~~~~~~~~~~~~~~~~~~~~~~ re_bool prev_prev_backslash = prev > pattern && prev[-1] == '\\'; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return ~~~~~~ /* After a subexpression? */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (*prev == '(' && (syntax & RE_NO_BK_PARENS || prev_prev_backslash)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* After an alternative? */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || (*prev == '|' && (syntax & RE_NO_BK_VBAR || prev_prev_backslash)); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* The dual of at_begline_loc_p. This one is for $. We assume there is ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ at least one character after the $, i.e., `P < PEND'. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ static re_bool ~~~~~~~~~~~~~~ at_endline_loc_p (re_char *p, re_char *pend, int syntax) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ re_char *next = p; ~~~~~~~~~~~~~~~~~~ re_bool next_backslash = *next == '\\'; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_char *next_next = p + 1 < pend ? p + 1 : 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return ~~~~~~ /* Before a subexpression? */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (syntax & RE_NO_BK_PARENS ? *next == ')' ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ : next_backslash && next_next && *next_next == ')') ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Before an alternative? */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || (syntax & RE_NO_BK_VBAR ? *next == '|' ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ : next_backslash && next_next && *next_next == '|'); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* Returns true if REGNUM is in one of COMPILE_STACK's elements and ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ false if it's not. */ ~~~~~~~~~~~~~~~~~~~~~~ static re_bool ~~~~~~~~~~~~~~ group_in_compile_stack (compile_stack_type compile_stack, regnum_t regnum) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ int this_element; ~~~~~~~~~~~~~~~~~ for (this_element = compile_stack.avail - 1; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ this_element >= 0; ~~~~~~~~~~~~~~~~~~ this_element--) ~~~~~~~~~~~~~~~ if (compile_stack.stack[this_element].regnum == regnum) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return true; ~~~~~~~~~~~~ return false; ~~~~~~~~~~~~~ } ~ /* Read the ending character of a range (in a bracket expression) from the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ uncompiled pattern *P_PTR (which ends at PEND). We assume the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ starting character is in `P[-2]'. (`P[-1]' is the character `-'.) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Then we set the translation of all bits between the starting and ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ending characters (inclusive) in the compiled pattern B. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Return an error code. ~~~~~~~~~~~~~~~~~~~~~ We use these short variable names so we can use the same macros as ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `regex_compile' itself. ~~~~~~~~~~~~~~~~~~~~~~~ Under Mule, this is only called when both chars of the range are ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ASCII. */ ~~~~~~~~~ static reg_errcode_t ~~~~~~~~~~~~~~~~~~~~ compile_range (re_char **p_ptr, re_char *pend, RE_TRANSLATE_TYPE translate, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ reg_syntax_t syntax, unsigned char *buf_end) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ Ichar this_char; ~~~~~~~~~~~~~~~~ re_char *p = *p_ptr; ~~~~~~~~~~~~~~~~~~~~ int range_start, range_end; ~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (p == pend) ~~~~~~~~~~~~~~ return REG_ERANGE; ~~~~~~~~~~~~~~~~~~ /* Even though the pattern is a signed `char *', we need to fetch ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ with unsigned char *'s; if the high bit of the pattern character ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ is set, the range endpoints will be negative if we fetch using a ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ signed char *. ~~~~~~~~~~~~~~ We also want to fetch the endpoints without translating them; the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ appropriate translation is done in the bit-setting loop below. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* The SVR4 compiler on the 3B2 had trouble with unsigned const char *. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ range_start = ((const unsigned char *) p)[-2]; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ range_end = ((const unsigned char *) p)[0]; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Have to increment the pointer into the pattern string, so the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ caller isn't still at the ending character. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (*p_ptr)++; ~~~~~~~~~~~ /* If the start is after the end, the range is empty. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (range_start > range_end) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return syntax & RE_NO_EMPTY_RANGES ? REG_ERANGE : REG_NOERROR; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Here we see why `this_char' has to be larger than an `unsigned ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ char' -- the range is inclusive, so if `range_end' == 0xff ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (assuming 8-bit characters), we would otherwise go into an infinite ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ loop, since all characters <= 0xff. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (this_char = range_start; this_char <= range_end; this_char++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ SET_LIST_BIT (RE_TRANSLATE (this_char)); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ return REG_NOERROR; ~~~~~~~~~~~~~~~~~~~ } ~ #ifdef MULE ~~~~~~~~~~~ static reg_errcode_t ~~~~~~~~~~~~~~~~~~~~ compile_extended_range (re_char **p_ptr, re_char *pend, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RE_TRANSLATE_TYPE translate, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ reg_syntax_t syntax, Lisp_Object rtab) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ Ichar this_char, range_start, range_end; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ const Ibyte *p; ~~~~~~~~~~~~~~~ if (*p_ptr == pend) ~~~~~~~~~~~~~~~~~~~ return REG_ERANGE; ~~~~~~~~~~~~~~~~~~ p = (const Ibyte *) *p_ptr; ~~~~~~~~~~~~~~~~~~~~~~~~~~~ range_end = itext_ichar (p); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ p--; /* back to '-' */ ~~~~~~~~~~~~~~~~~~~~~~ DEC_IBYTEPTR (p); /* back to start of range */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* We also want to fetch the endpoints without translating them; the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ appropriate translation is done in the bit-setting loop below. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ range_start = itext_ichar (p); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ INC_IBYTEPTR (*p_ptr); ~~~~~~~~~~~~~~~~~~~~~~ /* If the start is after the end, the range is empty. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (range_start > range_end) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return syntax & RE_NO_EMPTY_RANGES ? REG_ERANGE : REG_NOERROR; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifndef UNICODE_INTERNAL ~~~~~~~~~~~~~~~~~~~~~~~~ /* Can't have ranges spanning different charsets, except maybe for ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ranges entirely within the first 256 chars. (The intent of this is that ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the effect of such a range would be unpredictable, since there is no ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ well-defined ordering over charsets and the particular assignment of ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ charset ID's is arbitrary.) This does not apply to Unicode, with ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ well-defined character values. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if ((range_start >= 0x100 || range_end >= 0x100) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ && !EQ (old_mule_ichar_charset (range_start), ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ old_mule_ichar_charset (range_end))) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return REG_ERANGESPAN; ~~~~~~~~~~~~~~~~~~~~~~ #endif /* not UNICODE_INTERNAL */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (TRANSLATE_P (translate)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ /* #### This might be way inefficient if the range encompasses 10,000 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ chars or something. To be efficient, you'd have to do something like ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ this: ~~~~~ range_table a ~~~~~~~~~~~~~ range_table b; ~~~~~~~~~~~~~~ map_char_table (translation table, [range_start, range_end]) of ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ lambda (ch, translation): ~~~~~~~~~~~~~~~~~~~~~~~~~ put (ch, Qt) in a ~~~~~~~~~~~~~~~~~ put (translation, Qt) in b ~~~~~~~~~~~~~~~~~~~~~~~~~~ invert the range in a and truncate to [range_start, range_end] ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ put the union of a, b in rtab ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ This is to say, we want to map every character that has a translation ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ to its translation, and other characters to themselves. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ This assumes, as is reasonable in practice, that a translation ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ table maps individual characters to their translation, and does ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ not generally map multiple characters to the same translation. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ ~~ for (this_char = range_start; this_char <= range_end; this_char++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ SET_RANGETAB_BIT (RE_TRANSLATE (this_char)); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ } ~ else ~~~~ put_range_table (rtab, range_start, range_end, Qt); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return REG_NOERROR; ~~~~~~~~~~~~~~~~~~~ } ~ #endif /* MULE */ ~~~~~~~~~~~~~~~~~ #ifdef emacs ~~~~~~~~~~~~ reg_errcode_t ~~~~~~~~~~~~~ compile_char_class (re_wctype_t cc, Lisp_Object rtab, Bitbyte *flags_out) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ *flags_out |= re_wctype_to_bit (cc); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ switch (cc) ~~~~~~~~~~~ { ~ case RECC_ASCII: ~~~~~~~~~~~~~~~~ put_range_table (rtab, 0, 0x7f, Qt); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case RECC_XDIGIT: ~~~~~~~~~~~~~~~~~ put_range_table (rtab, 'a', 'f', Qt); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ put_range_table (rtab, 'A', 'f', Qt); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* fallthrough */ ~~~~~~~~~~~~~~~~~ case RECC_DIGIT: ~~~~~~~~~~~~~~~~ put_range_table (rtab, '0', '9', Qt); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case RECC_BLANK: ~~~~~~~~~~~~~~~~ put_range_table (rtab, ' ', ' ', Qt); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ put_range_table (rtab, '\t', '\t', Qt); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case RECC_PRINT: ~~~~~~~~~~~~~~~~ put_range_table (rtab, ' ', 0x7e, Qt); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ put_range_table (rtab, 0x80, CHAR_CODE_LIMIT, Qt); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case RECC_GRAPH: ~~~~~~~~~~~~~~~~ put_range_table (rtab, '!', 0x7e, Qt); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ put_range_table (rtab, 0x80, CHAR_CODE_LIMIT, Qt); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case RECC_NONASCII: ~~~~~~~~~~~~~~~~~~~ case RECC_MULTIBYTE: ~~~~~~~~~~~~~~~~~~~~ put_range_table (rtab, 0x80, CHAR_CODE_LIMIT, Qt); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case RECC_CNTRL: ~~~~~~~~~~~~~~~~ put_range_table (rtab, 0x00, 0x1f, Qt); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case RECC_UNIBYTE: ~~~~~~~~~~~~~~~~~~ /* Never true in XEmacs. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ /* The following all have their own bits in the class_bits argument to ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ charset_mule and charset_mule_not, they don't use the range table ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ information. */ ~~~~~~~~~~~~~~~ case RECC_ALPHA: ~~~~~~~~~~~~~~~~ case RECC_WORD: ~~~~~~~~~~~~~~~ case RECC_ALNUM: /* Equivalent to RECC_WORD */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_LOWER: ~~~~~~~~~~~~~~~~ case RECC_PUNCT: ~~~~~~~~~~~~~~~~ case RECC_SPACE: ~~~~~~~~~~~~~~~~ case RECC_UPPER: ~~~~~~~~~~~~~~~~ break; ~~~~~~ } ~ return REG_NOERROR; ~~~~~~~~~~~~~~~~~~~ } ~ #endif /* MULE */ ~~~~~~~~~~~~~~~~~ ~ /* re_compile_fastmap computes a ``fastmap'' for the compiled pattern in ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ BUFP. A fastmap records which of the (1 << BYTEWIDTH) possible ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ characters can start a string that matches the pattern. This fastmap ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ is used by re_search to skip quickly over impossible starting points. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ The caller must supply the address of a (1 << BYTEWIDTH)-byte data ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ area as BUFP->fastmap. ~~~~~~~~~~~~~~~~~~~~~~ We set the `fastmap', `fastmap_accurate', and `can_be_null' fields in ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the pattern buffer. ~~~~~~~~~~~~~~~~~~~ Returns 0 if we succeed, -2 if an internal error. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int ~~~ re_compile_fastmap (struct re_pattern_buffer *bufp ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RE_LISP_SHORT_CONTEXT_ARGS_DECL) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ int j, k; ~~~~~~~~~ #ifdef MATCH_MAY_ALLOCATE ~~~~~~~~~~~~~~~~~~~~~~~~~ fail_stack_type fail_stack; ~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ /* We don't push any register information onto the failure stack. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* &&#### this should be changed for 8-bit-fixed, for efficiency. see ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ comment marked with &&#### in re_search_2. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ REGISTER char *fastmap = bufp->fastmap; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ unsigned char *pattern = bufp->buffer; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ long size = bufp->used; ~~~~~~~~~~~~~~~~~~~~~~~ re_char *p = pattern; ~~~~~~~~~~~~~~~~~~~~~ REGISTER re_char *pend = pattern + size; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef REGEX_REL_ALLOC ~~~~~~~~~~~~~~~~~~~~~~ /* This holds the pointer to the failure stack, when ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ it is allocated relocatably. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fail_stack_elt_t *failure_stack_ptr; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ /* Assume that each path through the pattern can be null until ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ proven otherwise. We set this false at the bottom of switch ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ statement, to which we get only if a particular path doesn't ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ match the empty string. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_bool path_can_be_null = true; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* We aren't doing a `succeed_n' to begin with. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_bool succeed_n_p = false; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef ERROR_CHECK_MALLOC ~~~~~~~~~~~~~~~~~~~~~~~~~ /* The pattern comes from string data, not buffer data. We don't access ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ any buffer data, so we don't have to worry about malloc() (but the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ disallowed flag may have been set by a caller). */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int depth = bind_regex_malloc_disallowed (0); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ assert (fastmap != NULL && p != NULL); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ INIT_FAIL_STACK (); ~~~~~~~~~~~~~~~~~~~ memset (fastmap, 0, 1 << BYTEWIDTH); /* Assume nothing's valid. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->fastmap_accurate = 1; /* It will be when we're done. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->can_be_null = 0; ~~~~~~~~~~~~~~~~~~~~~~ while (1) ~~~~~~~~~ { ~ if (p == pend || *p == succeed) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ /* We have reached the (effective) end of pattern. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!FAIL_STACK_EMPTY ()) ~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ bufp->can_be_null |= path_can_be_null; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Reset for next path. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~ path_can_be_null = true; ~~~~~~~~~~~~~~~~~~~~~~~~ p = (unsigned char *) fail_stack.stack[--fail_stack.avail].pointer; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ continue; ~~~~~~~~~ } ~ else ~~~~ break; ~~~~~~ } ~ /* We should never be about to go beyond the end of the pattern. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ assert (p < pend); ~~~~~~~~~~~~~~~~~~ switch ((re_opcode_t) *p++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ /* I guess the idea here is to simply not bother with a fastmap ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if a backreference is used, since it's too hard to figure out ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the fastmap for the corresponding group. Setting ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `can_be_null' stops `re_search_2' from using the fastmap, so ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ that is all we do. */ ~~~~~~~~~~~~~~~~~~~~~~ case duplicate: ~~~~~~~~~~~~~~~ bufp->can_be_null = 1; ~~~~~~~~~~~~~~~~~~~~~~ goto done; ~~~~~~~~~~ /* Following are the cases which match a character. These end ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ with `break'. */ ~~~~~~~~~~~~~~~~~ case exactn: ~~~~~~~~~~~~ fastmap[p[1]] = 1; ~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case charset: ~~~~~~~~~~~~~ /* XEmacs: Under Mule, these bit vectors will ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ only contain values for characters below 0x80. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH))) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[j] = 1; ~~~~~~~~~~~~~~~ break; ~~~~~~ case charset_not: ~~~~~~~~~~~~~~~~~ /* Chars beyond end of map must be allowed. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef MULE ~~~~~~~~~~~ for (j = *p * BYTEWIDTH; j < 0x80; j++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[j] = 1; ~~~~~~~~~~~~~~~ /* And all extended characters must be allowed, too. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (j = 0x80; j < 0x100; j++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (ibyte_first_byte_p (j)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[j] = 1; ~~~~~~~~~~~~~~~ #else /* not MULE */ ~~~~~~~~~~~~~~~~~~~~ for (j = *p * BYTEWIDTH; j < (1 << BYTEWIDTH); j++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[j] = 1; ~~~~~~~~~~~~~~~ #endif /* MULE */ ~~~~~~~~~~~~~~~~~ for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!(p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH)))) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[j] = 1; ~~~~~~~~~~~~~~~ break; ~~~~~~ #ifdef MULE ~~~~~~~~~~~ case charset_mule: ~~~~~~~~~~~~~~~~~~ { ~ int nentries; ~~~~~~~~~~~~~ Bitbyte flags = *p++; ~~~~~~~~~~~~~~~~~~~~~ if (flags) ~~~~~~~~~~ { ~ /* We need to consult the syntax table, fastmap won't ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ work. */ ~~~~~~~~ bufp->can_be_null = 1; ~~~~~~~~~~~~~~~~~~~~~~ goto done; ~~~~~~~~~~ } ~ nentries = unified_range_table_nentries ((void *) p); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (j = 0; j < nentries; j++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ EMACS_INT first, last; ~~~~~~~~~~~~~~~~~~~~~~ Lisp_Object dummy_val; ~~~~~~~~~~~~~~~~~~~~~~ int jj; ~~~~~~~ Ibyte strr[MAX_ICHAR_LEN]; ~~~~~~~~~~~~~~~~~~~~~~~~~~ unified_range_table_get_range ((void *) p, j, &first, &last, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ &dummy_val); ~~~~~~~~~~~~ #ifndef UNICODE_INTERNAL ~~~~~~~~~~~~~~~~~~~~~~~~ for (jj = first; jj <= last && jj < 0x80; jj++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[jj] = 1; ~~~~~~~~~~~~~~~~ /* Ranges below 0x100 can span charsets, but there ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ are only two (Control-1 and Latin-1), and ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ either first or last has to be in them. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ set_itext_ichar (strr, first); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[*strr] = 1; ~~~~~~~~~~~~~~~~~~~ if (last < 0x100) ~~~~~~~~~~~~~~~~~ { ~ set_itext_ichar (strr, last); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[*strr] = 1; ~~~~~~~~~~~~~~~~~~~ } ~ else if (CHAR_CODE_LIMIT == last) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ /* This is RECC_MULTIBYTE or RECC_NONASCII; true for all ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ non-ASCII characters. */ ~~~~~~~~~~~~~~~~~~~~~~~~ jj = 0x80; ~~~~~~~~~~ while (jj < 0xA0) ~~~~~~~~~~~~~~~~~ { ~ fastmap[jj++] = 1; ~~~~~~~~~~~~~~~~~~ } ~ } ~ #else ~~~~~ /* Ranges can span charsets. We depend on the fact that ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ lead bytes are monotonically non-decreasing as ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ character values increase. @@#### This is a fairly ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ reasonable assumption in general (but DOES NOT WORK in ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ old Mule due to the ordering of private dimension-1 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ chars before official dimension-2 chars), and introduces ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ a dependency on the particular representation. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ Ibyte strrlast[MAX_ICHAR_LEN]; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ set_itext_ichar (strr, first); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ set_itext_ichar (strrlast, min (last, CHAR_CODE_LIMIT - 1)); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (jj = *strr; jj <= *strrlast; jj++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[jj] = 1; ~~~~~~~~~~~~~~~~ } ~ #endif /* not UNICODE_INTERNAL */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* If it's not a possible first byte, it can't be in the fastmap. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ In UTF-8, lead bytes are not contiguous with ASCII, so a ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ range spanning the ASCII/non-ASCII boundary will put ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ extraneous bytes in the range [0x80 - 0xBF] in the fastmap. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (j = 0x80; j < 0x100; j++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!ibyte_first_byte_p (j)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[j] = 0; ~~~~~~~~~~~~~~~ } ~ break; ~~~~~~ case charset_mule_not: ~~~~~~~~~~~~~~~~~~~~~~ { ~ int nentries; ~~~~~~~~~~~~~ int smallest_prev = 0; ~~~~~~~~~~~~~~~~~~~~~~ Bitbyte flags = *p++; ~~~~~~~~~~~~~~~~~~~~~ if (flags) ~~~~~~~~~~ { ~ /* We need to consult the syntax table, fastmap won't ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ work. */ ~~~~~~~~ bufp->can_be_null = 1; ~~~~~~~~~~~~~~~~~~~~~~ goto done; ~~~~~~~~~~ } ~ nentries = unified_range_table_nentries ((void *) p); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifndef UNICODE_INTERNAL ~~~~~~~~~~~~~~~~~~~~~~~~ for (j = 0; j < nentries; j++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ EMACS_INT first, last; ~~~~~~~~~~~~~~~~~~~~~~ Lisp_Object dummy_val; ~~~~~~~~~~~~~~~~~~~~~~ int jj; ~~~~~~~ unified_range_table_get_range ((void *) p, j, &first, &last, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ &dummy_val); ~~~~~~~~~~~~ for (jj = smallest_prev; jj < first && jj < 0x80; jj++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[jj] = 1; ~~~~~~~~~~~~~~~~ smallest_prev = last + 1; ~~~~~~~~~~~~~~~~~~~~~~~~~ if (smallest_prev >= 0x80) ~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ } ~ /* Also set lead bytes after the end */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (j = smallest_prev; j < 0x80; j++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[j] = 1; ~~~~~~~~~~~~~~~ /* Calculating which lead bytes are actually allowed ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ here is rather difficult, so we just punt and allow ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ all of them. ~~~~~~~~~~~~ */ ~~ for (j = 0x80; j < 0x100; j++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (ibyte_first_byte_p (j)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[j] = 1; ~~~~~~~~~~~~~~~ #else ~~~~~ for (j = 0; j < nentries; j++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ EMACS_INT first, last; ~~~~~~~~~~~~~~~~~~~~~~ /* This denotes a range of lead bytes that are not ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ in the fastmap. */ ~~~~~~~~~~~~~~~~~~ int firstlead, lastlead; ~~~~~~~~~~~~~~~~~~~~~~~~ Lisp_Object dummy_val; ~~~~~~~~~~~~~~~~~~~~~~ int jj; ~~~~~~~ unified_range_table_get_range ((void *) p, j, &first, &last, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ &dummy_val); ~~~~~~~~~~~~ /* With Unicode-internal, lead bytes that are entirely ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ within the range and not including the beginning or end ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ are definitely not in the fastmap. Leading bytes that ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ include the beginning or ending characters will be in ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the fastmap unless the beginning or ending characters ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ are the first or last character, respectively, that uses ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ this lead byte. ~~~~~~~~~~~~~~~ @@#### WARNING! In order to determine whether we are the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ first or last character using a lead byte we use and ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ embed in the code some knowledge of how UTF-8 works -- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ at least, the fact that the the first character using a ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ particular lead byte has the minimum-numbered trailing ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ byte in all its trailing bytes, and the last character ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ using a particular lead byte has the maximum-numbered ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ trailing byte in all its trailing bytes. We abstract ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ away the actual minimum/maximum trailing byte numbers, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ at least. We could perhaps do this more portably by ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ just looking at the representation of the character one ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ higher or lower and seeing if the lead byte changes, but ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ you'd run into the problem of invalid characters, e.g. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if you're at the edge of the range of surrogates or are ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the top-most allowed character. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ ~~ if (first < 0x80) ~~~~~~~~~~~~~~~~~ firstlead = first; ~~~~~~~~~~~~~~~~~~ else ~~~~ { ~ Ibyte strr[MAX_ICHAR_LEN]; ~~~~~~~~~~~~~~~~~~~~~~~~~~ Bytecount slen = set_itext_ichar (strr, first); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int kk; ~~~~~~~ /* Determine if we're the first character using our ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ leading byte. */ ~~~~~~~~~~~~~~~~ for (kk = 1; kk < slen; kk++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (strr[kk] != FIRST_TRAILING_BYTE) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ /* If not, this leading byte might occur, so ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ make sure it gets added to the fastmap. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ firstlead = *strr + 1; ~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ } ~ /* Otherwise, we're the first character using our ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ leading byte, and we don't need to add the leading ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ byte to the fastmap. (If our range doesn't ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ completely cover the leading byte, it will get added ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ anyway by the code handling the other end of the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ range.) */ ~~~~~~~~~~ firstlead = *strr; ~~~~~~~~~~~~~~~~~~ } ~ if (last < 0x80) ~~~~~~~~~~~~~~~~ lastlead = last; ~~~~~~~~~~~~~~~~ else ~~~~ { ~ Ibyte strr[MAX_ICHAR_LEN]; ~~~~~~~~~~~~~~~~~~~~~~~~~~ Bytecount slen ~~~~~~~~~~~~~~ = set_itext_ichar (strr, ~~~~~~~~~~~~~~~~~~~~~~~~ min (last, ~~~~~~~~~~ CHAR_CODE_LIMIT - 1)); ~~~~~~~~~~~~~~~~~~~~~~ int kk; ~~~~~~~ /* Same as above but for the last character using ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ our leading byte. */ ~~~~~~~~~~~~~~~~~~~~ for (kk = 1; kk < slen; kk++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (strr[kk] != LAST_TRAILING_BYTE) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ lastlead = *strr - 1; ~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ } ~ lastlead = *strr; ~~~~~~~~~~~~~~~~~ } ~ /* Now, FIRSTLEAD and LASTLEAD are set to the beginning and ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ end, inclusive, of a range of lead bytes that cannot be ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ in the fastmap. Essentially, we want to set all the other ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bytes to be in the fastmap. Here we handle those after ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the previous range and before this one. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (jj = smallest_prev; jj < firstlead; jj++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[jj] = 1; ~~~~~~~~~~~~~~~~ smallest_prev = lastlead + 1; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* Also set lead bytes after the end of the final range. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (j = smallest_prev; j < 0x100; j++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[j] = 1; ~~~~~~~~~~~~~~~ /* If it's not a possible first byte, it can't be in the fastmap. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ In UTF-8, lead bytes are not contiguous with ASCII, so a ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ range spanning the ASCII/non-ASCII boundary will put ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ extraneous bytes in the range [0x80 - 0xBF] in the fastmap. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (j = 0x80; j < 0x100; j++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!ibyte_first_byte_p (j)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[j] = 0; ~~~~~~~~~~~~~~~ #endif /* UNICODE_INTERNAL */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ break; ~~~~~~ #endif /* MULE */ ~~~~~~~~~~~~~~~~~ case anychar: ~~~~~~~~~~~~~ { ~ int fastmap_newline = fastmap['\n']; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* `.' matches anything ... */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef MULE ~~~~~~~~~~~ /* "anything" only includes bytes that can be the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ first byte of a character. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (j = 0; j < 0x100; j++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (ibyte_first_byte_p (j)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[j] = 1; ~~~~~~~~~~~~~~~ #else ~~~~~ for (j = 0; j < (1 << BYTEWIDTH); j++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[j] = 1; ~~~~~~~~~~~~~~~ #endif ~~~~~~ /* ... except perhaps newline. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!(bufp->syntax & RE_DOT_NEWLINE)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap['\n'] = fastmap_newline; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Return if we have already set `can_be_null'; if we have, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ then the fastmap is irrelevant. Something's wrong here. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ else if (bufp->can_be_null) ~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto done; ~~~~~~~~~~ /* Otherwise, have to check alternative paths. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ } ~ #ifndef emacs ~~~~~~~~~~~~~ case wordchar: ~~~~~~~~~~~~~~ for (j = 0; j < (1 << BYTEWIDTH); j++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (SYNTAX (ignored, j) == Sword) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[j] = 1; ~~~~~~~~~~~~~~~ break; ~~~~~~ case notwordchar: ~~~~~~~~~~~~~~~~~ for (j = 0; j < (1 << BYTEWIDTH); j++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (SYNTAX (ignored, j) != Sword) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[j] = 1; ~~~~~~~~~~~~~~~ break; ~~~~~~ #else /* emacs */ ~~~~~~~~~~~~~~~~~ case wordchar: ~~~~~~~~~~~~~~ case notwordchar: ~~~~~~~~~~~~~~~~~ case wordbound: ~~~~~~~~~~~~~~~ case notwordbound: ~~~~~~~~~~~~~~~~~~ case wordbeg: ~~~~~~~~~~~~~ case wordend: ~~~~~~~~~~~~~ case notsyntaxspec: ~~~~~~~~~~~~~~~~~~~ case syntaxspec: ~~~~~~~~~~~~~~~~ /* This match depends on text properties. These end with ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ aborting optimizations. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->can_be_null = 1; ~~~~~~~~~~~~~~~~~~~~~~ goto done; ~~~~~~~~~~ #if 0 /* all of the following code is unused now that the `syntax-table' ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ property exists -- it's trickier to do this than just look in ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the buffer. &&#### but we could just use the syntax-cache stuff ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ instead; why don't we? --ben */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case wordchar: ~~~~~~~~~~~~~~ k = (int) Sword; ~~~~~~~~~~~~~~~~ goto matchsyntax; ~~~~~~~~~~~~~~~~~ case notwordchar: ~~~~~~~~~~~~~~~~~ k = (int) Sword; ~~~~~~~~~~~~~~~~ goto matchnotsyntax; ~~~~~~~~~~~~~~~~~~~~ case syntaxspec: ~~~~~~~~~~~~~~~~ k = *p++; ~~~~~~~~~ matchsyntax: ~~~~~~~~~~~~ #ifdef MULE ~~~~~~~~~~~ for (j = 0; j < 0x80; j++) ~~~~~~~~~~~~~~~~~~~~~~~~~~ if (SYNTAX ~~~~~~~~~~ (XCHAR_TABLE (BUFFER_MIRROR_SYNTAX_TABLE (lispbuf)), j) == ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (enum syntaxcode) k) ~~~~~~~~~~~~~~~~~~~~ fastmap[j] = 1; ~~~~~~~~~~~~~~~ /* @@#### To be correct, we need to set the fastmap for any ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ lead byte any of whose characters can have this syntax code. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ This is hard to calculate so we just punt for now. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (j = 0x80; j < 0x100; j++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (ibyte_first_byte_p (j)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[j] = 1; ~~~~~~~~~~~~~~~ #else /* not MULE */ ~~~~~~~~~~~~~~~~~~~~ for (j = 0; j < (1 << BYTEWIDTH); j++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (SYNTAX ~~~~~~~~~~ (XCHAR_TABLE (BUFFER_MIRROR_SYNTAX_TABLE (lispbuf)), j) == ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (enum syntaxcode) k) ~~~~~~~~~~~~~~~~~~~~ fastmap[j] = 1; ~~~~~~~~~~~~~~~ #endif /* MULE */ ~~~~~~~~~~~~~~~~~ break; ~~~~~~ case notsyntaxspec: ~~~~~~~~~~~~~~~~~~~ k = *p++; ~~~~~~~~~ matchnotsyntax: ~~~~~~~~~~~~~~~ #ifdef MULE ~~~~~~~~~~~ for (j = 0; j < 0x80; j++) ~~~~~~~~~~~~~~~~~~~~~~~~~~ if (SYNTAX ~~~~~~~~~~ (XCHAR_TABLE ~~~~~~~~~~~~ (BUFFER_MIRROR_SYNTAX_TABLE (lispbuf)), j) != ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (enum syntaxcode) k) ~~~~~~~~~~~~~~~~~~~~ fastmap[j] = 1; ~~~~~~~~~~~~~~~ /* @@#### To be correct, we need to set the fastmap for any ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ lead byte all of whose characters do not have this syntax code. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ This is hard to calculate so we just punt for now. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (j = 0x80; j < 0x100; j++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (ibyte_first_byte_p (j)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[j] = 1; ~~~~~~~~~~~~~~~ #else /* not MULE */ ~~~~~~~~~~~~~~~~~~~~ for (j = 0; j < (1 << BYTEWIDTH); j++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (SYNTAX ~~~~~~~~~~ (XCHAR_TABLE ~~~~~~~~~~~~ (BUFFER_MIRROR_SYNTAX_TABLE (lispbuf)), j) != ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (enum syntaxcode) k) ~~~~~~~~~~~~~~~~~~~~ fastmap[j] = 1; ~~~~~~~~~~~~~~~ #endif /* MULE */ ~~~~~~~~~~~~~~~~~ break; ~~~~~~ #endif /* 0 */ ~~~~~~~~~~~~~~ #ifdef MULE ~~~~~~~~~~~ /* 97/2/17 jhod category patch */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case categoryspec: ~~~~~~~~~~~~~~~~~~ case notcategoryspec: ~~~~~~~~~~~~~~~~~~~~~ bufp->can_be_null = 1; ~~~~~~~~~~~~~~~~~~~~~~ UNBIND_REGEX_MALLOC_CHECK (); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return 0; ~~~~~~~~~ /* end if category patch */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif /* MULE */ ~~~~~~~~~~~~~~~~~ /* All cases after this match the empty string. These end with ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `continue'. */ ~~~~~~~~~~~~~~~ case before_dot: ~~~~~~~~~~~~~~~~ case at_dot: ~~~~~~~~~~~~ case after_dot: ~~~~~~~~~~~~~~~ continue; ~~~~~~~~~ #endif /* emacs */ ~~~~~~~~~~~~~~~~~~ case no_op: ~~~~~~~~~~~ case begline: ~~~~~~~~~~~~~ case endline: ~~~~~~~~~~~~~ case begbuf: ~~~~~~~~~~~~ case endbuf: ~~~~~~~~~~~~ #ifndef emacs ~~~~~~~~~~~~~ case wordbound: ~~~~~~~~~~~~~~~ case notwordbound: ~~~~~~~~~~~~~~~~~~ case wordbeg: ~~~~~~~~~~~~~ case wordend: ~~~~~~~~~~~~~ #endif ~~~~~~ case push_dummy_failure: ~~~~~~~~~~~~~~~~~~~~~~~~ continue; ~~~~~~~~~ case jump_n: ~~~~~~~~~~~~ case pop_failure_jump: ~~~~~~~~~~~~~~~~~~~~~~ case maybe_pop_jump: ~~~~~~~~~~~~~~~~~~~~ case jump: ~~~~~~~~~~ case jump_past_alt: ~~~~~~~~~~~~~~~~~~~ case dummy_failure_jump: ~~~~~~~~~~~~~~~~~~~~~~~~ EXTRACT_NUMBER_AND_INCR (j, p); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ p += j; ~~~~~~~ if (j > 0) ~~~~~~~~~~ continue; ~~~~~~~~~ /* Jump backward implies we just went through the body of a ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ loop and matched nothing. Opcode jumped to should be ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `on_failure_jump' or `succeed_n'. Just treat it like an ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ordinary jump. For a * loop, it has pushed its failure ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ point already; if so, discard that as redundant. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if ((re_opcode_t) *p != on_failure_jump ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ && (re_opcode_t) *p != succeed_n) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ continue; ~~~~~~~~~ p++; ~~~~ EXTRACT_NUMBER_AND_INCR (j, p); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ p += j; ~~~~~~~ /* If what's on the stack is where we are now, pop it. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!FAIL_STACK_EMPTY () ~~~~~~~~~~~~~~~~~~~~~~~~ && fail_stack.stack[fail_stack.avail - 1].pointer == p) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fail_stack.avail--; ~~~~~~~~~~~~~~~~~~~ continue; ~~~~~~~~~ case on_failure_jump: ~~~~~~~~~~~~~~~~~~~~~ case on_failure_keep_string_jump: ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ handle_on_failure_jump: ~~~~~~~~~~~~~~~~~~~~~~~ EXTRACT_NUMBER_AND_INCR (j, p); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* For some patterns, e.g., `(a?)?', `p+j' here points to the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ end of the pattern. We don't want to push such a point, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ since when we restore it above, entering the switch will ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ increment `p' past the end of the pattern. We don't need ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ to push such a point since we obviously won't find any more ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap entries beyond `pend'. Such a pattern can match ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the null string, though. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (p + j < pend) ~~~~~~~~~~~~~~~~~ { ~ if (!PUSH_PATTERN_OP (p + j, fail_stack)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ RESET_FAIL_STACK (); ~~~~~~~~~~~~~~~~~~~~ UNBIND_REGEX_MALLOC_CHECK (); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return -2; ~~~~~~~~~~ } ~ } ~ else ~~~~ bufp->can_be_null = 1; ~~~~~~~~~~~~~~~~~~~~~~ if (succeed_n_p) ~~~~~~~~~~~~~~~~ { ~ EXTRACT_NUMBER_AND_INCR (k, p); /* Skip the n. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ succeed_n_p = false; ~~~~~~~~~~~~~~~~~~~~ } ~ continue; ~~~~~~~~~ case succeed_n: ~~~~~~~~~~~~~~~ /* Get to the number of times to succeed. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ p += 2; ~~~~~~~ /* Increment p past the n for when k != 0. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ EXTRACT_NONNEGATIVE_AND_INCR (k, p); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (k == 0) ~~~~~~~~~~~ { ~ p -= 4; ~~~~~~~ succeed_n_p = true; /* Spaghetti code alert. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto handle_on_failure_jump; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ continue; ~~~~~~~~~ case set_number_at: ~~~~~~~~~~~~~~~~~~~ p += 4; ~~~~~~~ continue; ~~~~~~~~~ case start_memory: ~~~~~~~~~~~~~~~~~~ case stop_memory: ~~~~~~~~~~~~~~~~~ p += 4; ~~~~~~~ continue; ~~~~~~~~~ default: ~~~~~~~~ ABORT (); /* We have listed all the cases. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } /* switch *p++ */ ~~~~~~~~~~~~~~~~~~~ /* Getting here means we have found the possible starting ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ characters for one path of the pattern -- and that the empty ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ string does not match. We need not follow this path further. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Instead, look at the next alternative (remembered on the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ stack), or quit if no more. The test at the top of the loop ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ does these things. */ ~~~~~~~~~~~~~~~~~~~~~~ path_can_be_null = false; ~~~~~~~~~~~~~~~~~~~~~~~~~ p = pend; ~~~~~~~~~ } /* while p */ ~~~~~~~~~~~~~~~ /* Set `can_be_null' for the last path (also the first path, if the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ pattern is empty). */ ~~~~~~~~~~~~~~~~~~~~~~ bufp->can_be_null |= path_can_be_null; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ done: ~~~~~ RESET_FAIL_STACK (); ~~~~~~~~~~~~~~~~~~~~ UNBIND_REGEX_MALLOC_CHECK (); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return 0; ~~~~~~~~~ } /* re_compile_fastmap */ ~~~~~~~~~~~~~~~~~~~~~~~~~~ ~ /* Set REGS to hold NUM_REGS registers, storing them in STARTS and ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ENDS. Subsequent matches using PATTERN_BUFFER and REGS will use ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ this memory for recording register information. STARTS and ENDS ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ must be allocated using the malloc library routine, and must each ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ be at least NUM_REGS * sizeof (regoff_t) bytes long. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ If NUM_REGS == 0, then subsequent matches should allocate their own ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ register data. ~~~~~~~~~~~~~~ Unless this function is called, the first search or match using ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PATTERN_BUFFER will allocate its own register data, without ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ freeing the old data. */ ~~~~~~~~~~~~~~~~~~~~~~~~~ void ~~~~ re_set_registers (struct re_pattern_buffer *bufp, struct re_registers *regs, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int num_regs, regoff_t *starts, regoff_t *ends) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ if (num_regs) ~~~~~~~~~~~~~ { ~ bufp->regs_allocated = REGS_REALLOCATE; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regs->num_regs = num_regs; ~~~~~~~~~~~~~~~~~~~~~~~~~~ regs->start = starts; ~~~~~~~~~~~~~~~~~~~~~ regs->end = ends; ~~~~~~~~~~~~~~~~~ } ~ else ~~~~ { ~ bufp->regs_allocated = REGS_UNALLOCATED; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regs->num_regs = 0; ~~~~~~~~~~~~~~~~~~~ regs->start = regs->end = (regoff_t *) 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ } ~ ~ /* Searching routines. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Like re_search_2, below, but only one string is specified, and ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ doesn't let you say where to stop matching. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int ~~~ re_search (struct re_pattern_buffer *bufp, const char *string, int size, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int startpos, int range, struct re_registers *regs ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RE_LISP_CONTEXT_ARGS_DECL) ~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ return re_search_2 (bufp, NULL, 0, string, size, startpos, range, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regs, size RE_LISP_CONTEXT_ARGS); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* Using the compiled pattern in BUFP->buffer, first tries to match the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ virtual concatenation of STRING1 and STRING2, starting first at index ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ STARTPOS, then at STARTPOS + 1, and so on. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ STRING1 and STRING2 have length SIZE1 and SIZE2, respectively. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RANGE is how far to scan while trying to match. RANGE = 0 means try ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ only at STARTPOS; in general, the last start tried is STARTPOS + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RANGE. ~~~~~~ All sizes and positions refer to bytes (not chars); under Mule, the code ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ knows about the format of the text and will only check at positions ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ where a character starts. ~~~~~~~~~~~~~~~~~~~~~~~~~ With MULE, RANGE is a byte position, not a char position. The last ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ start tried is the character starting <= STARTPOS + RANGE. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ In REGS, return the indices of the virtual concatenation of STRING1 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ and STRING2 that matched the entire BUFP->buffer and its contained ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ subexpressions. ~~~~~~~~~~~~~~~ Do not consider matching one past the index STOP in the virtual ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ concatenation of STRING1 and STRING2. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ We return either the position in the strings at which the match was ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ found, -1 if no match, or -2 if error (such as failure ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ stack overflow). */ ~~~~~~~~~~~~~~~~~~~~ int ~~~ re_search_2 (struct re_pattern_buffer *bufp, const char *str1, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int size1, const char *str2, int size2, int startpos, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int range, struct re_registers *regs, int stop ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RE_LISP_CONTEXT_ARGS_DECL) ~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ int val; ~~~~~~~~ re_char *string1 = (re_char *) str1; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_char *string2 = (re_char *) str2; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ REGISTER char *fastmap = bufp->fastmap; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ REGISTER RE_TRANSLATE_TYPE translate = bufp->translate; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int total_size = size1 + size2; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int endpos = startpos + range; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef REGEX_BEGLINE_CHECK ~~~~~~~~~~~~~~~~~~~~~~~~~~ int anchored_at_begline = 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ re_char *d; ~~~~~~~~~~~ #ifdef emacs ~~~~~~~~~~~~ Internal_Format fmt = buffer_or_other_internal_format (lispobj); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef REL_ALLOC ~~~~~~~~~~~~~~~~ const Ibyte *orig_buftext = ~~~~~~~~~~~~~~~~~~~~~~~~~~~ BUFFERP (lispobj) ? ~~~~~~~~~~~~~~~~~~~ BYTE_BUF_BYTE_ADDRESS (XBUFFER (lispobj), ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ BYTE_BUF_BEG (XBUFFER (lispobj))) : ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 0; ~~ #endif ~~~~~~ #ifdef ERROR_CHECK_MALLOC ~~~~~~~~~~~~~~~~~~~~~~~~~ int depth; ~~~~~~~~~~ #endif ~~~~~~ #endif /* emacs */ ~~~~~~~~~~~~~~~~~~ int forward_search_p; ~~~~~~~~~~~~~~~~~~~~~ /* Check for out-of-range STARTPOS. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (startpos < 0 || startpos > total_size) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return -1; ~~~~~~~~~~ /* Fix up RANGE if it might eventually take us outside ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the virtual concatenation of STRING1 and STRING2. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (endpos < 0) ~~~~~~~~~~~~~~~ range = 0 - startpos; ~~~~~~~~~~~~~~~~~~~~~ else if (endpos > total_size) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ range = total_size - startpos; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ forward_search_p = range > 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (void) (forward_search_p); /* This is only used with assertions, silence the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ compiler warning when they're turned off. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* If the search isn't to be a backwards one, don't waste time in a ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ search for a pattern that must be anchored. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (bufp->used > 0 && (re_opcode_t) bufp->buffer[0] == begbuf && range > 0) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ if (startpos > 0) ~~~~~~~~~~~~~~~~~ return -1; ~~~~~~~~~~ else ~~~~ { ~ d = ((const unsigned char *) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (startpos >= size1 ? string2 - size1 : string1) + startpos); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ range = itext_ichar_len_fmt (d, fmt); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ } ~ #ifdef emacs ~~~~~~~~~~~~ /* In a forward search for something that starts with \=. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ don't keep searching past point. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (bufp->used > 0 && (re_opcode_t) bufp->buffer[0] == at_dot && range > 0) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ if (!BUFFERP (lispobj)) ~~~~~~~~~~~~~~~~~~~~~~~ return -1; ~~~~~~~~~~ range = (BYTE_BUF_PT (XBUFFER (lispobj)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - BYTE_BUF_BEGV (XBUFFER (lispobj)) - startpos); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (range < 0) ~~~~~~~~~~~~~~ return -1; ~~~~~~~~~~ } ~ #endif /* emacs */ ~~~~~~~~~~~~~~~~~~ #ifdef ERROR_CHECK_MALLOC ~~~~~~~~~~~~~~~~~~~~~~~~~ /* Do this after the above return()s. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ depth = bind_regex_malloc_disallowed (1); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ /* Update the fastmap now if not correct already. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ BEGIN_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~~~ if (fastmap && !bufp->fastmap_accurate) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (re_compile_fastmap (bufp RE_LISP_SHORT_CONTEXT_ARGS) == -2) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ END_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~ UNBIND_REGEX_MALLOC_CHECK (); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return -2; ~~~~~~~~~~ } ~ END_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~ RE_SEARCH_RELOCATE_MOVEABLE_DATA_POINTERS (); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef REGEX_BEGLINE_CHECK ~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ long i = 0; ~~~~~~~~~~~ while (i < bufp->used) ~~~~~~~~~~~~~~~~~~~~~~ { ~ if (bufp->buffer[i] == start_memory || ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->buffer[i] == stop_memory) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ i += 4; ~~~~~~~ else ~~~~ break; ~~~~~~ } ~ anchored_at_begline = i < bufp->used && bufp->buffer[i] == begline; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ #endif ~~~~~~ #ifdef emacs ~~~~~~~~~~~~ BEGIN_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~~~ /* Update the mirror syntax table if it's used and dirty. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ SYNTAX_CODE (BUFFER_MIRROR_SYNTAX_TABLE (lispbuf), 'a'); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ scache = setup_syntax_cache (scache, lispobj, lispbuf, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ offset_to_bytexpos (lispobj, startpos), ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 1); ~~~ END_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~ RE_SEARCH_RELOCATE_MOVEABLE_DATA_POINTERS (); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ /* Loop through the string, looking for a place to start matching. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (;;) ~~~~~~~~ { ~ #ifdef REGEX_BEGLINE_CHECK ~~~~~~~~~~~~~~~~~~~~~~~~~~ /* If the regex is anchored at the beginning of a line (i.e. with a ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ^), then we can speed things up by skipping to the next ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ beginning-of-line. However, to determine "beginning of line" we ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ need to look at the previous char, so can't do this check if at ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ beginning of either string. (Well, we could if at the beginning of ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the second string, but it would require additional code, and this ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ is just an optimization.) */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (anchored_at_begline && startpos > 0 && startpos != size1) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ if (range > 0) ~~~~~~~~~~~~~~ { ~ /* whose stupid idea was it anyway to make this ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ function take two strings to match?? */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int lim = 0; ~~~~~~~~~~~~ re_char *orig_d; ~~~~~~~~~~~~~~~~ re_char *stop_d; ~~~~~~~~~~~~~~~~ /* Compute limit as below in fastmap code, so we are guaranteed ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ to remain within a single string. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (startpos < size1 && startpos + range >= size1) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ lim = range - (size1 - startpos); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ d = ((const unsigned char *) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (startpos >= size1 ? string2 - size1 : string1) + startpos); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ orig_d = d; ~~~~~~~~~~~ stop_d = d + range - lim; ~~~~~~~~~~~~~~~~~~~~~~~~~ /* We want to find the next location (including the current ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ one) where the previous char is a newline, so back up one ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ and search forward for a newline. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEC_IBYTEPTR_FMT (d, fmt); /* Ok, since startpos != size1. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Written out as an if-else to avoid testing `translate' ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ inside the loop. */ ~~~~~~~~~~~~~~~~~~~~ if (TRANSLATE_P (translate)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ while (d < stop_d && ~~~~~~~~~~~~~~~~~~~~ RE_TRANSLATE_1 (itext_ichar_fmt (d, fmt, lispobj)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ != '\n') ~~~~~~~~ INC_IBYTEPTR_FMT (d, fmt); ~~~~~~~~~~~~~~~~~~~~~~~~~~ else ~~~~ while (d < stop_d && ~~~~~~~~~~~~~~~~~~~~ itext_ichar_ascii_fmt (d, fmt, lispobj) != '\n') ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ INC_IBYTEPTR_FMT (d, fmt); ~~~~~~~~~~~~~~~~~~~~~~~~~~ /* If we were stopped by a newline, skip forward over it. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Otherwise we will get in an infloop when our start position ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ was at begline. */ ~~~~~~~~~~~~~~~~~~ if (d < stop_d) ~~~~~~~~~~~~~~~ INC_IBYTEPTR_FMT (d, fmt); ~~~~~~~~~~~~~~~~~~~~~~~~~~ range -= d - orig_d; ~~~~~~~~~~~~~~~~~~~~ startpos += d - orig_d; ~~~~~~~~~~~~~~~~~~~~~~~ assert (!forward_search_p || range >= 0); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ else if (range < 0) ~~~~~~~~~~~~~~~~~~~ { ~ /* We're lazy, like in the fastmap code below */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Ichar c; ~~~~~~~~ d = ((const unsigned char *) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (startpos >= size1 ? string2 - size1 : string1) + startpos); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEC_IBYTEPTR_FMT (d, fmt); ~~~~~~~~~~~~~~~~~~~~~~~~~~ c = itext_ichar_fmt (d, fmt, lispobj); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ c = RE_TRANSLATE (c); ~~~~~~~~~~~~~~~~~~~~~ if (c != '\n') ~~~~~~~~~~~~~~ goto advance; ~~~~~~~~~~~~~ } ~ } ~ #endif /* REGEX_BEGLINE_CHECK */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* If a fastmap is supplied, skip quickly over characters that ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ cannot be the start of a match. If the pattern can match the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ null string, however, we don't need to skip characters; we want ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the first null string. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~ if (fastmap && startpos < total_size && !bufp->can_be_null) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ /* For the moment, fastmap always works as if buffer ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ is in default format, so convert chars in the search strings ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ into default format as we go along, if necessary. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ &&#### fastmap needs rethinking for 8-bit-fixed so ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ it's faster. We need it to reflect the raw ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 8-bit-fixed values. That isn't so hard if we assume ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ that the top 96 bytes represent a single 1-byte ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ charset. For 16-bit/32-bit stuff it's probably not ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ worth it to make the fastmap represent the raw, due to ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ its nature -- we'd have to use the LSB for the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap, and that causes lots of problems with Mule ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ chars, where it essentially wipes out the usefulness ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ of the fastmap entirely. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (range > 0) /* Searching forwards. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ int lim = 0; ~~~~~~~~~~~~ int irange = range; ~~~~~~~~~~~~~~~~~~~ if (startpos < size1 && startpos + range >= size1) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ lim = range - (size1 - startpos); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ d = ((const unsigned char *) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (startpos >= size1 ? string2 - size1 : string1) + startpos); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Written out as an if-else to avoid testing `translate' ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ inside the loop. */ ~~~~~~~~~~~~~~~~~~~~ if (TRANSLATE_P (translate)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ while (range > lim) ~~~~~~~~~~~~~~~~~~~ { ~ re_char *old_d = d; ~~~~~~~~~~~~~~~~~~~ #ifdef MULE ~~~~~~~~~~~ Ibyte tempch[MAX_ICHAR_LEN]; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Ichar buf_ch = ~~~~~~~~~~~~~~ RE_TRANSLATE_1 (itext_ichar_fmt (d, fmt, lispobj)); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ set_itext_ichar (tempch, buf_ch); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (fastmap[*tempch]) ~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ #else ~~~~~ if (fastmap[(unsigned char) RE_TRANSLATE_1 (*d)]) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ #endif /* MULE */ ~~~~~~~~~~~~~~~~~ INC_IBYTEPTR_FMT (d, fmt); ~~~~~~~~~~~~~~~~~~~~~~~~~~ range -= (d - old_d); ~~~~~~~~~~~~~~~~~~~~~ assert (!forward_search_p || range >= 0); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ } ~ #ifdef MULE ~~~~~~~~~~~ else if (fmt != FORMAT_DEFAULT) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ while (range > lim) ~~~~~~~~~~~~~~~~~~~ { ~ re_char *old_d = d; ~~~~~~~~~~~~~~~~~~~ Ibyte tempch[MAX_ICHAR_LEN]; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Ichar buf_ch = itext_ichar_fmt (d, fmt, lispobj); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ set_itext_ichar (tempch, buf_ch); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (fastmap[*tempch]) ~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ INC_IBYTEPTR_FMT (d, fmt); ~~~~~~~~~~~~~~~~~~~~~~~~~~ range -= (d - old_d); ~~~~~~~~~~~~~~~~~~~~~ assert (!forward_search_p || range >= 0); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ } ~ #endif /* MULE */ ~~~~~~~~~~~~~~~~~ else ~~~~ { ~ while (range > lim && !fastmap[*d]) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ re_char *old_d = d; ~~~~~~~~~~~~~~~~~~~ INC_IBYTEPTR (d); ~~~~~~~~~~~~~~~~~ range -= (d - old_d); ~~~~~~~~~~~~~~~~~~~~~ assert (!forward_search_p || range >= 0); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ } ~ startpos += irange - range; ~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ else /* Searching backwards. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ /* #### It's not clear why we don't just write a loop, like ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for the moving-forward case. Perhaps the writer got lazy, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ since backward searches aren't so common. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ d = ((const unsigned char *) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (startpos >= size1 ? string2 - size1 : string1) + startpos); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef MULE ~~~~~~~~~~~ { ~ Ibyte tempch[MAX_ICHAR_LEN]; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Ichar buf_ch = ~~~~~~~~~~~~~~ RE_TRANSLATE (itext_ichar_fmt (d, fmt, lispobj)); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ set_itext_ichar (tempch, buf_ch); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!fastmap[*tempch]) ~~~~~~~~~~~~~~~~~~~~~~ goto advance; ~~~~~~~~~~~~~ } ~ #else ~~~~~ if (!fastmap[(unsigned char) RE_TRANSLATE (*d)]) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto advance; ~~~~~~~~~~~~~ #endif /* MULE */ ~~~~~~~~~~~~~~~~~ } ~ } ~ /* If can't match the null string, and that's all we have left, fail. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (range >= 0 && startpos == total_size && fastmap ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ && !bufp->can_be_null) ~~~~~~~~~~~~~~~~~~~~~~ { ~ UNBIND_REGEX_MALLOC_CHECK (); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return -1; ~~~~~~~~~~ } ~ #ifdef emacs /* XEmacs added, w/removal of immediate_quit */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!no_quit_in_re_search) ~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ BEGIN_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~~~ QUIT; ~~~~~ END_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~ RE_SEARCH_RELOCATE_MOVEABLE_DATA_POINTERS (); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ #endif ~~~~~~ BEGIN_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~~~ val = re_match_2_internal (bufp, string1, size1, string2, size2, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ startpos, regs, stop ~~~~~~~~~~~~~~~~~~~~ RE_LISP_CONTEXT_ARGS); ~~~~~~~~~~~~~~~~~~~~~~ #ifndef REGEX_MALLOC ~~~~~~~~~~~~~~~~~~~~ ALLOCA_GARBAGE_COLLECT (); ~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ END_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~ RE_SEARCH_RELOCATE_MOVEABLE_DATA_POINTERS (); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (val >= 0) ~~~~~~~~~~~~~ { ~ UNBIND_REGEX_MALLOC_CHECK (); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return startpos; ~~~~~~~~~~~~~~~~ } ~ if (val == -2) ~~~~~~~~~~~~~~ { ~ UNBIND_REGEX_MALLOC_CHECK (); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return -2; ~~~~~~~~~~ } ~ RE_SEARCH_RELOCATE_MOVEABLE_DATA_POINTERS (); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ advance: ~~~~~~~~ if (!range) ~~~~~~~~~~~ break; ~~~~~~ else if (range > 0) ~~~~~~~~~~~~~~~~~~~ { ~ Bytecount d_size; ~~~~~~~~~~~~~~~~~ d = ((const unsigned char *) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (startpos >= size1 ? string2 - size1 : string1) + startpos); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ d_size = itext_ichar_len_fmt (d, fmt); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ range -= d_size; ~~~~~~~~~~~~~~~~ assert (!forward_search_p || range >= 0); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ startpos += d_size; ~~~~~~~~~~~~~~~~~~~ } ~ else ~~~~ { ~ Bytecount d_size; ~~~~~~~~~~~~~~~~~ /* Note startpos > size1 not >=. If we are on the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ string1/string2 boundary, we want to backup into string1. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ d = ((const unsigned char *) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (startpos > size1 ? string2 - size1 : string1) + startpos); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEC_IBYTEPTR_FMT (d, fmt); ~~~~~~~~~~~~~~~~~~~~~~~~~~ d_size = itext_ichar_len_fmt (d, fmt); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ range += d_size; ~~~~~~~~~~~~~~~~ assert (!forward_search_p || range >= 0); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ startpos -= d_size; ~~~~~~~~~~~~~~~~~~~ } ~ } ~ UNBIND_REGEX_MALLOC_CHECK (); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return -1; ~~~~~~~~~~ } /* re_search_2 */ ~~~~~~~~~~~~~~~~~~~ ~ /* Declarations and macros for re_match_2. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* This converts PTR, a pointer into one of the search strings `string1' ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ and `string2' into an offset from the beginning of that string. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define POINTER_TO_OFFSET(ptr) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (FIRST_STRING_P (ptr) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~ ? ((regoff_t) ((ptr) - string1)) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ : ((regoff_t) ((ptr) - string2 + size1))) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Macros for dealing with the split strings in re_match_2. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define MATCHING_IN_FIRST_STRING (dend == end_match_1) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Call before fetching a character with *d. This switches over to ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ string2 if necessary. */ ~~~~~~~~~~~~~~~~~~~~~~~~~ #define REGEX_PREFETCH() \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ while (d == dend) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { \ ~~~~~~~~~~~ /* End of string2 => fail. */ \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (dend == end_match_2) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto fail; \ ~~~~~~~~~~~~~~~~~~ /* End of string1 => advance to string2. */ \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ d = string2; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~ dend = end_match_2; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* Test if at very beginning or at very end of the virtual concatenation ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ of `string1' and `string2'. If only one string, it's `string2'. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define AT_STRINGS_BEG(d) ((d) == (size1 ? string1 : string2) || !size2) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define AT_STRINGS_END(d) ((d) == end2) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* XEmacs change: ~~~~~~~~~~~~~~~~~ If the given position straddles the string gap, return the equivalent ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ position that is before or after the gap, respectively; otherwise, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return the same position. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define POS_BEFORE_GAP_UNSAFE(d) ((d) == string2 ? end1 : (d)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define POS_AFTER_GAP_UNSAFE(d) ((d) == end1 ? string2 : (d)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Test if CH is a word-constituent character. (XEmacs change) */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define WORDCHAR_P(ch) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (SYNTAX (BUFFER_MIRROR_SYNTAX_TABLE (lispbuf), ch) == Sword) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Free everything we malloc. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef MATCH_MAY_ALLOCATE ~~~~~~~~~~~~~~~~~~~~~~~~~ #define FREE_VAR(var,type) if (var) REGEX_FREE (var, type); var = NULL ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define FREE_VARIABLES() \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ do { \ ~~~~~~~~~~~~~~ UNBIND_REGEX_MALLOC_CHECK (); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ REGEX_FREE_STACK (fail_stack.stack); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ FREE_VAR (regstart, re_char **); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ FREE_VAR (regend, re_char **); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ FREE_VAR (old_regstart, re_char **); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ FREE_VAR (old_regend, re_char **); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ FREE_VAR (best_regstart, re_char **); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ FREE_VAR (best_regend, re_char **); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ FREE_VAR (reg_info, register_info_type *); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ FREE_VAR (reg_dummy, re_char **); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ FREE_VAR (reg_info_dummy, register_info_type *); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } while (0) ~~~~~~~~~~~ #else /* not MATCH_MAY_ALLOCATE */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define FREE_VARIABLES() \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ do { \ ~~~~~~~~~~~ UNBIND_REGEX_MALLOC_CHECK (); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } while (0) ~~~~~~~~~~~ #endif /* MATCH_MAY_ALLOCATE */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* These values must meet several constraints. They must not be valid ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ register values, which means we can use numbers larger than MAX_REGNUM. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ They must differ by 1, because of NUM_FAILURE_ITEMS above. And the value ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for the lowest register must be larger than the value for the highest ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ register, so we do not try to actually save any registers when none are ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ active. */ ~~~~~~~~~~~ #define NO_HIGHEST_ACTIVE_REG (MAX_REGNUM + 1) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define NO_LOWEST_ACTIVE_REG (NO_HIGHEST_ACTIVE_REG + 1) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ~ /* Matching routines. */ ~~~~~~~~~~~~~~~~~~~~~~~~~ #ifndef emacs /* XEmacs never uses this. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* re_match is like re_match_2 except it takes only a single string. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int ~~~ re_match (struct re_pattern_buffer *bufp, const char *string, int size, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int pos, struct re_registers *regs ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RE_LISP_CONTEXT_ARGS_DECL) ~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ int result = re_match_2_internal (bufp, NULL, 0, (re_char *) string, size, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ pos, regs, size ~~~~~~~~~~~~~~~ RE_LISP_CONTEXT_ARGS); ~~~~~~~~~~~~~~~~~~~~~~ ALLOCA_GARBAGE_COLLECT (); ~~~~~~~~~~~~~~~~~~~~~~~~~~ return result; ~~~~~~~~~~~~~~ } ~ #endif /* not emacs */ ~~~~~~~~~~~~~~~~~~~~~~ /* re_match_2 matches the compiled pattern in BUFP against the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (virtual) concatenation of STRING1 and STRING2 (of length SIZE1 and ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ SIZE2, respectively). We start matching at POS, and stop matching ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ at STOP. ~~~~~~~~ If REGS is non-null and the `no_sub' field of BUFP is nonzero, we ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ store offsets for the substring each group matched in REGS. See the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ documentation for exactly how many groups we fill. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ We return -1 if no match, -2 if an internal error (such as the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ failure stack overflowing). Otherwise, we return the length of the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ matched substring. */ ~~~~~~~~~~~~~~~~~~~~~~ int ~~~ re_match_2 (struct re_pattern_buffer *bufp, const char *string1, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int size1, const char *string2, int size2, int pos, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ struct re_registers *regs, int stop ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RE_LISP_CONTEXT_ARGS_DECL) ~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ int result; ~~~~~~~~~~~ #ifdef emacs ~~~~~~~~~~~~ /* Update the mirror syntax table if it's dirty now, this would otherwise ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ cause a malloc() in charset_mule in re_match_2_internal() when checking ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ characters' syntax. */ ~~~~~~~~~~~~~~~~~~~~~~ SYNTAX_CODE (BUFFER_MIRROR_SYNTAX_TABLE (lispbuf), 'a'); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ scache = setup_syntax_cache (scache, lispobj, lispbuf, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ offset_to_bytexpos (lispobj, pos), ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 1); ~~~ #endif ~~~~~~ result = re_match_2_internal (bufp, (re_char *) string1, size1, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (re_char *) string2, size2, ~~~~~~~~~~~~~~~~~~~~~~~~~~~ pos, regs, stop ~~~~~~~~~~~~~~~ RE_LISP_CONTEXT_ARGS); ~~~~~~~~~~~~~~~~~~~~~~ ALLOCA_GARBAGE_COLLECT (); ~~~~~~~~~~~~~~~~~~~~~~~~~~ return result; ~~~~~~~~~~~~~~ } ~ /* This is a separate function so that we can force an alloca cleanup ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ afterwards. */ ~~~~~~~~~~~~~~~ static int ~~~~~~~~~~ re_match_2_internal (struct re_pattern_buffer *bufp, re_char *string1, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int size1, re_char *string2, int size2, int pos, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ struct re_registers *regs, int stop ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RE_LISP_CONTEXT_ARGS_MULE_DECL) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ /* General temporaries. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~ int mcnt; ~~~~~~~~~ re_char *p1; ~~~~~~~~~~~~ int should_succeed; /* XEmacs change */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Just past the end of the corresponding string. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_char *end1, *end2; ~~~~~~~~~~~~~~~~~~~~~ /* Pointers into string1 and string2, just past the last characters in ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ each to consider matching. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_char *end_match_1, *end_match_2; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Where we are in the data, and the end of the current string. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_char *d, *dend; ~~~~~~~~~~~~~~~~~~ /* Where we are in the pattern, and the end of the pattern. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ unsigned char *p; ~~~~~~~~~~~~~~~~~ re_char *pstart; ~~~~~~~~~~~~~~~~ REGISTER re_char *pend; ~~~~~~~~~~~~~~~~~~~~~~~ /* Mark the opcode just after a start_memory, so we can test for an ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ empty subpattern when we get to the stop_memory. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_char *just_past_start_mem = 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* We use this to map every character in the string. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RE_TRANSLATE_TYPE translate = bufp->translate; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Failure point stack. Each place that can handle a failure further ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ down the line pushes a failure point on this stack. It consists of ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ restart, regend, and reg_info for all registers corresponding to ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the subexpressions we're currently inside, plus the number of such ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ registers, and, finally, two char *'s. The first char * is where ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ to resume scanning the pattern; the second one is where to resume ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ scanning the strings. If the latter is zero, the failure point is ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ a ``dummy''; if a failure happens and the failure point is a dummy, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ it gets discarded and the next one is tried. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef MATCH_MAY_ALLOCATE /* otherwise, this is global. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fail_stack_type fail_stack; ~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ #ifdef DEBUG ~~~~~~~~~~~~ static int failure_id; ~~~~~~~~~~~~~~~~~~~~~~ int nfailure_points_pushed = 0, nfailure_points_popped = 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ #ifdef REGEX_REL_ALLOC ~~~~~~~~~~~~~~~~~~~~~~ /* This holds the pointer to the failure stack, when ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ it is allocated relocatably. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fail_stack_elt_t *failure_stack_ptr; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ /* We fill all the registers internally, independent of what we ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return, for use in backreferences. The number here includes ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ an element for register zero. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regnum_t num_regs = bufp->re_ngroups + 1; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* The currently active registers. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regnum_t lowest_active_reg = NO_LOWEST_ACTIVE_REG; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regnum_t highest_active_reg = NO_HIGHEST_ACTIVE_REG; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Information on the contents of registers. These are pointers into ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the input strings; they record just what was matched (on this ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ attempt) by a subexpression part of the pattern, that is, the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regnum-th regstart pointer points to where in the pattern we began ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ matching and the regnum-th regend points to right after where we ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ stopped matching the regnum-th subexpression. (The zeroth register ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ keeps track of what the whole pattern matches.) */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_char **regstart, **regend; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ /* If a group that's operated upon by a repetition operator fails to ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ match anything, then the register for its start will need to be ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ restored because it will have been set to wherever in the string we ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ are when we last see its open-group operator. Similarly for a ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ register's end. */ ~~~~~~~~~~~~~~~~~~~ #ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_char **old_regstart, **old_regend; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ /* The is_active field of reg_info helps us keep track of which (possibly ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ nested) subexpressions we are currently in. The matched_something ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ field of reg_info[reg_num] helps us tell whether or not we have ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ matched any of the pattern so far this time through the reg_num-th ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ subexpression. These two fields get reset each time through any ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ loop their register is in. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef MATCH_MAY_ALLOCATE /* otherwise, this is global. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ register_info_type *reg_info; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ /* The following record the register info as found in the above ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ variables when we find a match better than any we've seen before. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ This happens as we backtrack through the failure points, which in ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ turn happens only if we have not yet matched the entire string. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int best_regs_set = false; ~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_char **best_regstart, **best_regend; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ /* Logically, this is `best_regend[0]'. But we don't want to have to ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ allocate space for that if we're not allocating space for anything ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ else (see below). Also, we never need info about register 0 for ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ any of the other register vectors, and it seems rather a kludge to ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ treat `best_regend' differently than the rest. So we keep track of ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the end of the best match so far in a separate variable. We ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ initialize this to NULL so that when we backtrack the first time ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ and need to test it, it's not garbage. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_char *match_end = NULL; ~~~~~~~~~~~~~~~~~~~~~~~~~~ /* This helps SET_REGS_MATCHED avoid doing redundant work. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int set_regs_matched_done = 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Used when we pop values we don't care about. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_char **reg_dummy; ~~~~~~~~~~~~~~~~~~~~ register_info_type *reg_info_dummy; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ #ifdef DEBUG ~~~~~~~~~~~~ /* Counts the total number of registers pushed. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int num_regs_pushed = 0; ~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ /* 1 if this match ends in the same string (string1 or string2) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ as the best previous match. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_bool same_str_p; ~~~~~~~~~~~~~~~~~~~ /* 1 if this match is the best seen so far. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_bool best_match_p; ~~~~~~~~~~~~~~~~~~~~~ #ifdef emacs ~~~~~~~~~~~~ Internal_Format fmt = buffer_or_other_internal_format (lispobj); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef REL_ALLOC ~~~~~~~~~~~~~~~~ const Ibyte *orig_buftext = ~~~~~~~~~~~~~~~~~~~~~~~~~~~ BUFFERP (lispobj) ? ~~~~~~~~~~~~~~~~~~~ BYTE_BUF_BYTE_ADDRESS (XBUFFER (lispobj), ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ BYTE_BUF_BEG (XBUFFER (lispobj))) : ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 0; ~~ #endif ~~~~~~ #ifdef ERROR_CHECK_MALLOC ~~~~~~~~~~~~~~~~~~~~~~~~~ int depth = bind_regex_malloc_disallowed (1); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ #endif /* emacs */ ~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT1 ("\n\nEntering re_match_2.\n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ BEGIN_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~~~ INIT_FAIL_STACK (); ~~~~~~~~~~~~~~~~~~~ p = (unsigned char *) ALLOCA (bufp->used); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ END_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~ /* re_match_2_internal() modifies the compiled pattern (see the succeed_n, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ jump_n, set_number_at opcodes), make it re-entrant by working on a ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ copy. This should also give better locality of reference. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ memcpy (p, bufp->buffer, bufp->used); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ pstart = (re_char *) p; ~~~~~~~~~~~~~~~~~~~~~~~ pend = pstart + bufp->used; ~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef MATCH_MAY_ALLOCATE ~~~~~~~~~~~~~~~~~~~~~~~~~ /* Do not bother to initialize all the register variables if there are ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ no groups in the pattern, as it takes a fair amount of time. If ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ there are groups, we include space for register 0 (the whole ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ pattern), even though we never use it, since it simplifies the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ array indexing. We should fix this. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (bufp->re_ngroups) ~~~~~~~~~~~~~~~~~~~~~ { ~ BEGIN_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~~~ regstart = REGEX_TALLOC (num_regs, re_char *); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regend = REGEX_TALLOC (num_regs, re_char *); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ old_regstart = REGEX_TALLOC (num_regs, re_char *); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ old_regend = REGEX_TALLOC (num_regs, re_char *); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ best_regstart = REGEX_TALLOC (num_regs, re_char *); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ best_regend = REGEX_TALLOC (num_regs, re_char *); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ reg_info = REGEX_TALLOC (num_regs, register_info_type); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ reg_dummy = REGEX_TALLOC (num_regs, re_char *); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ reg_info_dummy = REGEX_TALLOC (num_regs, register_info_type); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ END_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~ if (!(regstart && regend && old_regstart && old_regend && reg_info ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ && best_regstart && best_regend && reg_dummy && reg_info_dummy)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ FREE_VARIABLES (); ~~~~~~~~~~~~~~~~~~ return -2; ~~~~~~~~~~ } ~ } ~ else ~~~~ { ~ /* We must initialize all our variables to NULL, so that ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `FREE_VARIABLES' doesn't try to free them. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regstart = regend = old_regstart = old_regend = best_regstart ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ = best_regend = reg_dummy = NULL; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ reg_info = reg_info_dummy = (register_info_type *) NULL; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ #endif /* MATCH_MAY_ALLOCATE */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #if defined (emacs) && defined (REL_ALLOC) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ /* If the allocations above (or the call to setup_syntax_cache() in ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_match_2) caused a rel-alloc relocation, then fix up the data ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ pointers */ ~~~~~~~~~~~ Bytecount offset = offset_post_relocation (lispobj, orig_buftext); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (offset) ~~~~~~~~~~~ { ~ string1 += offset; ~~~~~~~~~~~~~~~~~~ string2 += offset; ~~~~~~~~~~~~~~~~~~ } ~ } ~ #endif /* defined (emacs) && defined (REL_ALLOC) */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* The starting position is bogus. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (pos < 0 || pos > size1 + size2) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ FREE_VARIABLES (); ~~~~~~~~~~~~~~~~~~ return -1; ~~~~~~~~~~ } ~ /* Initialize subexpression text positions to our sentinel to mark ones that ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ no start_memory/stop_memory has been seen for. Also initialize the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ register information struct. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (mcnt = 1; mcnt < num_regs; mcnt++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ regstart[mcnt] = regend[mcnt] = old_regstart[mcnt] = old_regend[mcnt] ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ = best_regstart[mcnt] = best_regend[mcnt] = REG_UNSET_VALUE; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ REG_MATCH_NULL_STRING_P (reg_info[mcnt]) = MATCH_NULL_UNSET_VALUE; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ IS_ACTIVE (reg_info[mcnt]) = 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ MATCHED_SOMETHING (reg_info[mcnt]) = 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ EVER_MATCHED_SOMETHING (reg_info[mcnt]) = 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* We move `string1' into `string2' if the latter's empty -- but not if ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `string1' is null. */ ~~~~~~~~~~~~~~~~~~~~~~ if (size2 == 0 && string1 != NULL) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ string2 = string1; ~~~~~~~~~~~~~~~~~~ size2 = size1; ~~~~~~~~~~~~~~ string1 = 0; ~~~~~~~~~~~~ size1 = 0; ~~~~~~~~~~ } ~ end1 = string1 + size1; ~~~~~~~~~~~~~~~~~~~~~~~ end2 = string2 + size2; ~~~~~~~~~~~~~~~~~~~~~~~ /* Compute where to stop matching, within the two strings. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (stop <= size1) ~~~~~~~~~~~~~~~~~~ { ~ end_match_1 = string1 + stop; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ end_match_2 = string2; ~~~~~~~~~~~~~~~~~~~~~~ } ~ else ~~~~ { ~ end_match_1 = end1; ~~~~~~~~~~~~~~~~~~~ end_match_2 = string2 + stop - size1; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* `p' scans through the pattern as `d' scans through the data. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `dend' is the end of the input string that `d' points within. `d' ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ is advanced into the following input string whenever necessary, but ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ this happens before fetching; therefore, at the beginning of the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ loop, `d' can be pointing at the end of a string, but it cannot ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ equal `string2'. */ ~~~~~~~~~~~~~~~~~~~~ if (size1 > 0 && pos <= size1) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ d = string1 + pos; ~~~~~~~~~~~~~~~~~~ dend = end_match_1; ~~~~~~~~~~~~~~~~~~~ } ~ else ~~~~ { ~ d = string2 + pos - size1; ~~~~~~~~~~~~~~~~~~~~~~~~~~ dend = end_match_2; ~~~~~~~~~~~~~~~~~~~ } ~ DEBUG_MATCH_PRINT1 ("The compiled pattern is: \n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT_COMPILED_PATTERN (bufp, p, pend); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT1 ("The string to match is: `"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT_DOUBLE_STRING (d, string1, size1, string2, size2); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT1 ("'\n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* This loops over pattern commands. It exits by returning from the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ function if the match is complete, or it drops through if the match ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fails at this starting point in the input data. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (;;) ~~~~~~~~ { ~ DEBUG_MATCH_PRINT2 ("\n0x%zx: ", (Bytecount) p); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef emacs /* XEmacs added, w/removal of immediate_quit */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!no_quit_in_re_search) ~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ BEGIN_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~~~ QUIT; ~~~~~ END_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~ RE_MATCH_RELOCATE_MOVEABLE_DATA_POINTERS (); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ #endif ~~~~~~ if (p == pend) ~~~~~~~~~~~~~~ { /* End of pattern means we might have succeeded. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT1 ("end of pattern ... "); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* If we haven't matched the entire string, and we want the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ longest match, try backtracking. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (d != end_match_2) ~~~~~~~~~~~~~~~~~~~~~ { ~ same_str_p = (FIRST_STRING_P (match_end) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ == MATCHING_IN_FIRST_STRING); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* AIX compiler got confused when this was combined ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ with the previous declaration. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (same_str_p) ~~~~~~~~~~~~~~~ best_match_p = d > match_end; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ else ~~~~ best_match_p = !MATCHING_IN_FIRST_STRING; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT1 ("backtracking.\n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!FAIL_STACK_EMPTY ()) ~~~~~~~~~~~~~~~~~~~~~~~~~ { /* More failure points to try. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* If exceeds best match so far, save it. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!best_regs_set || best_match_p) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ best_regs_set = true; ~~~~~~~~~~~~~~~~~~~~~ match_end = d; ~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT1 ("\nSAVING match as best so far.\n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (mcnt = 1; mcnt < num_regs; mcnt++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ best_regstart[mcnt] = regstart[mcnt]; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ best_regend[mcnt] = regend[mcnt]; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ } ~ goto fail; ~~~~~~~~~~ } ~ /* If no failure points, don't restore garbage. And if ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ last match is real best match, don't restore second ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ best one. */ ~~~~~~~~~~~~ else if (best_regs_set && !best_match_p) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ restore_best_regs: ~~~~~~~~~~~~~~~~~~ /* Restore best match. It may happen that `dend == ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ end_match_1' while the restored d is in string2. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ For example, the pattern `x.*y.*z' against the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ strings `x-' and `y-z-', if the two strings are ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ not consecutive in memory. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT1 ("Restoring best registers.\n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ d = match_end; ~~~~~~~~~~~~~~ dend = ((d >= string1 && d <= end1) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ? end_match_1 : end_match_2); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (mcnt = 1; mcnt < num_regs; mcnt++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ regstart[mcnt] = best_regstart[mcnt]; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regend[mcnt] = best_regend[mcnt]; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ } ~ } /* d != end_match_2 */ ~~~~~~~~~~~~~~~~~~~~~~~~ succeed_label: ~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT1 ("Accepting match.\n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* If caller wants register contents data back, do it. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ int num_nonshy_regs = bufp->re_nsub + 1; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (regs && !bufp->no_sub) ~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ /* Have the register data arrays been allocated? */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (bufp->regs_allocated == REGS_UNALLOCATED) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { /* No. So allocate them with malloc. We need one ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ extra element beyond `num_regs' for the `-1' marker ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ GNU code uses. */ ~~~~~~~~~~~~~~~~~~ regs->num_regs = MAX (RE_NREGS, num_nonshy_regs + 1); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ BEGIN_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~~~ regs->start = TALLOC (regs->num_regs, regoff_t); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regs->end = TALLOC (regs->num_regs, regoff_t); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ END_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~ RE_MATCH_RELOCATE_MOVEABLE_DATA_POINTERS (); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (regs->start == NULL || regs->end == NULL) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ FREE_VARIABLES (); ~~~~~~~~~~~~~~~~~~ return -2; ~~~~~~~~~~ } ~ bufp->regs_allocated = REGS_REALLOCATE; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ else if (bufp->regs_allocated == REGS_REALLOCATE) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { /* Yes. If we need more elements than were already ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ allocated, reallocate them. If we need fewer, just ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ leave it alone. */ ~~~~~~~~~~~~~~~~~~~ if (regs->num_regs < num_nonshy_regs + 1) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ regs->num_regs = num_nonshy_regs + 1; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ BEGIN_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~~~ RETALLOC (regs->start, regs->num_regs, regoff_t); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RETALLOC (regs->end, regs->num_regs, regoff_t); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ END_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~ RE_MATCH_RELOCATE_MOVEABLE_DATA_POINTERS (); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (regs->start == NULL || regs->end == NULL) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ FREE_VARIABLES (); ~~~~~~~~~~~~~~~~~~ return -2; ~~~~~~~~~~ } ~ } ~ } ~ else ~~~~ { ~ /* The braces fend off a "empty body in an else-statement" ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ warning under GCC when assert expands to nothing. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ assert (bufp->regs_allocated == REGS_FIXED); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* Convert the pointer data in `regstart' and `regend' to ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ indices. Register zero has to be set differently, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ since we haven't kept track of any info for it. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (regs->num_regs > 0) ~~~~~~~~~~~~~~~~~~~~~~~ { ~ regs->start[0] = pos; ~~~~~~~~~~~~~~~~~~~~~ regs->end[0] = (MATCHING_IN_FIRST_STRING ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ? ((regoff_t) (d - string1)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ : ((regoff_t) (d - string2 + size1))); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* Map over the NUM_NONSHY_REGS non-shy internal registers. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Copy each into the corresponding external register. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ MCNT indexes external registers. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (mcnt = 1; mcnt < MIN (num_nonshy_regs, regs->num_regs); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ mcnt++) ~~~~~~~ { ~ int internal_reg = bufp->external_to_internal_register[mcnt]; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if ((int)0xDEADBEEF == internal_reg ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || REG_UNSET (regstart[internal_reg]) || ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ REG_UNSET (regend[internal_reg])) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regs->start[mcnt] = regs->end[mcnt] = -1; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ else ~~~~ { ~ regs->start[mcnt] = ~~~~~~~~~~~~~~~~~~~ (regoff_t) POINTER_TO_OFFSET (regstart[internal_reg]); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regs->end[mcnt] = ~~~~~~~~~~~~~~~~~ (regoff_t) POINTER_TO_OFFSET (regend[internal_reg]); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ } ~ } /* regs && !bufp->no_sub */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* If we have regs and the regs structure has more elements than ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ were in the pattern, set the extra elements starting with ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ NUM_NONSHY_REGS to -1. If we (re)allocated the registers, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ this is the case, because we always allocate enough to have ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ at least one -1 at the end. ~~~~~~~~~~~~~~~~~~~~~~~~~~~ We do this even when no_sub is set because some applications ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (XEmacs) reuse register structures which may contain stale ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ information, and permit attempts to access those registers. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ It would be possible to require the caller to do this, but we'd ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ have to change the API for this function to reflect that, and ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ audit all callers. Note: as of 2003-04-17 callers in XEmacs ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ do clear the registers, but it's safer to leave this code in ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ because of reallocation. ~~~~~~~~~~~~~~~~~~~~~~~~ */ ~~ if (regs && regs->num_regs > 0) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (mcnt = num_nonshy_regs; mcnt < regs->num_regs; mcnt++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regs->start[mcnt] = regs->end[mcnt] = -1; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ DEBUG_MATCH_PRINT4 ("%u failure points pushed, %u popped (%u remain).\n", ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ nfailure_points_pushed, nfailure_points_popped, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ nfailure_points_pushed - nfailure_points_popped); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT2 ("%u registers pushed.\n", num_regs_pushed); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ mcnt = d - pos - (MATCHING_IN_FIRST_STRING ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ? string1 ~~~~~~~~~ : string2 - size1); ~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT2 ("Returning %d from re_match_2.\n", mcnt); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ FREE_VARIABLES (); ~~~~~~~~~~~~~~~~~~ return mcnt; ~~~~~~~~~~~~ } ~ /* Otherwise match next pattern command. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ switch ((re_opcode_t) *p++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ /* Ignore these. Used to ignore the n of succeed_n's which ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ currently have n == 0. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~ case no_op: ~~~~~~~~~~~ DEBUG_MATCH_PRINT1 ("EXECUTING no_op.\n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case succeed: ~~~~~~~~~~~~~ DEBUG_MATCH_PRINT1 ("EXECUTING succeed.\n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto succeed_label; ~~~~~~~~~~~~~~~~~~~ /* Match exactly a string of length n in the pattern. The ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ following byte in the pattern defines n, and the n bytes after ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ that make up the string to match. (Under Mule, this will be in ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the default internal format.) */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case exactn: ~~~~~~~~~~~~ mcnt = *p++; ~~~~~~~~~~~~ DEBUG_MATCH_PRINT2 ("EXECUTING exactn %d.\n", mcnt); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* This is written out as an if-else so we don't waste time ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ testing `translate' inside the loop. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (TRANSLATE_P (translate)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ do ~~ { ~ #ifdef MULE ~~~~~~~~~~~ Bytecount pat_len; ~~~~~~~~~~~~~~~~~~ REGEX_PREFETCH (); ~~~~~~~~~~~~~~~~~~ if (RE_TRANSLATE_1 (itext_ichar_fmt (d, fmt, lispobj)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ != itext_ichar (p)) ~~~~~~~~~~~~~~~~~~~ goto fail; ~~~~~~~~~~ pat_len = itext_ichar_len (p); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ p += pat_len; ~~~~~~~~~~~~~ INC_IBYTEPTR_FMT (d, fmt); ~~~~~~~~~~~~~~~~~~~~~~~~~~ mcnt -= pat_len; ~~~~~~~~~~~~~~~~ #else /* not MULE */ ~~~~~~~~~~~~~~~~~~~~ REGEX_PREFETCH (); ~~~~~~~~~~~~~~~~~~ if ((unsigned char) RE_TRANSLATE_1 (*d++) != *p++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto fail; ~~~~~~~~~~ mcnt--; ~~~~~~~ #endif ~~~~~~ } ~ while (mcnt > 0); ~~~~~~~~~~~~~~~~~ } ~ else ~~~~ { ~ #ifdef MULE ~~~~~~~~~~~ /* If buffer format is default, then we can shortcut and just ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ compare the text directly, byte by byte. Otherwise, we ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ need to go character by character. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (fmt != FORMAT_DEFAULT) ~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ do ~~ { ~ Bytecount pat_len; ~~~~~~~~~~~~~~~~~~ REGEX_PREFETCH (); ~~~~~~~~~~~~~~~~~~ if (itext_ichar_fmt (d, fmt, lispobj) != ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ itext_ichar (p)) ~~~~~~~~~~~~~~~~ goto fail; ~~~~~~~~~~ pat_len = itext_ichar_len (p); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ p += pat_len; ~~~~~~~~~~~~~ INC_IBYTEPTR_FMT (d, fmt); ~~~~~~~~~~~~~~~~~~~~~~~~~~ mcnt -= pat_len; ~~~~~~~~~~~~~~~~ } ~ while (mcnt > 0); ~~~~~~~~~~~~~~~~~ } ~ else ~~~~ #endif ~~~~~~ { ~ do ~~ { ~ REGEX_PREFETCH (); ~~~~~~~~~~~~~~~~~~ if (*d++ != *p++) goto fail; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ mcnt--; ~~~~~~~ } ~ while (mcnt > 0); ~~~~~~~~~~~~~~~~~ } ~ } ~ SET_REGS_MATCHED (); ~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ /* Match any character except possibly a newline or a null. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case anychar: ~~~~~~~~~~~~~ DEBUG_MATCH_PRINT1 ("EXECUTING anychar.\n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ REGEX_PREFETCH (); ~~~~~~~~~~~~~~~~~~ if ((!(bufp->syntax & RE_DOT_NEWLINE) && ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RE_TRANSLATE (itext_ichar_fmt (d, fmt, lispobj)) == '\n') ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || (bufp->syntax & RE_DOT_NOT_NULL && ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RE_TRANSLATE (itext_ichar_fmt (d, fmt, lispobj)) == ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ '\000')) ~~~~~~~~ goto fail; ~~~~~~~~~~ SET_REGS_MATCHED (); ~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT2 (" Matched `%c'.\n", *d); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ INC_IBYTEPTR_FMT (d, fmt); /* XEmacs change */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case charset: ~~~~~~~~~~~~~ case charset_not: ~~~~~~~~~~~~~~~~~ { ~ REGISTER Ichar c; ~~~~~~~~~~~~~~~~~ re_bool not_p = (re_opcode_t) *(p - 1) == charset_not; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT2 ("EXECUTING charset%s.\n", not_p ? "_not" : ""); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ REGEX_PREFETCH (); ~~~~~~~~~~~~~~~~~~ c = itext_ichar_fmt (d, fmt, lispobj); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ c = RE_TRANSLATE (c); /* The character to match. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Cast to `unsigned int' instead of `unsigned char' in case the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bit list is a full 32 bytes long. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if ((unsigned int)c < (unsigned int) (*p * BYTEWIDTH) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ && p[1 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH))) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ not_p = !not_p; ~~~~~~~~~~~~~~~ p += 1 + *p; ~~~~~~~~~~~~ if (!not_p) goto fail; ~~~~~~~~~~~~~~~~~~~~~~ SET_REGS_MATCHED (); ~~~~~~~~~~~~~~~~~~~~ INC_IBYTEPTR_FMT (d, fmt); /* XEmacs change */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ } ~ #ifdef MULE ~~~~~~~~~~~ case charset_mule: ~~~~~~~~~~~~~~~~~~ case charset_mule_not: ~~~~~~~~~~~~~~~~~~~~~~ { ~ REGISTER Ichar c; ~~~~~~~~~~~~~~~~~ re_bool not_p = (re_opcode_t) *(p - 1) == charset_mule_not; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Bitbyte class_bits = *p++; ~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT2 ("EXECUTING charset_mule%s.\n", ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ not_p ? "_not" : ""); ~~~~~~~~~~~~~~~~~~~~~ REGEX_PREFETCH (); ~~~~~~~~~~~~~~~~~~ c = itext_ichar_fmt (d, fmt, lispobj); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ c = RE_TRANSLATE (c); /* The character to match. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if ((class_bits && ~~~~~~~~~~~~~~~~~~ ((class_bits & BIT_WORD && ISWORD (c)) /* = ALNUM */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || (class_bits & BIT_ALPHA && ISALPHA (c)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || (class_bits & BIT_SPACE && ISSPACE (c)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || (class_bits & BIT_PUNCT && ISPUNCT (c)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || (TRANSLATE_P (translate) ? ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (class_bits & (BIT_UPPER | BIT_LOWER) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ && !NOCASEP (lispbuf, c)) ~~~~~~~~~~~~~~~~~~~~~~~~~ : ((class_bits & BIT_UPPER && ISUPPER (c)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || (class_bits & BIT_LOWER && ISLOWER (c)))))) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || EQ (Qt, unified_range_table_lookup ((void *) p, c, Qnil))) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ not_p = !not_p; ~~~~~~~~~~~~~~~ } ~ p += unified_range_table_bytes_used ((void *) p); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!not_p) goto fail; ~~~~~~~~~~~~~~~~~~~~~~ SET_REGS_MATCHED (); ~~~~~~~~~~~~~~~~~~~~ INC_IBYTEPTR_FMT (d, fmt); ~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ } ~ #endif /* MULE */ ~~~~~~~~~~~~~~~~~ /* The beginning of a group is represented by start_memory. The ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ arguments are the register number in the next two bytes, and the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ number of groups inner to this one in the two bytes thereafter. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ The text matched within the group is recorded (in the internal ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ registers data structure) under the register number. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case start_memory: ~~~~~~~~~~~~~~~~~~ { ~ regnum_t regno; ~~~~~~~~~~~~~~~ /* Find out if this group can match the empty string. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ p1 = p; /* To send to group_match_null_string_p. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ EXTRACT_NONNEGATIVE_AND_INCR (regno, p); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT3 ("EXECUTING start_memory %d (%d):\n", ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regno, extract_number (p)); ~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (REG_MATCH_NULL_STRING_P (reg_info[regno]) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ == MATCH_NULL_UNSET_VALUE) ~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ REG_MATCH_NULL_STRING_P (reg_info[regno]) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ = group_match_null_string_p (&p1, pend, reg_info); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ DEBUG_MATCH_PRINT2 (" group CAN%s match null string\n", ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ REG_MATCH_NULL_STRING_P (reg_info[regno]) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ? "NOT" : ""); ~~~~~~~~~~~~~~ /* Save the position in the string where we were the last time ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ we were at this open-group operator in case the group is ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ operated upon by a repetition operator, e.g., with `(a*)*b' ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ against `ab'; then we want to ignore where we are now in the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ string in case this attempt to match fails. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ old_regstart[regno] = REG_MATCH_NULL_STRING_P (reg_info[regno]) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ? REG_UNSET (regstart[regno]) ? d : regstart[regno] ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ : regstart[regno]; ~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT2 (" old_regstart: %zd\n", ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ POINTER_TO_OFFSET (old_regstart[regno])); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regstart[regno] = d; ~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT2 (" regstart: %zd\n", ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ POINTER_TO_OFFSET (regstart[regno])); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ IS_ACTIVE (reg_info[regno]) = 1; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ MATCHED_SOMETHING (reg_info[regno]) = 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Clear this whenever we change the register activity status. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ set_regs_matched_done = 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~ /* This is the new highest active register. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ highest_active_reg = regno; ~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* If nothing was active before, this is the new lowest active ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ register. */ ~~~~~~~~~~~~~ if (lowest_active_reg == NO_LOWEST_ACTIVE_REG) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ lowest_active_reg = regno; ~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Move past the inner group count. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ p += 2; ~~~~~~~ just_past_start_mem = p; ~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ } ~ /* The stop_memory opcode represents the end of a group. Its ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ arguments are the same as start_memory's: the register ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ number, and the number of inner groups. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case stop_memory: ~~~~~~~~~~~~~~~~~ { ~ regnum_t regno, inner_groups; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ EXTRACT_NONNEGATIVE_AND_INCR (regno, p); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ EXTRACT_NONNEGATIVE_AND_INCR (inner_groups, p); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT3 ("EXECUTING stop_memory %d (%d):\n", ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regno, inner_groups); ~~~~~~~~~~~~~~~~~~~~~ /* We need to save the string position the last time we were at ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ this close-group operator in case the group is operated ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ upon by a repetition operator, e.g., with `((a*)*(b*)*)*' ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ against `aba'; then we want to ignore where we are now in ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the string in case this attempt to match fails. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ old_regend[regno] = REG_MATCH_NULL_STRING_P (reg_info[regno]) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ? REG_UNSET (regend[regno]) ? d : regend[regno] ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ : regend[regno]; ~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT2 (" old_regend: %zd\n", ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ POINTER_TO_OFFSET (old_regend[regno])); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regend[regno] = d; ~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT2 (" regend: %zd\n", ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ POINTER_TO_OFFSET (regend[regno])); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* This register isn't active anymore. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ IS_ACTIVE (reg_info[regno]) = 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Clear this whenever we change the register activity status. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ set_regs_matched_done = 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~ /* If this was the only register active, nothing is active ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ anymore. */ ~~~~~~~~~~~~ if (lowest_active_reg == highest_active_reg) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ lowest_active_reg = NO_LOWEST_ACTIVE_REG; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ highest_active_reg = NO_HIGHEST_ACTIVE_REG; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ else ~~~~ { ~ /* We must scan for the new highest active register, since it ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ isn't necessarily one less than now: consider ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (a(b)c(d(e)f)g). When group 3 ends, after the f), the new ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ highest active register is 1. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regnum_t r = regno - 1; ~~~~~~~~~~~~~~~~~~~~~~~ while (r > 0 && !IS_ACTIVE (reg_info[r])) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ r--; ~~~~ /* If we end up at register zero, that means that we saved ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the registers as the result of an `on_failure_jump', not ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ a `start_memory', and we jumped to past the innermost ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `stop_memory'. For example, in ((.)*) we save registers ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 1 and 2 as a result of the *, but when we pop back to the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ second ), we are at the stop_memory 1. Thus, nothing is ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ active. */ ~~~~~~~~~~~ if (r == 0) ~~~~~~~~~~~ { ~ lowest_active_reg = NO_LOWEST_ACTIVE_REG; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ highest_active_reg = NO_HIGHEST_ACTIVE_REG; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ else ~~~~ { ~ highest_active_reg = r; ~~~~~~~~~~~~~~~~~~~~~~~ /* 98/9/21 jhod: We've also gotta set lowest_active_reg, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ don't we? */ ~~~~~~~~~~~~ r = 1; ~~~~~~ while (r < highest_active_reg && !IS_ACTIVE(reg_info[r])) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ r++; ~~~~ lowest_active_reg = r; ~~~~~~~~~~~~~~~~~~~~~~ } ~ } ~ /* If just failed to match something this time around with a ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ group that's operated on by a repetition operator, try to ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ force exit from the ``loop'', and restore the register ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ information for this group that we had before trying this ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ last match. */ ~~~~~~~~~~~~~~~ if ((!MATCHED_SOMETHING (reg_info[regno]) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || just_past_start_mem == p - 4) && p < pend) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ re_bool is_a_jump_n = false; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ p1 = p; ~~~~~~~ mcnt = 0; ~~~~~~~~~ switch ((re_opcode_t) *p1++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ case jump_n: ~~~~~~~~~~~~ is_a_jump_n = true; ~~~~~~~~~~~~~~~~~~~ case pop_failure_jump: ~~~~~~~~~~~~~~~~~~~~~~ case maybe_pop_jump: ~~~~~~~~~~~~~~~~~~~~ case jump: ~~~~~~~~~~ case dummy_failure_jump: ~~~~~~~~~~~~~~~~~~~~~~~~ EXTRACT_NUMBER_AND_INCR (mcnt, p1); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (is_a_jump_n) ~~~~~~~~~~~~~~~~ p1 += 2; ~~~~~~~~ break; ~~~~~~ default: ~~~~~~~~ /* do nothing */ ; ~~~~~~~~~~~~~~~~~~ } ~ p1 += mcnt; ~~~~~~~~~~~ /* If the next operation is a jump backwards in the pattern ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ to an on_failure_jump right before the start_memory ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ corresponding to this stop_memory, exit from the loop ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ by forcing a failure after pushing on the stack the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ on_failure_jump's jump in the pattern, and d. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (mcnt < 0 && (re_opcode_t) *p1 == on_failure_jump ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ && (re_opcode_t) p1[3] == start_memory && ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regno == extract_nonnegative (p1 + 4)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ /* If this group ever matched anything, then restore ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ what its registers were before trying this last ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ failed match, e.g., with `(a*)*b' against `ab' for ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regstart[1], and, e.g., with `((a*)*(b*)*)*' against ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `aba' for regend[3]. ~~~~~~~~~~~~~~~~~~~~ Also restore the registers for inner groups for, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ e.g., `((a*)(b*))*' against `aba' (register 3 would ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ otherwise get trashed). */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (EVER_MATCHED_SOMETHING (reg_info[regno])) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ int r; ~~~~~~ EVER_MATCHED_SOMETHING (reg_info[regno]) = 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Restore this and inner groups' (if any) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ registers. */ ~~~~~~~~~~~~~~ for (r = regno; r < regno + inner_groups; r++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ regstart[r] = old_regstart[r]; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* xx why this test? */ ~~~~~~~~~~~~~~~~~~~~~~~~ if (old_regend[r] >= regstart[r]) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regend[r] = old_regend[r]; ~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ } ~ p1++; ~~~~~ EXTRACT_NUMBER_AND_INCR (mcnt, p1); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PUSH_FAILURE_POINT (p1 + mcnt, d, -2); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto fail; ~~~~~~~~~~ } ~ } ~ /* We used to move past the register number and inner group count ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ here, when registers were just one byte; that's no longer ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ necessary with EXTRACT_NUMBER_AND_INCR(), above. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ } ~ /* \ has been turned into a `duplicate' command which is ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ followed by the numeric value of as the register number. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (Already passed through external-to-internal-register mapping, so ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ it refers to the actual group number, not the non-shy-only ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ numbering used in the external world.) */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case duplicate: ~~~~~~~~~~~~~~~ { ~ REGISTER re_char *d2, *dend2; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Get which register to match against. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regnum_t regno; ~~~~~~~~~~~~~~~ EXTRACT_NONNEGATIVE_AND_INCR (regno, p); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT2 ("EXECUTING duplicate %d.\n", regno); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Can't back reference a group which we've never matched. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (REG_UNSET (regstart[regno]) || REG_UNSET (regend[regno])) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto fail; ~~~~~~~~~~ /* Where in input to try to start matching. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ d2 = regstart[regno]; ~~~~~~~~~~~~~~~~~~~~~ /* Where to stop matching; if both the place to start and ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the place to stop matching are in the same string, then ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ set to the place to stop, otherwise, for now have to use ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the end of the first string. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ dend2 = ((FIRST_STRING_P (regstart[regno]) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ == FIRST_STRING_P (regend[regno])) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ? regend[regno] : end_match_1); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (;;) ~~~~~~~~ { ~ /* If necessary, advance to next segment in register ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ contents. */ ~~~~~~~~~~~~~ while (d2 == dend2) ~~~~~~~~~~~~~~~~~~~ { ~ if (dend2 == end_match_2) break; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (dend2 == regend[regno]) break; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* End of string1 => advance to string2. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ d2 = string2; ~~~~~~~~~~~~~ dend2 = regend[regno]; ~~~~~~~~~~~~~~~~~~~~~~ } ~ /* At end of register contents => success */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (d2 == dend2) break; ~~~~~~~~~~~~~~~~~~~~~~~ /* If necessary, advance to next segment in data. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ REGEX_PREFETCH (); ~~~~~~~~~~~~~~~~~~ /* How many characters left in this segment to match. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ mcnt = dend - d; ~~~~~~~~~~~~~~~~ /* Want how many consecutive characters we can match in ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ one shot, so, if necessary, adjust the count. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (mcnt > dend2 - d2) ~~~~~~~~~~~~~~~~~~~~~~ mcnt = dend2 - d2; ~~~~~~~~~~~~~~~~~~ /* Compare that many; failure if mismatch, else move ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ past them. */ ~~~~~~~~~~~~~~ if (TRANSLATE_P (translate) ~~~~~~~~~~~~~~~~~~~~~~~~~~~ ? bcmp_translate (d, d2, mcnt, translate ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef emacs ~~~~~~~~~~~~ , fmt, lispobj ~~~~~~~~~~~~~~ #endif ~~~~~~ ) ~ : memcmp (d, d2, mcnt)) ~~~~~~~~~~~~~~~~~~~~~~~ goto fail; ~~~~~~~~~~ d += mcnt, d2 += mcnt; ~~~~~~~~~~~~~~~~~~~~~~ /* Do this because we've match some characters. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ SET_REGS_MATCHED (); ~~~~~~~~~~~~~~~~~~~~ } ~ } ~ break; ~~~~~~ /* begline matches the empty string at the beginning of the string ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (unless `not_bol' is set in `bufp'), and, if ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `newline_anchor' is set, after newlines. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case begline: ~~~~~~~~~~~~~ DEBUG_MATCH_PRINT1 ("EXECUTING begline.\n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (AT_STRINGS_BEG (d)) ~~~~~~~~~~~~~~~~~~~~~~~ { ~ if (!bufp->not_bol) break; ~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ else ~~~~ { ~ re_char *d2 = d; ~~~~~~~~~~~~~~~~ DEC_IBYTEPTR (d2); ~~~~~~~~~~~~~~~~~~ if (itext_ichar_ascii_fmt (d2, fmt, lispobj) == '\n' && ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->newline_anchor) ~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ } ~ /* In all other cases, we fail. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto fail; ~~~~~~~~~~ /* endline is the dual of begline. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case endline: ~~~~~~~~~~~~~ DEBUG_MATCH_PRINT1 ("EXECUTING endline.\n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (AT_STRINGS_END (d)) ~~~~~~~~~~~~~~~~~~~~~~~ { ~ if (!bufp->not_eol) break; ~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* We have to ``prefetch'' the next character. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ else if ((d == end1 ? ~~~~~~~~~~~~~~~~~~~~~ itext_ichar_ascii_fmt (string2, fmt, lispobj) : ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ itext_ichar_ascii_fmt (d, fmt, lispobj)) == '\n' ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ && bufp->newline_anchor) ~~~~~~~~~~~~~~~~~~~~~~~~ { ~ break; ~~~~~~ } ~ goto fail; ~~~~~~~~~~ /* Match at the very beginning of the data. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case begbuf: ~~~~~~~~~~~~ DEBUG_MATCH_PRINT1 ("EXECUTING begbuf.\n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (AT_STRINGS_BEG (d)) ~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ goto fail; ~~~~~~~~~~ /* Match at the very end of the data. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case endbuf: ~~~~~~~~~~~~ DEBUG_MATCH_PRINT1 ("EXECUTING endbuf.\n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (AT_STRINGS_END (d)) ~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ goto fail; ~~~~~~~~~~ /* on_failure_keep_string_jump is used to optimize `.*\n'. It ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ pushes NULL as the value for the string on the stack. Then ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `pop_failure_point' will keep the current value for the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ string, instead of restoring it. To see why, consider ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ matching `foo\nbar' against `.*\n'. The .* matches the foo; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ then the . fails against the \n. But the next thing we want ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ to do is match the \n against the \n; if we restored the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ string value, we would be back at the foo. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Because this is used only in specific cases, we don't need to ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ check all the things that `on_failure_jump' does, to make ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ sure the right things get saved on the stack. Hence we don't ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ share its code. The only reason to push anything on the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ stack at all is that otherwise we would have to change ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `anychar's code to do something besides goto fail in this ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case; that seems worse than this. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case on_failure_keep_string_jump: ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT1 ("EXECUTING on_failure_keep_string_jump"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ EXTRACT_NUMBER_AND_INCR (mcnt, p); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT3 (" %d (to 0x%zx):\n", mcnt, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (Bytecount) (p + mcnt)); ~~~~~~~~~~~~~~~~~~~~~~~~ PUSH_FAILURE_POINT (p + mcnt, (unsigned char *) 0, -2); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ /* Uses of on_failure_jump: ~~~~~~~~~~~~~~~~~~~~~~~~~~~ Each alternative starts with an on_failure_jump that points ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ to the beginning of the next alternative. Each alternative ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ except the last ends with a jump that in effect jumps past ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the rest of the alternatives. (They really jump to the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ending jump of the following alternative, because tensioning ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ these jumps is a hassle.) ~~~~~~~~~~~~~~~~~~~~~~~~~ Repeats start with an on_failure_jump that points past both ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the repetition text and either the following jump or ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ pop_failure_jump back to this on_failure_jump. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case on_failure_jump: ~~~~~~~~~~~~~~~~~~~~~ on_failure: ~~~~~~~~~~~ DEBUG_MATCH_PRINT1 ("EXECUTING on_failure_jump"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ EXTRACT_NUMBER_AND_INCR (mcnt, p); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT3 (" %d (to 0x%zx)", mcnt, (Bytecount) (p + mcnt)); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* If this on_failure_jump comes right before a group (i.e., ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the original * applied to a group), save the information ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for that group and all inner ones, so that if we fail back ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ to this point, the group's information will be correct. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ For example, in \(a*\)*\1, we need the preceding group, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ and in \(\(a*\)b*\)\2, we need the inner group. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* We can't use `p' to check ahead because we push ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ a failure point to `p + mcnt' after we do this. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ p1 = p; ~~~~~~~ /* We need to skip no_op's before we look for the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ start_memory in case this on_failure_jump is happening as ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the result of a completed succeed_n, as in \(a\)\{1,3\}b\1 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ against aba. */ ~~~~~~~~~~~~~~~~ while (p1 < pend && (re_opcode_t) *p1 == no_op) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ p1++; ~~~~~ if (p1 < pend && (re_opcode_t) *p1 == start_memory) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ /* We have a new highest active register now. This will ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ get reset at the start_memory we are about to get to, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ but we will have saved all the registers relevant to ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ this repetition op, as described above. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ highest_active_reg = *(p1 + 1) + *(p1 + 2); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (lowest_active_reg == NO_LOWEST_ACTIVE_REG) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ lowest_active_reg = *(p1 + 1); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ DEBUG_MATCH_PRINT1 (":\n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~ PUSH_FAILURE_POINT (p + mcnt, d, -2); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ /* A smart repeat ends with `maybe_pop_jump'. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ We change it to either `pop_failure_jump' or `jump'. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case maybe_pop_jump: ~~~~~~~~~~~~~~~~~~~~ EXTRACT_NUMBER_AND_INCR (mcnt, p); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT2 ("EXECUTING maybe_pop_jump %d.\n", mcnt); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ REGISTER const unsigned char *p2 = p; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Compare the beginning of the repeat with what in the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ pattern follows its end. If we can establish that there ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ is nothing that they would both match, i.e., that we ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ would have to backtrack because of (as in, e.g., `a*a') ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ then we can change to pop_failure_jump, because we'll ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ never have to backtrack. ~~~~~~~~~~~~~~~~~~~~~~~~ This is not true in the case of alternatives: in ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `(a|ab)*' we do need to backtrack to the `ab' alternative ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (e.g., if the string was `ab'). But instead of trying to ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ detect that here, the alternative has put on a dummy ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ failure point which is what we will end up popping. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Skip over open/close-group commands. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ If what follows this loop is a ...+ construct, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ look at what begins its body, since we will have to ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ match at least one of that. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ while (1) ~~~~~~~~~ { ~ if (p2 + 2 < pend ~~~~~~~~~~~~~~~~~ && ((re_opcode_t) *p2 == stop_memory ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || (re_opcode_t) *p2 == start_memory)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ p2 += 3; ~~~~~~~~ else if (p2 + 6 < pend ~~~~~~~~~~~~~~~~~~~~~~ && (re_opcode_t) *p2 == dummy_failure_jump) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ p2 += 6; ~~~~~~~~ else ~~~~ break; ~~~~~~ } ~ p1 = p + mcnt; ~~~~~~~~~~~~~~ /* p1[0] ... p1[2] are the `on_failure_jump' corresponding ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ to the `maybe_finalize_jump' of this case. Examine what ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ follows. */ ~~~~~~~~~~~~ /* If we're at the end of the pattern, we can change. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (p2 == pend) ~~~~~~~~~~~~~~~ { ~ /* Consider what happens when matching ":\(.*\)" ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ against ":/". I don't really understand this code ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ yet. */ ~~~~~~~~ ((unsigned char *)p)[-3] = (re_char) pop_failure_jump; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT1 ~~~~~~~~~~~~~~~~~~ (" End of pattern: change to `pop_failure_jump'.\n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ else if ((re_opcode_t) *p2 == exactn ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || (bufp->newline_anchor && (re_opcode_t) *p2 == endline)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ REGISTER unsigned char c ~~~~~~~~~~~~~~~~~~~~~~~~ = *p2 == (unsigned char) endline ? '\n' : p2[2]; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if ((re_opcode_t) p1[3] == exactn && p1[5] != c) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ ((unsigned char *)p)[-3] ~~~~~~~~~~~~~~~~~~~~~~~~ = (unsigned char) pop_failure_jump; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT3 (" %c != %c => pop_failure_jump.\n", ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ c, p1[5]); ~~~~~~~~~~ } ~ else if ((re_opcode_t) p1[3] == charset ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || (re_opcode_t) p1[3] == charset_not) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ int not_p = (re_opcode_t) p1[3] == charset_not; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (c < (unsigned char) (p1[4] * BYTEWIDTH) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ && p1[5 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH))) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ not_p = !not_p; ~~~~~~~~~~~~~~~ /* `not_p' is equal to 1 if c would match, which means ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ that we can't change to pop_failure_jump. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!not_p) ~~~~~~~~~~~ { ~ ((unsigned char *)p)[-3] ~~~~~~~~~~~~~~~~~~~~~~~~ = (unsigned char) pop_failure_jump; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT1 (" No match => pop_failure_jump.\n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ } ~ } ~ else if ((re_opcode_t) *p2 == charset) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ #ifdef DEBUG ~~~~~~~~~~~~ REGISTER unsigned char c ~~~~~~~~~~~~~~~~~~~~~~~~ = *p2 == (unsigned char) endline ? '\n' : p2[2]; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ if ((re_opcode_t) p1[3] == exactn ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ && ! ((int) p2[1] * BYTEWIDTH > (int) p1[5] ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ && (p2[2 + p1[5] / BYTEWIDTH] ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ & (1 << (p1[5] % BYTEWIDTH))))) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ unsigned char *p3 = (unsigned char *)p; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ p3[-3] = (unsigned char) pop_failure_jump; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT3 (" %c != %c => pop_failure_jump.\n", ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ c, p1[5]); ~~~~~~~~~~ } ~ else if ((re_opcode_t) p1[3] == charset_not) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ int idx; ~~~~~~~~ /* We win if the charset_not inside the loop ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ lists every character listed in the charset after. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (idx = 0; idx < (int) p2[1]; idx++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (! (p2[2 + idx] == 0 ~~~~~~~~~~~~~~~~~~~~~~~ || (idx < (int) p1[4] ~~~~~~~~~~~~~~~~~~~~~ && ((p2[2 + idx] & ~ p1[5 + idx]) == 0)))) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ if (idx == p2[1]) ~~~~~~~~~~~~~~~~~ { ~ unsigned char *p3 = (unsigned char *) p; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ p3[-3] = (unsigned char) pop_failure_jump; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT1 (" No match => pop_failure_jump.\n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ } ~ else if ((re_opcode_t) p1[3] == charset) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ int idx; ~~~~~~~~ /* We win if the charset inside the loop ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ has no overlap with the one after the loop. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (idx = 0; ~~~~~~~~~~~~~ idx < (int) p2[1] && idx < (int) p1[4]; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ idx++) ~~~~~~ if ((p2[2 + idx] & p1[5 + idx]) != 0) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ if (idx == p2[1] || idx == p1[4]) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ unsigned char *p3 = (unsigned char *)p; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ p3[-3] = (unsigned char) pop_failure_jump; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT1 (" No match => pop_failure_jump.\n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ } ~ } ~ } ~ p -= 2; /* Point at relative address again. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if ((re_opcode_t) p[-1] != pop_failure_jump) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ p[-1] = (unsigned char) jump; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT1 (" Match => jump.\n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto unconditional_jump; ~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* Note fall through. */ ~~~~~~~~~~~~~~~~~~~~~~~~~ /* The end of a simple repeat has a pop_failure_jump back to ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ its matching on_failure_jump, where the latter will push a ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ failure point. The pop_failure_jump takes off failure ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ points put on by this pop_failure_jump's matching ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ on_failure_jump; we got through the pattern to here from the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ matching on_failure_jump, so didn't fail. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case pop_failure_jump: ~~~~~~~~~~~~~~~~~~~~~~ { ~ /* We need to pass separate storage for the lowest and ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ highest registers, even though we don't care about the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ actual values. Otherwise, we will restore only one ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ register from the stack, since lowest will == highest in ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `pop_failure_point'. */ ~~~~~~~~~~~~~~~~~~~~~~~~ int dummy_low_reg, dummy_high_reg; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ unsigned char *pdummy; ~~~~~~~~~~~~~~~~~~~~~~ re_char *sdummy = NULL; ~~~~~~~~~~~~~~~~~~~~~~~ USED (sdummy); /* Silence warning. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT1 ("EXECUTING pop_failure_jump.\n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ POP_FAILURE_POINT (sdummy, pdummy, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ dummy_low_reg, dummy_high_reg, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ reg_dummy, reg_dummy, reg_info_dummy); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ USED (pdummy); ~~~~~~~~~~~~~~ } ~ /* Note fall through. */ ~~~~~~~~~~~~~~~~~~~~~~~~~ /* Unconditionally jump (without popping any failure points). */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case jump: ~~~~~~~~~~ unconditional_jump: ~~~~~~~~~~~~~~~~~~~ EXTRACT_NUMBER_AND_INCR (mcnt, p); /* Get the amount to jump. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT2 ("EXECUTING jump %d ", mcnt); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ p += mcnt; /* Do the jump. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT2 ("(to 0x%zx).\n", (Bytecount) p); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ /* We need this opcode so we can detect where alternatives end ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ in `group_match_null_string_p' et al. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case jump_past_alt: ~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT1 ("EXECUTING jump_past_alt.\n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto unconditional_jump; ~~~~~~~~~~~~~~~~~~~~~~~~ /* Normally, the on_failure_jump pushes a failure point, which ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ then gets popped at pop_failure_jump. We will end up at ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ pop_failure_jump, also, and with a pattern of, say, `a+', we ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ are skipping over the on_failure_jump, so we have to push ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ something meaningless for pop_failure_jump to pop. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case dummy_failure_jump: ~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT1 ("EXECUTING dummy_failure_jump.\n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* It doesn't matter what we push for the string here. What ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the code at `fail' tests is the value for the pattern. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PUSH_FAILURE_POINT ((unsigned char *) 0, (unsigned char *) 0, -2); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto unconditional_jump; ~~~~~~~~~~~~~~~~~~~~~~~~ /* At the end of an alternative, we need to push a dummy failure ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ point in case we are followed by a `pop_failure_jump', because ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ we don't want the failure point for the alternative to be ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ popped. For example, matching `(a|ab)*' against `aab' ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ requires that we match the `ab' alternative. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case push_dummy_failure: ~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT1 ("EXECUTING push_dummy_failure.\n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* See comments just above at `dummy_failure_jump' about the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ two zeroes. */ ~~~~~~~~~~~~~~~ PUSH_FAILURE_POINT ((re_char *) 0, (re_char *) 0, -2); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ /* Have to succeed matching what follows at least n times. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ After that, handle like `on_failure_jump'. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case succeed_n: ~~~~~~~~~~~~~~~ EXTRACT_NONNEGATIVE (mcnt, p + 2); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT2 ("EXECUTING succeed_n %d.\n", mcnt); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Originally, this is how many times we HAVE to succeed. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (mcnt) ~~~~~~~~~ { ~ mcnt--; ~~~~~~~ p += 2; ~~~~~~~ DEBUG_MATCH_PRINT3 (" Setting 0x%zx to %d.\n", (Bytecount) p, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ mcnt); ~~~~~~ STORE_MATCH_NUMBER_AND_INCR (p, mcnt); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ else ~~~~ { ~ DEBUG_MATCH_PRINT2 (" Setting two bytes from 0x%zx to no_op.\n", ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (Bytecount) (p+2)); ~~~~~~~~~~~~~~~~~~~ STORE_MATCH_NUMBER (p + 2, no_op); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto on_failure; ~~~~~~~~~~~~~~~~ } ~ break; ~~~~~~ case jump_n: ~~~~~~~~~~~~ EXTRACT_NONNEGATIVE (mcnt, p + 2); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT2 ("EXECUTING jump_n %d.\n", mcnt); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Originally, this is how many times we CAN jump. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (mcnt) ~~~~~~~~~ { ~ mcnt--; ~~~~~~~ STORE_MATCH_NUMBER (p + 2, mcnt); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto unconditional_jump; ~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* If don't have to jump any more, skip over the rest of command. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ else ~~~~ p += 4; ~~~~~~~ break; ~~~~~~ case set_number_at: ~~~~~~~~~~~~~~~~~~~ { ~ unsigned char *p2; /* Location of the counter. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT1 ("EXECUTING set_number_at.\n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ EXTRACT_NUMBER_AND_INCR (mcnt, p); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Discard 'const', making re_match_2_internal() ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ non-reentrant. */ ~~~~~~~~~~~~~~~~~~ p2 = (unsigned char *) p + mcnt; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ EXTRACT_NONNEGATIVE_AND_INCR (mcnt, p); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT3 (" Setting 0x%zx to %d.\n", (Bytecount) p2, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ mcnt); ~~~~~~ STORE_MATCH_NUMBER (p2, mcnt); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ } ~ case wordbound: ~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT1 ("EXECUTING wordbound.\n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ should_succeed = 1; ~~~~~~~~~~~~~~~~~~~ matchwordbound: ~~~~~~~~~~~~~~~ { ~ /* XEmacs change */ ~~~~~~~~~~~~~~~~~~~ /* Straightforward and (I hope) correct implementation. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* emch1 is the character before d, syn1 is the syntax of ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ emch1, emch2 is the character at d, and syn2 is the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ syntax of emch2. */ ~~~~~~~~~~~~~~~~~~~ Ichar emch1, emch2; ~~~~~~~~~~~~~~~~~~~ int syn1 = 0, ~~~~~~~~~~~~~ syn2 = 0; ~~~~~~~~~ re_char *d_before, *d_after; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int result, ~~~~~~~~~~~ at_beg = AT_STRINGS_BEG (d), ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ at_end = AT_STRINGS_END (d); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (at_beg && at_end) ~~~~~~~~~~~~~~~~~~~~~ { ~ result = 0; ~~~~~~~~~~~ } ~ else ~~~~ { ~ if (!at_beg) ~~~~~~~~~~~~ { ~ d_before = POS_BEFORE_GAP_UNSAFE (d); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEC_IBYTEPTR_FMT (d_before, fmt); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ emch1 = itext_ichar_fmt (d_before, fmt, lispobj); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef emacs ~~~~~~~~~~~~ BEGIN_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~~~ UPDATE_SYNTAX_CACHE (scache, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ offset_to_bytexpos ~~~~~~~~~~~~~~~~~~ (lispobj, PTR_TO_OFFSET (d_before))); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ syn1 = SYNTAX_FROM_CACHE (scache, emch1); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ END_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~ } ~ if (!at_end) ~~~~~~~~~~~~ { ~ d_after = POS_AFTER_GAP_UNSAFE (d); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ emch2 = itext_ichar_fmt (d_after, fmt, lispobj); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef emacs ~~~~~~~~~~~~ BEGIN_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~~~ UPDATE_SYNTAX_CACHE_FORWARD (scache, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ offset_to_bytexpos ~~~~~~~~~~~~~~~~~~ (lispobj, PTR_TO_OFFSET (d))); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ syn2 = SYNTAX_FROM_CACHE (scache, emch2); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ END_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~ } ~ RE_MATCH_RELOCATE_MOVEABLE_DATA_POINTERS (); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (at_beg) ~~~~~~~~~~~ result = (syn2 == Sword); ~~~~~~~~~~~~~~~~~~~~~~~~~ else if (at_end) ~~~~~~~~~~~~~~~~ result = (syn1 == Sword); ~~~~~~~~~~~~~~~~~~~~~~~~~ else ~~~~ result = ((syn1 == Sword) != (syn2 == Sword)); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ if (result == should_succeed) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ goto fail; ~~~~~~~~~~ } ~ case notwordbound: ~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT1 ("EXECUTING notwordbound.\n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ should_succeed = 0; ~~~~~~~~~~~~~~~~~~~ goto matchwordbound; ~~~~~~~~~~~~~~~~~~~~ case wordbeg: ~~~~~~~~~~~~~ DEBUG_MATCH_PRINT1 ("EXECUTING wordbeg.\n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (AT_STRINGS_END (d)) ~~~~~~~~~~~~~~~~~~~~~~~ goto fail; ~~~~~~~~~~ { ~ /* XEmacs: this originally read: ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (WORDCHAR_P (d) && (AT_STRINGS_BEG (d) || !WORDCHAR_P (d - 1))) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ */ ~~ re_char *dtmp = POS_AFTER_GAP_UNSAFE (d); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Ichar emch = itext_ichar_fmt (dtmp, fmt, lispobj); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int tempres; ~~~~~~~~~~~~ BEGIN_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef emacs ~~~~~~~~~~~~ UPDATE_SYNTAX_CACHE ~~~~~~~~~~~~~~~~~~~ (scache, ~~~~~~~~ offset_to_bytexpos (lispobj, PTR_TO_OFFSET (d))); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ tempres = (SYNTAX_FROM_CACHE (scache, emch) != Sword); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ END_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~ RE_MATCH_RELOCATE_MOVEABLE_DATA_POINTERS (); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (tempres) ~~~~~~~~~~~~ goto fail; ~~~~~~~~~~ if (AT_STRINGS_BEG (d)) ~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ dtmp = POS_BEFORE_GAP_UNSAFE (d); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEC_IBYTEPTR_FMT (dtmp, fmt); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ emch = itext_ichar_fmt (dtmp, fmt, lispobj); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ BEGIN_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef emacs ~~~~~~~~~~~~ UPDATE_SYNTAX_CACHE_BACKWARD ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (scache, ~~~~~~~~ offset_to_bytexpos (lispobj, PTR_TO_OFFSET (dtmp))); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ tempres = (SYNTAX_FROM_CACHE (scache, emch) != Sword); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ END_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~ RE_MATCH_RELOCATE_MOVEABLE_DATA_POINTERS (); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (tempres) ~~~~~~~~~~~~ break; ~~~~~~ goto fail; ~~~~~~~~~~ } ~ case wordend: ~~~~~~~~~~~~~ DEBUG_MATCH_PRINT1 ("EXECUTING wordend.\n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (AT_STRINGS_BEG (d)) ~~~~~~~~~~~~~~~~~~~~~~~ goto fail; ~~~~~~~~~~ { ~ /* XEmacs: this originally read: ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!AT_STRINGS_BEG (d) && WORDCHAR_P (d - 1) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ && (!WORDCHAR_P (d) || AT_STRINGS_END (d))) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ The or condition is incorrect (reversed). ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ ~~ re_char *dtmp; ~~~~~~~~~~~~~~ Ichar emch; ~~~~~~~~~~~ int tempres; ~~~~~~~~~~~~ #ifdef emacs ~~~~~~~~~~~~ BEGIN_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~~~ UPDATE_SYNTAX_CACHE ~~~~~~~~~~~~~~~~~~~ (scache, ~~~~~~~~ offset_to_bytexpos (lispobj, PTR_TO_OFFSET (d))); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ END_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~ RE_MATCH_RELOCATE_MOVEABLE_DATA_POINTERS (); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ dtmp = POS_BEFORE_GAP_UNSAFE (d); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEC_IBYTEPTR_FMT (dtmp, fmt); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ emch = itext_ichar_fmt (dtmp, fmt, lispobj); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ BEGIN_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~~~ tempres = (SYNTAX_FROM_CACHE (scache, emch) != Sword); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ END_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~ RE_MATCH_RELOCATE_MOVEABLE_DATA_POINTERS (); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (tempres) ~~~~~~~~~~~~ goto fail; ~~~~~~~~~~ if (AT_STRINGS_END (d)) ~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ dtmp = POS_AFTER_GAP_UNSAFE (d); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ emch = itext_ichar_fmt (dtmp, fmt, lispobj); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ BEGIN_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef emacs ~~~~~~~~~~~~ { ~ re_char *next = d; ~~~~~~~~~~~~~~~~~~ INC_IBYTEPTR_FMT (next, fmt); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ UPDATE_SYNTAX_CACHE_FORWARD ~~~~~~~~~~~~~~~~~~~~~~~~~~~ (scache, ~~~~~~~~ offset_to_bytexpos (lispobj, PTR_TO_OFFSET (next))); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ #endif ~~~~~~ tempres = (SYNTAX_FROM_CACHE (scache, emch) != Sword); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ END_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~ RE_MATCH_RELOCATE_MOVEABLE_DATA_POINTERS (); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (tempres) ~~~~~~~~~~~~ break; ~~~~~~ goto fail; ~~~~~~~~~~ } ~ #ifdef emacs ~~~~~~~~~~~~ case before_dot: ~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT1 ("EXECUTING before_dot.\n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!BUFFERP (lispobj) ~~~~~~~~~~~~~~~~~~~~~~ || (BUF_PTR_BYTE_POS (XBUFFER (lispobj), (unsigned char *) d) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ >= BUF_PT (XBUFFER (lispobj)))) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto fail; ~~~~~~~~~~ break; ~~~~~~ case at_dot: ~~~~~~~~~~~~ DEBUG_MATCH_PRINT1 ("EXECUTING at_dot.\n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!BUFFERP (lispobj) ~~~~~~~~~~~~~~~~~~~~~~ || (BUF_PTR_BYTE_POS (XBUFFER (lispobj), (unsigned char *) d) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ != BUF_PT (XBUFFER (lispobj)))) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto fail; ~~~~~~~~~~ break; ~~~~~~ case after_dot: ~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT1 ("EXECUTING after_dot.\n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!BUFFERP (lispobj) ~~~~~~~~~~~~~~~~~~~~~~ || (BUF_PTR_BYTE_POS (XBUFFER (lispobj), (unsigned char *) d) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ <= BUF_PT (XBUFFER (lispobj)))) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto fail; ~~~~~~~~~~ break; ~~~~~~ case syntaxspec: ~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT2 ("EXECUTING syntaxspec %d.\n", mcnt); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ mcnt = *p++; ~~~~~~~~~~~~ goto matchsyntax; ~~~~~~~~~~~~~~~~~ case wordchar: ~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT1 ("EXECUTING Emacs wordchar.\n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ mcnt = (int) Sword; ~~~~~~~~~~~~~~~~~~~ matchsyntax: ~~~~~~~~~~~~ should_succeed = 1; ~~~~~~~~~~~~~~~~~~~ matchornotsyntax: ~~~~~~~~~~~~~~~~~ { ~ int matches; ~~~~~~~~~~~~ Ichar emch; ~~~~~~~~~~~ REGEX_PREFETCH (); ~~~~~~~~~~~~~~~~~~ BEGIN_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~~~ UPDATE_SYNTAX_CACHE ~~~~~~~~~~~~~~~~~~~ (scache, ~~~~~~~~ offset_to_bytexpos (lispobj, PTR_TO_OFFSET (d))); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ END_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~ RE_MATCH_RELOCATE_MOVEABLE_DATA_POINTERS (); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ emch = itext_ichar_fmt (d, fmt, lispobj); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ BEGIN_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~~~ matches = (SYNTAX_FROM_CACHE (scache, emch) == ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (enum syntaxcode) mcnt); ~~~~~~~~~~~~~~~~~~~~~~~~ END_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~ RE_MATCH_RELOCATE_MOVEABLE_DATA_POINTERS (); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ INC_IBYTEPTR_FMT (d, fmt); ~~~~~~~~~~~~~~~~~~~~~~~~~~ if (matches != should_succeed) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto fail; ~~~~~~~~~~ SET_REGS_MATCHED (); ~~~~~~~~~~~~~~~~~~~~ } ~ break; ~~~~~~ case notsyntaxspec: ~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT2 ("EXECUTING notsyntaxspec %d.\n", mcnt); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ mcnt = *p++; ~~~~~~~~~~~~ goto matchnotsyntax; ~~~~~~~~~~~~~~~~~~~~ case notwordchar: ~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT1 ("EXECUTING Emacs notwordchar.\n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ mcnt = (int) Sword; ~~~~~~~~~~~~~~~~~~~ matchnotsyntax: ~~~~~~~~~~~~~~~ should_succeed = 0; ~~~~~~~~~~~~~~~~~~~ goto matchornotsyntax; ~~~~~~~~~~~~~~~~~~~~~~ #ifdef MULE ~~~~~~~~~~~ /* 97/2/17 jhod Mule category code patch */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case categoryspec: ~~~~~~~~~~~~~~~~~~ should_succeed = 1; ~~~~~~~~~~~~~~~~~~~ matchornotcategory: ~~~~~~~~~~~~~~~~~~~ { ~ Ichar emch; ~~~~~~~~~~~ mcnt = *p++; ~~~~~~~~~~~~ REGEX_PREFETCH (); ~~~~~~~~~~~~~~~~~~ emch = itext_ichar_fmt (d, fmt, lispobj); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ INC_IBYTEPTR_FMT (d, fmt); ~~~~~~~~~~~~~~~~~~~~~~~~~~ if (check_char_in_category (emch, BUFFER_CATEGORY_TABLE (lispbuf), ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ mcnt, should_succeed)) ~~~~~~~~~~~~~~~~~~~~~~ goto fail; ~~~~~~~~~~ SET_REGS_MATCHED (); ~~~~~~~~~~~~~~~~~~~~ } ~ break; ~~~~~~ case notcategoryspec: ~~~~~~~~~~~~~~~~~~~~~ should_succeed = 0; ~~~~~~~~~~~~~~~~~~~ goto matchornotcategory; ~~~~~~~~~~~~~~~~~~~~~~~~ /* end of category patch */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif /* MULE */ ~~~~~~~~~~~~~~~~~ #else /* not emacs */ ~~~~~~~~~~~~~~~~~~~~~ case wordchar: ~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT1 ("EXECUTING non-Emacs wordchar.\n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ REGEX_PREFETCH (); ~~~~~~~~~~~~~~~~~~ if (!WORDCHAR_P ((int) (*d))) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto fail; ~~~~~~~~~~ SET_REGS_MATCHED (); ~~~~~~~~~~~~~~~~~~~~ d++; ~~~~ break; ~~~~~~ case notwordchar: ~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT1 ("EXECUTING non-Emacs notwordchar.\n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ REGEX_PREFETCH (); ~~~~~~~~~~~~~~~~~~ if (!WORDCHAR_P ((int) (*d))) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto fail; ~~~~~~~~~~ SET_REGS_MATCHED (); ~~~~~~~~~~~~~~~~~~~~ d++; ~~~~ break; ~~~~~~ #endif /* emacs */ ~~~~~~~~~~~~~~~~~~ default: ~~~~~~~~ ABORT (); ~~~~~~~~~ } ~ continue; /* Successfully executed one pattern command; keep going. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* We goto here if a matching operation fails. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fail: ~~~~~ if (!FAIL_STACK_EMPTY ()) ~~~~~~~~~~~~~~~~~~~~~~~~~ { /* A restart point is known. Restore to that state. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT1 ("\nFAIL:\n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ POP_FAILURE_POINT (d, p, ~~~~~~~~~~~~~~~~~~~~ regex.c:774:55: note: in definition of macro 'DEBUG_FAIL_PRINT2' if (debug_regexps & RE_DEBUG_FAILURE_POINT) printf (x1, x2) ^~ regex.c:7173:11: note: in expansion of macro 'POP_FAILURE_POINT' POP_FAILURE_POINT (d, p, ^~~~~~~~~~~~~~~~~ --- signal.o --- --- sequence.o --- In file included from sequence.c:20:0: ./config.h:868:16: warning: "__builtin_alloca" is not defined, evaluates to 0 [-Wundef] #define alloca __builtin_alloca ^ --- signal.o --- gcc -c -Wall -Wno-switch -Wundef -Wsign-compare -Wno-char-subscripts -Wpacked -Wpointer-arith -Wshadow -Wmissing-declarations -Wmissing-prototypes -Wstrict-prototypes -Wdeclaration-after-statement -g -O2 -Dunix -no-pie -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -DTERMINFO -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -Demacs -I. -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/src -DHAVE_CONFIG_H -I/usr/pkg/include -I/usr/X11R7/include -I/usr/X11R7/include signal.c --- regex.o --- regex.c:1905:26: warning: format '%zx' expects argument of type 'size_t', but argument 2 has type 'long int' [-Wformat=] DEBUG_FAIL_PRINT2 (" Popping pattern 0x%zx: ", (Bytecount) pat); \ ^ ~~~~~~~~~~~~~~~~~~~ DEBUG_FAIL_PRINT_COMPILED_PATTERN (bufp, pat, pend); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_FAIL_PRINT2 (" Popping high active reg: %d\n", high_reg); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_FAIL_PRINT2 (" Popping low active reg: %d\n", low_reg); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } \ ~~~~~~~~~~~ \ ~ for (this_reg = high_reg; this_reg >= low_reg; this_reg--) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { \ ~~~~~~~~~~~ reg_info[this_reg].word = POP_FAILURE_ELT (); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regend[this_reg] = POP_FAILURE_RELOCATABLE (); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regstart[this_reg] = POP_FAILURE_RELOCATABLE (); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ \ ~ if (DEBUG_RUNTIME_FLAGS & RE_DEBUG_FAILURE_POINT) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { \ ~~~~~~~~~~ DEBUG_FAIL_PRINT2 (" Popping reg: %d\n", this_reg); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_FAIL_PRINT2 (" info: 0x%zx\n", \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ * (Bytecount *) ®_info[this_reg]); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_FAIL_PRINT2 (" end: 0x%zx\n", \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (Bytecount) regend[this_reg]); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_FAIL_PRINT2 (" start: 0x%zx\n", \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (Bytecount) regstart[this_reg]); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } \ ~~~~~~~~~~ } \ ~~~~~~~~~~~ \ ~ set_regs_matched_done = 0; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_STATEMENT (nfailure_points_popped++); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } while (0) /* POP_FAILURE_POINT */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ~ /* Structure for per-register (a.k.a. per-group) information. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Other register information, such as the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ starting and ending positions (which are addresses), and the list of ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ inner groups (which is a bits list) are maintained in separate ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ variables. ~~~~~~~~~~ We are making a (strictly speaking) nonportable assumption here: that ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the compiler will pack our bit fields into something that fits into ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the type of `word', i.e., is something that fits into one item on the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ failure stack. */ ~~~~~~~~~~~~~~~~~~ typedef union ~~~~~~~~~~~~~ { ~ fail_stack_elt_t word; ~~~~~~~~~~~~~~~~~~~~~~ struct ~~~~~~ { ~ /* This field is one if this group can match the empty string, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ zero if not. If not yet determined, `MATCH_NULL_UNSET_VALUE'. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define MATCH_NULL_UNSET_VALUE 3 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ unsigned int match_null_string_p : 2; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ unsigned int is_active : 1; ~~~~~~~~~~~~~~~~~~~~~~~~~~~ unsigned int matched_something : 1; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ unsigned int ever_matched_something : 1; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } bits; ~~~~~~~ } register_info_type; ~~~~~~~~~~~~~~~~~~~~~ #define REG_MATCH_NULL_STRING_P(R) ((R).bits.match_null_string_p) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define IS_ACTIVE(R) ((R).bits.is_active) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define MATCHED_SOMETHING(R) ((R).bits.matched_something) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define EVER_MATCHED_SOMETHING(R) ((R).bits.ever_matched_something) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Call this when have matched a real character; it sets `matched' flags ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for the subexpressions which we are currently inside. Also records ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ that those subexprs have matched. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define SET_REGS_MATCHED() \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ do \ ~~~~~~~~~~~~ { \ ~~~~~~~~~~~ if (!set_regs_matched_done) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { \ ~~~~~~~~~~ int r; \ ~~~~~~~~~~~~~~ set_regs_matched_done = 1; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (r = lowest_active_reg; r <= highest_active_reg; r++) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { \ ~~~~~~~~~~ MATCHED_SOMETHING (reg_info[r]) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ = EVER_MATCHED_SOMETHING (reg_info[r]) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ = 1; \ ~~~~~~~~~~~~ } \ ~~~~~~~~~~ } \ ~~~~~~~~~~ } \ ~~~~~~~~~~~ while (0) ~~~~~~~~~ ~ /* Subroutine declarations and macros for regex_compile. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Fetch the next character in the uncompiled pattern---translating it ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if necessary. */ ~~~~~~~~~~~~~~~~~ #define PATFETCH(c) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~ do { \ ~~~~~~~~~~~~~~ PATFETCH_RAW (c); \ ~~~~~~~~~~~~~~~~~~~~~~~~~ c = RE_TRANSLATE (c); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } while (0) ~~~~~~~~~~~ /* Fetch the next character in the uncompiled pattern, with no ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ translation. */ ~~~~~~~~~~~~~~~~ #define PATFETCH_RAW(c) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ do {if (p == pend) return REG_EEND; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ assert (p < pend); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~ c = itext_ichar (p); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ INC_IBYTEPTR (p); \ ~~~~~~~~~~~~~~~~~~~~~~~~~ } while (0) ~~~~~~~~~~~ /* Go backwards one character in the pattern. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define PATUNFETCH DEC_IBYTEPTR (p) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* If `translate' is non-null, return translate[D], else just D. We ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ cast the subscript to translate because some data is declared as ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `char *', to avoid warnings when a string constant is passed. But ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ when we use a character as a subscript we must make it unsigned. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define RE_TRANSLATE(d) \ ~~~~~~~~~~~~~~~~~~~~~~~~~ (TRANSLATE_P (translate) ? RE_TRANSLATE_1 (d) : (d)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Macros for outputting the compiled pattern into `buffer'. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* If the buffer isn't allocated when it comes in, use this. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define INIT_BUF_SIZE 32 ~~~~~~~~~~~~~~~~~~~~~~~~~ /* Make sure we have at least N more bytes of space in buffer. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define GET_BUFFER_SPACE(n) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ while (buf_end - bufp->buffer + (n) > (ptrdiff_t) bufp->allocated) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ EXTEND_BUFFER () ~~~~~~~~~~~~~~~~ /* Make sure we have one more byte of buffer space and then add C to it. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define BUF_PUSH(c) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~ do { \ ~~~~~~~~~~~~~~ GET_BUFFER_SPACE (1); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ *buf_end++ = (unsigned char) (c); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } while (0) ~~~~~~~~~~~ /* Ensure we have two more bytes of buffer space and then append C1 and C2. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define BUF_PUSH_2(c1, c2) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ do { \ ~~~~~~~~~~~~~~ GET_BUFFER_SPACE (2); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ *buf_end++ = (unsigned char) (c1); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ *buf_end++ = (unsigned char) (c2); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } while (0) ~~~~~~~~~~~ /* As with BUF_PUSH_2, except for three bytes. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define BUF_PUSH_3(c1, c2, c3) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ do { \ ~~~~~~~~~~~~~~ GET_BUFFER_SPACE (3); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ *buf_end++ = (unsigned char) (c1); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ *buf_end++ = (unsigned char) (c2); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ *buf_end++ = (unsigned char) (c3); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } while (0) ~~~~~~~~~~~ /* Store a jump with opcode OP at LOC to location TO. We store a ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ relative address offset by the three bytes the jump itself occupies. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define STORE_JUMP(op, loc, to) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ store_op1 (op, loc, (to) - (loc) - 3) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Likewise, for a two-argument jump. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define STORE_JUMP2(op, loc, to, arg) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ store_op2 (op, loc, (to) - (loc) - 3, arg) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Like `STORE_JUMP', but for inserting. Assume `buf_end' is the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ buffer end. */ ~~~~~~~~~~~~~~~ #define INSERT_JUMP(op, loc, to) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ insert_op1 (op, loc, (to) - (loc) - 3, buf_end) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Like `STORE_JUMP2', but for inserting. Assume `buf_end' is the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ buffer end. */ ~~~~~~~~~~~~~~~ #define INSERT_JUMP2(op, loc, to, arg) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ insert_op2 (op, loc, (to) - (loc) - 3, arg, buf_end) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Extend the buffer by twice its current size via realloc and ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ reset the pointers that pointed into the old block to point to the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ correct places in the new one. If extending the buffer results in it ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ being larger than RE_MAX_BUF_SIZE, then flag memory exhausted. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define EXTEND_BUFFER() \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ do { \ ~~~~~~~~~~~~~~~ re_char *old_buffer = bufp->buffer; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (bufp->allocated == RE_MAX_BUF_SIZE) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return REG_ESIZE; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->allocated <<= 1; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (bufp->allocated > RE_MAX_BUF_SIZE) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->allocated = RE_MAX_BUF_SIZE; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->buffer = \ ~~~~~~~~~~~~~~~~~~~~~~~ (unsigned char *) xrealloc (bufp->buffer, bufp->allocated); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (bufp->buffer == NULL) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return REG_ESPACE; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~ /* If the buffer moved, move all the pointers into it. */ \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (old_buffer != bufp->buffer) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { \ ~~~~~~~~~~~~ buf_end = (buf_end - old_buffer) + bufp->buffer; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ begalt = (begalt - old_buffer) + bufp->buffer; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (fixup_alt_jump) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~ fixup_alt_jump = (fixup_alt_jump - old_buffer) + bufp->buffer; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (laststart) \ ~~~~~~~~~~~~~~~~~~~~~~~ laststart = (laststart - old_buffer) + bufp->buffer; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (pending_exact) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~ pending_exact = (pending_exact - old_buffer) + bufp->buffer; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } \ ~~~~~~~~~~~~ } while (0) ~~~~~~~~~~~ #define INIT_REG_TRANSLATE_SIZE 5 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Macros for the compile stack. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Since offsets can go either forwards or backwards, this type needs to be ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ able to hold values from -(RE_MAX_BUF_SIZE - 1) to RE_MAX_BUF_SIZE - 1. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ typedef int pattern_offset_t; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ typedef struct ~~~~~~~~~~~~~~ { ~ pattern_offset_t begalt_offset; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ pattern_offset_t fixup_alt_jump; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ pattern_offset_t inner_group_offset; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ pattern_offset_t laststart_offset; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regnum_t regnum; ~~~~~~~~~~~~~~~~ } compile_stack_elt_t; ~~~~~~~~~~~~~~~~~~~~~~ typedef struct ~~~~~~~~~~~~~~ { ~ compile_stack_elt_t *stack; ~~~~~~~~~~~~~~~~~~~~~~~~~~~ int size; ~~~~~~~~~ int avail; /* Offset of next open position. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } compile_stack_type; ~~~~~~~~~~~~~~~~~~~~~ #define INIT_COMPILE_STACK_SIZE 32 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define COMPILE_STACK_EMPTY (compile_stack.avail == 0) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define COMPILE_STACK_FULL (compile_stack.avail == compile_stack.size) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* The next available element. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define COMPILE_STACK_TOP (compile_stack.stack[compile_stack.avail]) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Set the bit for character C in a bit vector. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define SET_LIST_BIT(c) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (buf_end[((unsigned char) (c)) / BYTEWIDTH] \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ |= 1 << (((unsigned char) c) % BYTEWIDTH)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef MULE ~~~~~~~~~~~ /* Set the "bit" for character C in a range table. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define SET_RANGETAB_BIT(c) put_range_table (rtab, c, c, Qt) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ #ifdef emacs ~~~~~~~~~~~~ /* Parse the longest number we can, but don't produce a bignum, that can't ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ correspond to anything we're interested in and would needlessly complicate ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ code. Also avoid the silent overflow issues of the non-emacs code below. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ If the string at P is not exhausted, leave P pointing at the next ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (probable-)non-digit byte encountered. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define GET_UNSIGNED_NUMBER(num) do \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Ibyte *_gus_numend = NULL; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Lisp_Object _gus_numno; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* most-positive-fixnum on 32 bit XEmacs is 10 decimal digits, \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ nine will keep us in fixnum territory no matter our \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ architecture */ \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Bytecount limit = min (pend - p, 9); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ \ ~ /* Require that any digits are ASCII. We already require that \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the user type ASCII in order to type {,(,|, etc, and there is \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the potential for security holes in the future if we allow \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ non-ASCII digits to specify groups in regexps and other \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ code that parses regexps is not aware of this. */ \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ _gus_numno = parse_integer (p, &_gus_numend, limit, 10, 1, \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Vdigit_fixnum_ascii); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (FIXNUMP (_gus_numno) && XREALFIXNUM (_gus_numno) >= 0) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ num = XREALFIXNUM (_gus_numno); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ p = _gus_numend; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } while (0) ~~~~~~~~~~~ #else ~~~~~ /* Get the next unsigned number in the uncompiled pattern. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define GET_UNSIGNED_NUMBER(num) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { if (p != pend) \ ~~~~~~~~~~~~~~~~~~~~~~~~ { \ ~~~~~~~~~~~ int _gun_do_unfetch = 1; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PATFETCH (c); \ ~~~~~~~~~~~~~~~~~~~~~~ while (ISDIGIT (c)) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~ { \ ~~~~~~~~~~~ if (num < 0) \ ~~~~~~~~~~~~~~~~~~~~ num = 0; \ ~~~~~~~~~~~~~~~~ num = num * 10 + c - '0'; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (p == pend) \ ~~~~~~~~~~~~~~~~~~~~~~ { \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ _gun_do_unfetch = 0; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; \ ~~~~~~~~~~~~~~~ } \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PATFETCH (c); \ ~~~~~~~~~~~~~~~~~~~~ } \ ~~~~~~~~~~~ if (_gun_do_unfetch) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Make sure P points to the next non-digit character. */ \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PATUNFETCH; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ #endif ~~~~~~ /* Map a string to the char class it names (if any). BEG points to the string ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ to be parsed and LIMIT is the length, in bytes, of that string. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ XEmacs; this only handles the NAME part of the [:NAME:] specification of a ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ character class name. The GNU emacs version of this function attempts to ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ handle the string from [: onwards, and is called re_wctype_parse. Our ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ approach means the function doesn't need to be called with every character ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ class encountered. ~~~~~~~~~~~~~~~~~~ LENGTH would be a Bytecount if this function didn't need to be compiled ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ also for executables that don't include lisp.h ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Return RECC_ERROR if STRP doesn't match a known character class. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_wctype_t ~~~~~~~~~~~ re_wctype (const unsigned char *beg, int limit) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ /* Sort tests in the length=five case by frequency the classes to minimize ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ number of times we fail the comparison. The frequencies of character class ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ names used in Emacs sources as of 2016-07-27: ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ $ find \( -name \*.c -o -name \*.el \) -exec grep -h '\[:[a-z]*:]' {} + | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ sed 's/]/]\n/g' |grep -o '\[:[a-z]*:]' |sort |uniq -c |sort -nr ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 213 [:alnum:] ~~~~~~~~~~~~~ 104 [:alpha:] ~~~~~~~~~~~~~ 62 [:space:] ~~~~~~~~~~~~ 39 [:digit:] ~~~~~~~~~~~~ 36 [:blank:] ~~~~~~~~~~~~ 26 [:word:] ~~~~~~~~~~~ 26 [:upper:] ~~~~~~~~~~~~ 21 [:lower:] ~~~~~~~~~~~~ 10 [:xdigit:] ~~~~~~~~~~~~~ 10 [:punct:] ~~~~~~~~~~~~ 10 [:ascii:] ~~~~~~~~~~~~ 4 [:nonascii:] ~~~~~~~~~~~~~~ 4 [:graph:] ~~~~~~~~~~~ 2 [:print:] ~~~~~~~~~~~ 2 [:cntrl:] ~~~~~~~~~~~ 1 [:ff:] ~~~~~~~~ If you update this list, consider also updating chain of or'ed conditions ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ in execute_charset function. XEmacs; our equivalent is the condition ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ checking class_bits in the charset_mule and charset_mule_not opcodes. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ ~~ switch (limit) { ~~~~~~~~~~~~~~~~ case 4: ~~~~~~~ if (!memcmp (beg, "word", 4)) return RECC_WORD; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case 5: ~~~~~~~ if (!memcmp (beg, "alnum", 5)) return RECC_ALNUM; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!memcmp (beg, "alpha", 5)) return RECC_ALPHA; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!memcmp (beg, "space", 5)) return RECC_SPACE; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!memcmp (beg, "digit", 5)) return RECC_DIGIT; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!memcmp (beg, "blank", 5)) return RECC_BLANK; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!memcmp (beg, "upper", 5)) return RECC_UPPER; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!memcmp (beg, "lower", 5)) return RECC_LOWER; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!memcmp (beg, "punct", 5)) return RECC_PUNCT; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!memcmp (beg, "ascii", 5)) return RECC_ASCII; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!memcmp (beg, "graph", 5)) return RECC_GRAPH; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!memcmp (beg, "print", 5)) return RECC_PRINT; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!memcmp (beg, "cntrl", 5)) return RECC_CNTRL; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case 6: ~~~~~~~ if (!memcmp (beg, "xdigit", 6)) return RECC_XDIGIT; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case 7: ~~~~~~~ if (!memcmp (beg, "unibyte", 7)) return RECC_UNIBYTE; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case 8: ~~~~~~~ if (!memcmp (beg, "nonascii", 8)) return RECC_NONASCII; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case 9: ~~~~~~~ if (!memcmp (beg, "multibyte", 9)) return RECC_MULTIBYTE; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ } ~ return RECC_ERROR; ~~~~~~~~~~~~~~~~~~ } ~ /* True if CH is in the char class CC. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int ~~~ re_iswctype (int ch, re_wctype_t cc ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RE_ISWCTYPE_ARG_DECL) ~~~~~~~~~~~~~~~~~~~~~ { ~ switch (cc) ~~~~~~~~~~~ { ~ case RECC_ALNUM: return ISALNUM (ch) != 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_ALPHA: return ISALPHA (ch) != 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_BLANK: return ISBLANK (ch) != 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_CNTRL: return ISCNTRL (ch) != 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_DIGIT: return ISDIGIT (ch) != 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_GRAPH: return ISGRAPH (ch) != 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_PRINT: return ISPRINT (ch) != 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_PUNCT: return ISPUNCT (ch) != 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_SPACE: return ISSPACE (ch) != 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef emacs ~~~~~~~~~~~~ case RECC_UPPER: ~~~~~~~~~~~~~~~~ return NILP (lispbuf->case_fold_search) ? ISUPPER (ch) != 0 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ : !NOCASEP (lispbuf, ch); ~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_LOWER: ~~~~~~~~~~~~~~~~ return NILP (lispbuf->case_fold_search) ? ISLOWER (ch) != 0 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ : !NOCASEP (lispbuf, ch); ~~~~~~~~~~~~~~~~~~~~~~~~~ #else ~~~~~ case RECC_UPPER: return ISUPPER (ch) != 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_LOWER: return ISLOWER (ch) != 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ case RECC_XDIGIT: return ISXDIGIT (ch) != 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_ASCII: return ISASCII (ch) != 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_NONASCII: case RECC_MULTIBYTE: return !ISASCII (ch); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_UNIBYTE: return ISUNIBYTE (ch) != 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_WORD: return ISWORD (ch) != 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_ERROR: return false; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ assert (0); ~~~~~~~~~~~ return -1; ~~~~~~~~~~ } ~ #ifdef MULE ~~~~~~~~~~~ static re_bool ~~~~~~~~~~~~~~ re_wctype_can_match_non_ascii (re_wctype_t cc) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ switch (cc) ~~~~~~~~~~~ { ~ case RECC_ASCII: ~~~~~~~~~~~~~~~~ case RECC_UNIBYTE: ~~~~~~~~~~~~~~~~~~ case RECC_CNTRL: ~~~~~~~~~~~~~~~~ case RECC_DIGIT: ~~~~~~~~~~~~~~~~ case RECC_XDIGIT: ~~~~~~~~~~~~~~~~~ case RECC_BLANK: ~~~~~~~~~~~~~~~~ return false; ~~~~~~~~~~~~~ default: ~~~~~~~~ return true; ~~~~~~~~~~~~ } ~ } ~ #endif /* MULE */ ~~~~~~~~~~~~~~~~~ #ifdef emacs ~~~~~~~~~~~~ /* Return a bit-pattern to use in the range-table bits to match multibyte ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ chars of class CC. */ ~~~~~~~~~~~~~~~~~~~~~~ static unsigned char ~~~~~~~~~~~~~~~~~~~~ re_wctype_to_bit (re_wctype_t cc) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ switch (cc) ~~~~~~~~~~~ { ~ case RECC_PRINT: case RECC_GRAPH: ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_ALPHA: return BIT_ALPHA; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_ALNUM: case RECC_WORD: return BIT_WORD; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_LOWER: return BIT_LOWER; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_UPPER: return BIT_UPPER; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_PUNCT: return BIT_PUNCT; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_SPACE: return BIT_SPACE; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_MULTIBYTE: case RECC_NONASCII: ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_ASCII: case RECC_DIGIT: case RECC_XDIGIT: case RECC_CNTRL: ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_BLANK: case RECC_UNIBYTE: case RECC_ERROR: return 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ default: ~~~~~~~~ ABORT (); ~~~~~~~~~ return 0; ~~~~~~~~~ } ~ } ~ #endif /* emacs */ ~~~~~~~~~~~~~~~~~~ ~ static void store_op1 (re_opcode_t op, unsigned char *loc, int arg); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ static void store_op2 (re_opcode_t op, unsigned char *loc, int arg1, int arg2); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ static void insert_op1 (re_opcode_t op, unsigned char *loc, int arg, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ unsigned char *end); ~~~~~~~~~~~~~~~~~~~~ static void insert_op2 (re_opcode_t op, unsigned char *loc, int arg1, int arg2, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ unsigned char *end); ~~~~~~~~~~~~~~~~~~~~ static re_bool at_begline_loc_p (re_char *pattern, re_char *p, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ reg_syntax_t syntax); ~~~~~~~~~~~~~~~~~~~~~ static re_bool at_endline_loc_p (re_char *p, re_char *pend, int syntax); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ static re_bool group_in_compile_stack (compile_stack_type compile_stack, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regnum_t regnum); ~~~~~~~~~~~~~~~~~ static reg_errcode_t compile_range (re_char **p_ptr, re_char *pend, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RE_TRANSLATE_TYPE translate, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ reg_syntax_t syntax, ~~~~~~~~~~~~~~~~~~~~ unsigned char *b); ~~~~~~~~~~~~~~~~~~ #ifdef MULE ~~~~~~~~~~~ static reg_errcode_t compile_extended_range (re_char **p_ptr, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_char *pend, ~~~~~~~~~~~~~~ RE_TRANSLATE_TYPE translate, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ reg_syntax_t syntax, ~~~~~~~~~~~~~~~~~~~~ Lisp_Object rtab); ~~~~~~~~~~~~~~~~~~ #endif /* MULE */ ~~~~~~~~~~~~~~~~~ #ifdef emacs ~~~~~~~~~~~~ reg_errcode_t compile_char_class (re_wctype_t cc, Lisp_Object rtab, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Bitbyte *flags_out); ~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ static re_bool group_match_null_string_p (re_char **p, re_char *end, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ register_info_type *reg_info); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ static re_bool alt_match_null_string_p (re_char *p, re_char *end, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ register_info_type *reg_info); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ static re_bool common_op_match_null_string_p (re_char **p, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_char *end, ~~~~~~~~~~~~~ register_info_type *reg_info); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ static int bcmp_translate (re_char *s1, re_char *s2, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ REGISTER int len, RE_TRANSLATE_TYPE translate ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef emacs ~~~~~~~~~~~~ , Internal_Format fmt, Lisp_Object lispobj ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ ); ~~ static int re_match_2_internal (struct re_pattern_buffer *bufp, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_char *string1, int size1, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_char *string2, int size2, int pos, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ struct re_registers *regs, int stop ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RE_LISP_CONTEXT_ARGS_DECL); ~~~~~~~~~~~~~~~~~~~~~~~~~~~ ~ #ifndef MATCH_MAY_ALLOCATE ~~~~~~~~~~~~~~~~~~~~~~~~~~ /* If we cannot allocate large objects within re_match_2_internal, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ we make the fail stack and register vectors global. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ The fail stack, we grow to the maximum size when a regexp ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ is compiled. ~~~~~~~~~~~~ The register vectors, we adjust in size each time we ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ compile a regexp, according to the number of registers it needs. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ static fail_stack_type fail_stack; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Size with which the following vectors are currently allocated. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ That is so we can make them bigger as needed, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ but never make them smaller. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ static int regs_allocated_size; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ static re_char ** regstart, ** regend; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ static re_char ** old_regstart, ** old_regend; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ static re_char **best_regstart, **best_regend; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ static register_info_type *reg_info; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ static re_char **reg_dummy; ~~~~~~~~~~~~~~~~~~~~~~~~~~~ static register_info_type *reg_info_dummy; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Make the register vectors big enough for NUM_REGS registers, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ but don't make them smaller. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ static ~~~~~~ regex_grow_registers (int num_regs) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ if (num_regs > regs_allocated_size) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ RETALLOC (regstart, num_regs, re_char *); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RETALLOC (regend, num_regs, re_char *); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RETALLOC (old_regstart, num_regs, re_char *); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RETALLOC (old_regend, num_regs, re_char *); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RETALLOC (best_regstart, num_regs, re_char *); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RETALLOC (best_regend, num_regs, re_char *); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RETALLOC (reg_info, num_regs, register_info_type); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RETALLOC (reg_dummy, num_regs, re_char *); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RETALLOC (reg_info_dummy, num_regs, register_info_type); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regs_allocated_size = num_regs; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ } ~ #endif /* not MATCH_MAY_ALLOCATE */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ~ /* `regex_compile' compiles PATTERN (of length SIZE) according to SYNTAX. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Returns one of error codes defined in `regex.h', or zero for success. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Assumes the `allocated' (and perhaps `buffer') and `translate' ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fields are set in BUFP on entry. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ If it succeeds, results are put in BUFP (if it returns an error, the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ contents of BUFP are undefined): ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `buffer' is the compiled pattern; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `syntax' is set to SYNTAX; ~~~~~~~~~~~~~~~~~~~~~~~~~~ `used' is set to the length of the compiled pattern; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `fastmap_accurate' is zero; ~~~~~~~~~~~~~~~~~~~~~~~~~~~ `re_ngroups' is the number of groups/subexpressions (including shy ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ groups) in PATTERN; ~~~~~~~~~~~~~~~~~~~ `re_nsub' is the number of non-shy groups in PATTERN; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `not_bol' and `not_eol' are zero; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ The `fastmap' and `newline_anchor' fields are neither ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ examined nor set. */ ~~~~~~~~~~~~~~~~~~~~~ /* Return, freeing storage we allocated. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define FREE_STACK_RETURN(value) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ do \ ~~~~~~~~~~ { \ ~~~~~~~~~ xfree (compile_stack.stack); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return value; \ ~~~~~~~~~~~~~~~~~~~~ } while (0) ~~~~~~~~~~~ static reg_errcode_t ~~~~~~~~~~~~~~~~~~~~ regex_compile (re_char *pattern, int size, reg_syntax_t syntax, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ struct re_pattern_buffer *bufp) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ /* We fetch characters from PATTERN here. We declare these as int ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (or possibly long) so that chars above 127 can be used as ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ array indices. The macros that fetch a character from the pattern ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ make sure to coerce to unsigned char before assigning, so we won't ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ get bitten by negative numbers here. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* XEmacs change: used to be unsigned char. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ REGISTER EMACS_INT c, c1; ~~~~~~~~~~~~~~~~~~~~~~~~~ /* A random temporary spot in PATTERN. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_char *p1; ~~~~~~~~~~~~ /* Points to the end of the buffer, where we should append. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ REGISTER unsigned char *buf_end; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Keeps track of unclosed groups. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ compile_stack_type compile_stack; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Points to the current (ending) position in the pattern. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_char *p = pattern; ~~~~~~~~~~~~~~~~~~~~~ re_char *pend = pattern + size; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* How to translate the characters in the pattern. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RE_TRANSLATE_TYPE translate = bufp->translate; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Address of the count-byte of the most recently inserted `exactn' ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ command. This makes it possible to tell if a new exact-match ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ character can be added to that command or if the character requires ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ a new `exactn' command. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~ unsigned char *pending_exact = 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Address of start of the most recently finished expression. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ This tells, e.g., postfix * where to find the start of its ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ operand. Reset at the beginning of groups and alternatives. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ unsigned char *laststart = 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Address of beginning of regexp, or inside of last group. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ unsigned char *begalt; ~~~~~~~~~~~~~~~~~~~~~~ /* Place in the uncompiled pattern (i.e., the {) to ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ which to go back if the interval is invalid. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_char *beg_interval; ~~~~~~~~~~~~~~~~~~~~~~ /* Address of the place where a forward jump should go to the end of ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the containing expression. Each alternative of an `or' -- except the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ last -- ends with a forward jump of this sort. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ unsigned char *fixup_alt_jump = 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Counts open-groups as they are encountered. Remembered for the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ matching close-group on the compile stack, so the same register ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ number is put in the stop_memory as the start_memory. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regnum_t regnum = 0; ~~~~~~~~~~~~~~~~~~~~ #ifdef DEBUG ~~~~~~~~~~~~ if (debug_regexps & RE_DEBUG_COMPILATION) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ int debug_count; ~~~~~~~~~~~~~~~~ DEBUG_PRINT1 ("\nCompiling pattern: "); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (debug_count = 0; debug_count < size; debug_count++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ putchar (pattern[debug_count]); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ putchar ('\n'); ~~~~~~~~~~~~~~~ } ~ #endif /* DEBUG */ ~~~~~~~~~~~~~~~~~~ /* Initialize the compile stack. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ compile_stack.stack = TALLOC (INIT_COMPILE_STACK_SIZE, compile_stack_elt_t); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (compile_stack.stack == NULL) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return REG_ESPACE; ~~~~~~~~~~~~~~~~~~ compile_stack.size = INIT_COMPILE_STACK_SIZE; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ compile_stack.avail = 0; ~~~~~~~~~~~~~~~~~~~~~~~~ /* Initialize the pattern buffer. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->syntax = syntax; ~~~~~~~~~~~~~~~~~~~~~~ bufp->fastmap_accurate = 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->not_bol = bufp->not_eol = 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Set `used' to zero, so that if we return an error, the pattern ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ printer (for debugging) will think there's no pattern. We reset it ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ at the end. */ ~~~~~~~~~~~~~~~ bufp->used = 0; ~~~~~~~~~~~~~~~ /* Always count groups, whether or not bufp->no_sub is set. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->re_nsub = 0; ~~~~~~~~~~~~~~~~~~ bufp->re_ngroups = 0; ~~~~~~~~~~~~~~~~~~~~~ bufp->warned_about_incompatible_back_references = 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (bufp->external_to_internal_register == 0) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ bufp->external_to_internal_register_size = INIT_REG_TRANSLATE_SIZE; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RETALLOC (bufp->external_to_internal_register, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->external_to_internal_register_size, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int); ~~~~~ } ~ { ~ int i; ~~~~~~ bufp->external_to_internal_register[0] = 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (i = 1; i < bufp->external_to_internal_register_size; i++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->external_to_internal_register[i] = (int) 0xDEADBEEF; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ #if !defined (emacs) && !defined (SYNTAX_TABLE) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Initialize the syntax table. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ init_syntax_once (); ~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ if (bufp->allocated == 0) ~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ if (bufp->buffer) ~~~~~~~~~~~~~~~~~ { /* If zero allocated, but buffer is non-null, try to realloc ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ enough space. This loses if buffer's address is bogus, but ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ that is the user's responsibility. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RETALLOC (bufp->buffer, INIT_BUF_SIZE, unsigned char); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ else ~~~~ { /* Caller did not allocate a buffer. Do it for them. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->buffer = TALLOC (INIT_BUF_SIZE, unsigned char); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ if (!bufp->buffer) FREE_STACK_RETURN (REG_ESPACE); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->allocated = INIT_BUF_SIZE; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ begalt = buf_end = bufp->buffer; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Loop through the uncompiled pattern until we're at the end. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ while (p != pend) ~~~~~~~~~~~~~~~~~ { ~ PATFETCH (c); ~~~~~~~~~~~~~ switch (c) ~~~~~~~~~~ { ~ case '^': ~~~~~~~~~ { ~ if ( /* If at start of pattern, it's an operator. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ p == pattern + 1 ~~~~~~~~~~~~~~~~ /* If context independent, it's an operator. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || syntax & RE_CONTEXT_INDEP_ANCHORS ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Otherwise, depends on what's come before. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || at_begline_loc_p (pattern, p, syntax)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ BUF_PUSH (begline); ~~~~~~~~~~~~~~~~~~~ else ~~~~ goto normal_char; ~~~~~~~~~~~~~~~~~ } ~ break; ~~~~~~ case '$': ~~~~~~~~~ { ~ if ( /* If at end of pattern, it's an operator. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ p == pend ~~~~~~~~~ /* If context independent, it's an operator. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || syntax & RE_CONTEXT_INDEP_ANCHORS ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Otherwise, depends on what's next. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || at_endline_loc_p (p, pend, syntax)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ BUF_PUSH (endline); ~~~~~~~~~~~~~~~~~~~ else ~~~~ goto normal_char; ~~~~~~~~~~~~~~~~~ } ~ break; ~~~~~~ case '+': ~~~~~~~~~ case '?': ~~~~~~~~~ if ((syntax & RE_BK_PLUS_QM) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || (syntax & RE_LIMITED_OPS)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto normal_char; ~~~~~~~~~~~~~~~~~ handle_plus: ~~~~~~~~~~~~ case '*': ~~~~~~~~~ /* If there is no previous pattern... */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!laststart) ~~~~~~~~~~~~~~~ { ~ if (syntax & RE_CONTEXT_INVALID_OPS) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ FREE_STACK_RETURN (REG_BADRPT); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ else if (!(syntax & RE_CONTEXT_INDEP_OPS)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto normal_char; ~~~~~~~~~~~~~~~~~ } ~ { ~ /* true means zero/many matches are allowed. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_bool zero_times_ok = c != '+'; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_bool many_times_ok = c != '?'; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* true means match shortest string possible. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_bool minimal = false; ~~~~~~~~~~~~~~~~~~~~~~~~ /* If there is a sequence of repetition chars, collapse it ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ down to just one (the right one). We can't combine ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ interval operators with these because of, e.g., `a{2}*', ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ which should only match an even number of `a's. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ while (p != pend) ~~~~~~~~~~~~~~~~~ { ~ PATFETCH (c); ~~~~~~~~~~~~~ if (c == '*' || (!(syntax & RE_BK_PLUS_QM) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ && (c == '+' || c == '?'))) ~~~~~~~~~~~~~~~~~~~~~~~~~~~ ; ~ else if (syntax & RE_BK_PLUS_QM && c == '\\') ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ if (p == pend) FREE_STACK_RETURN (REG_EESCAPE); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PATFETCH (c1); ~~~~~~~~~~~~~~ if (!(c1 == '+' || c1 == '?')) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ PATUNFETCH; ~~~~~~~~~~~ PATUNFETCH; ~~~~~~~~~~~ break; ~~~~~~ } ~ c = c1; ~~~~~~~ } ~ else ~~~~ { ~ PATUNFETCH; ~~~~~~~~~~~ break; ~~~~~~ } ~ /* If we get here, we found another repeat character. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!(syntax & RE_NO_MINIMAL_MATCHING)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ /* "*?" and "+?" and "??" are okay (and mean match ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ minimally), but other sequences (such as "*??" and ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ "+++") are rejected (reserved for future use). */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (minimal || c != '?') ~~~~~~~~~~~~~~~~~~~~~~~~ FREE_STACK_RETURN (REG_BADRPT); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ minimal = true; ~~~~~~~~~~~~~~~ } ~ else ~~~~ { ~ zero_times_ok |= c != '+'; ~~~~~~~~~~~~~~~~~~~~~~~~~~ many_times_ok |= c != '?'; ~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ } ~ /* Star, etc. applied to an empty pattern is equivalent ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ to an empty pattern. */ ~~~~~~~~~~~~~~~~~~~~~~~~ if (!laststart) ~~~~~~~~~~~~~~~ break; ~~~~~~ /* Now we know whether zero matches is allowed ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ and whether two or more matches is allowed ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ and whether we want minimal or maximal matching. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (minimal) ~~~~~~~~~~~~ { ~ if (!many_times_ok) ~~~~~~~~~~~~~~~~~~~ { ~ /* "a??" becomes: ~~~~~~~~~~~~~~~~~ 0: /on_failure_jump to 6 ~~~~~~~~~~~~~~~~~~~~~~~~ 3: /jump to 9 ~~~~~~~~~~~~~ 6: /exactn/1/A ~~~~~~~~~~~~~~ 9: end of pattern. ~~~~~~~~~~~~~~~~~~ */ ~~ GET_BUFFER_SPACE (6); ~~~~~~~~~~~~~~~~~~~~~ INSERT_JUMP (jump, laststart, buf_end + 3); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ buf_end += 3; ~~~~~~~~~~~~~ INSERT_JUMP (on_failure_jump, laststart, laststart + 6); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ buf_end += 3; ~~~~~~~~~~~~~ } ~ else if (zero_times_ok) ~~~~~~~~~~~~~~~~~~~~~~~ { ~ /* "a*?" becomes: ~~~~~~~~~~~~~~~~~ 0: /jump to 6 ~~~~~~~~~~~~~ 3: /exactn/1/A ~~~~~~~~~~~~~~ 6: /on_failure_jump to 3 ~~~~~~~~~~~~~~~~~~~~~~~~ 9: end of pattern. ~~~~~~~~~~~~~~~~~~ */ ~~ GET_BUFFER_SPACE (6); ~~~~~~~~~~~~~~~~~~~~~ INSERT_JUMP (jump, laststart, buf_end + 3); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ buf_end += 3; ~~~~~~~~~~~~~ STORE_JUMP (on_failure_jump, buf_end, laststart + 3); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ buf_end += 3; ~~~~~~~~~~~~~ } ~ else ~~~~ { ~ /* "a+?" becomes: ~~~~~~~~~~~~~~~~~ 0: /exactn/1/A ~~~~~~~~~~~~~~ 3: /on_failure_jump to 0 ~~~~~~~~~~~~~~~~~~~~~~~~ 6: end of pattern. ~~~~~~~~~~~~~~~~~~ */ ~~ GET_BUFFER_SPACE (3); ~~~~~~~~~~~~~~~~~~~~~ STORE_JUMP (on_failure_jump, buf_end, laststart); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ buf_end += 3; ~~~~~~~~~~~~~ } ~ } ~ else ~~~~ { ~ /* Are we optimizing this jump? */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_bool keep_string_p = false; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (many_times_ok) ~~~~~~~~~~~~~~~~~~ { /* More than one repetition is allowed, so put in ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ at the end a backward relative jump from ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `buf_end' to before the next jump we're going ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ to put in below (which jumps from laststart to ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ after this jump). ~~~~~~~~~~~~~~~~~ But if we are at the `*' in the exact sequence `.*\n', ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ insert an unconditional jump backwards to the ., ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ instead of the beginning of the loop. This way we only ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ push a failure point once, instead of every time ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ through the loop. */ ~~~~~~~~~~~~~~~~~~~~~ assert (p - 1 > pattern); ~~~~~~~~~~~~~~~~~~~~~~~~~ /* Allocate the space for the jump. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ GET_BUFFER_SPACE (3); ~~~~~~~~~~~~~~~~~~~~~ /* We know we are not at the first character of the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ pattern, because laststart was nonzero. And we've ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ already incremented `p', by the way, to be the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ character after the `*'. Do we have to do something ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ analogous here for null bytes, because of ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RE_DOT_NOT_NULL? */ ~~~~~~~~~~~~~~~~~~~ if (*(p - 2) == '.' ~~~~~~~~~~~~~~~~~~~ && zero_times_ok ~~~~~~~~~~~~~~~~ && p < pend && *p == '\n' ~~~~~~~~~~~~~~~~~~~~~~~~~ && !(syntax & RE_DOT_NEWLINE)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { /* We have .*\n. */ ~~~~~~~~~~~~~~~~~~~~~~ STORE_JUMP (jump, buf_end, laststart); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ keep_string_p = true; ~~~~~~~~~~~~~~~~~~~~~ } ~ else ~~~~ /* Anything else. */ ~~~~~~~~~~~~~~~~~~~~~ STORE_JUMP (maybe_pop_jump, buf_end, laststart - 3); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* We've added more stuff to the buffer. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ buf_end += 3; ~~~~~~~~~~~~~ } ~ /* On failure, jump from laststart to buf_end + 3, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ which will be the end of the buffer after this jump ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ is inserted. */ ~~~~~~~~~~~~~~~~ GET_BUFFER_SPACE (3); ~~~~~~~~~~~~~~~~~~~~~ INSERT_JUMP (keep_string_p ? on_failure_keep_string_jump ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ : on_failure_jump, ~~~~~~~~~~~~~~~~~~ laststart, buf_end + 3); ~~~~~~~~~~~~~~~~~~~~~~~~ buf_end += 3; ~~~~~~~~~~~~~ if (!zero_times_ok) ~~~~~~~~~~~~~~~~~~~ { ~ /* At least one repetition is required, so insert a ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `dummy_failure_jump' before the initial ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `on_failure_jump' instruction of the loop. This ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ effects a skip over that instruction the first time ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ we hit that loop. */ ~~~~~~~~~~~~~~~~~~~~~ GET_BUFFER_SPACE (3); ~~~~~~~~~~~~~~~~~~~~~ INSERT_JUMP (dummy_failure_jump, laststart, laststart + 6); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ buf_end += 3; ~~~~~~~~~~~~~ } ~ } ~ pending_exact = 0; ~~~~~~~~~~~~~~~~~~ } ~ break; ~~~~~~ case '.': ~~~~~~~~~ laststart = buf_end; ~~~~~~~~~~~~~~~~~~~~ BUF_PUSH (anychar); ~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ #ifdef MULE ~~~~~~~~~~~ #define MAYBE_START_OVER_WITH_EXTENDED(ch) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (ch >= 0x80) do \ ~~~~~~~~~~~~~~~~~~~~~~ { \ ~~~~~~~ goto start_over_with_extended; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } while (0) ~~~~~~~~~~~ #else ~~~~~ #define MAYBE_START_OVER_WITH_EXTENDED(ch) (void)(ch) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ case '[': ~~~~~~~~~ { ~ /* XEmacs change: this whole section */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_bool had_char_class = false; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (p == pend) FREE_STACK_RETURN (REG_EBRACK); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Ensure that we have enough space to push a charset: the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ opcode, the length count, and the bitset; 34 bytes in all. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ GET_BUFFER_SPACE (34); ~~~~~~~~~~~~~~~~~~~~~~ laststart = buf_end; ~~~~~~~~~~~~~~~~~~~~ /* We test `*p == '^' twice, instead of using an if ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ statement, so we only need one BUF_PUSH. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ BUF_PUSH (*p == '^' ? charset_not : charset); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (*p == '^') ~~~~~~~~~~~~~~ p++; ~~~~ /* Remember the first position in the bracket expression. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ p1 = p; ~~~~~~~ /* Push the number of bytes in the bitmap. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ BUF_PUSH ((1 << BYTEWIDTH) / BYTEWIDTH); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Clear the whole map. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~ memset (buf_end, 0, (1 << BYTEWIDTH) / BYTEWIDTH); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* charset_not matches newline according to a syntax bit. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if ((re_opcode_t) buf_end[-2] == charset_not ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ && (syntax & RE_HAT_LISTS_NOT_NEWLINE)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ SET_LIST_BIT ('\n'); ~~~~~~~~~~~~~~~~~~~~ /* Read in characters and ranges, setting map bits. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (;;) ~~~~~~~~ { ~ if (p == pend) FREE_STACK_RETURN (REG_EBRACK); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PATFETCH (c); ~~~~~~~~~~~~~ /* Frumble-bumble, we may have found some extended chars. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Need to start over, process everything using the general ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ extended-char mechanism, and need to use charset_mule and ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ charset_mule_not instead of charset and charset_not. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ MAYBE_START_OVER_WITH_EXTENDED (c); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* \ might escape characters inside [...] and [^...]. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if ((syntax & RE_BACKSLASH_ESCAPE_IN_LISTS) && c == '\\') ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ if (p == pend) FREE_STACK_RETURN (REG_EESCAPE); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PATFETCH (c1); ~~~~~~~~~~~~~~ MAYBE_START_OVER_WITH_EXTENDED (c1); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ SET_LIST_BIT (c1); ~~~~~~~~~~~~~~~~~~ continue; ~~~~~~~~~ } ~ /* Could be the end of the bracket expression. If it's ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ not (i.e., when the bracket expression is `[]' so ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ far), the ']' character bit gets set way below. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (c == ']' && p != p1 + 1) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ /* Look ahead to see if it's a range when the last thing ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ was a character class. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~ if (had_char_class && c == '-' && *p != ']') ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ FREE_STACK_RETURN (REG_ERANGE); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Look ahead to see if it's a range when the last thing ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ was a character: if this is a hyphen not at the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ beginning or the end of a list, then it's the range ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ operator. */ ~~~~~~~~~~~~~ if (c == '-' ~~~~~~~~~~~~ && !(p - 2 >= pattern && p[-2] == '[') ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ && !(p - 3 >= pattern && p[-3] == '[' && p[-2] == '^') ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ && *p != ']') ~~~~~~~~~~~~~ { ~ reg_errcode_t ret; ~~~~~~~~~~~~~~~~~~ MAYBE_START_OVER_WITH_EXTENDED (*(unsigned char *)p); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ret = compile_range (&p, pend, translate, syntax, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ buf_end); ~~~~~~~~~ if (ret != REG_NOERROR) FREE_STACK_RETURN (ret); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ else if (p[0] == '-' && p[1] != ']') ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { /* This handles ranges made up of characters only. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ reg_errcode_t ret; ~~~~~~~~~~~~~~~~~~ /* Move past the `-'. */ ~~~~~~~~~~~~~~~~~~~~~~~~~ PATFETCH (c1); ~~~~~~~~~~~~~~ MAYBE_START_OVER_WITH_EXTENDED (*(unsigned char *)p); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ret = compile_range (&p, pend, translate, syntax, buf_end); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (ret != REG_NOERROR) FREE_STACK_RETURN (ret); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* See if we're at the beginning of a possible character ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ class. */ ~~~~~~~~~~ else if (syntax & RE_CHAR_CLASSES && c == '[' && *p == ':') ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ re_char *str = p + 1; ~~~~~~~~~~~~~~~~~~~~~ PATFETCH (c); ~~~~~~~~~~~~~ c1 = 0; ~~~~~~~ /* If pattern is `[[:'. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (p == pend) FREE_STACK_RETURN (REG_EBRACK); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (;;) ~~~~~~~~ { ~ PATFETCH (c); ~~~~~~~~~~~~~ if ((c == ':' && *p == ']') || p == pend) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ break; ~~~~~~ } ~ c1++; ~~~~~ } ~ /* If isn't a word bracketed by `[:' and `:]': ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ undo the ending character, the letters, and leave ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the leading `:' and `[' (but set bits for them). */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (c == ':' && *p == ']') ~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ re_wctype_t cc = re_wctype (str, c1); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int ch; ~~~~~~~ if (cc == RECC_ERROR) ~~~~~~~~~~~~~~~~~~~~~ FREE_STACK_RETURN (REG_ECTYPE); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Throw away the ] at the end of the character ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ class. */ ~~~~~~~~~~ PATFETCH (c); ~~~~~~~~~~~~~ if (p == pend) FREE_STACK_RETURN (REG_EBRACK); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef MULE ~~~~~~~~~~~ if (re_wctype_can_match_non_ascii (cc)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ goto start_over_with_extended; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ #endif /* MULE */ ~~~~~~~~~~~~~~~~~ for (ch = 0; ch < (1 << BYTEWIDTH); ++ch) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ if (re_iswctype (ch, cc ~~~~~~~~~~~~~~~~~~~~~~~ RE_ISWCTYPE_ARG (current_buffer))) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ SET_LIST_BIT (ch); ~~~~~~~~~~~~~~~~~~ } ~ } ~ had_char_class = true; ~~~~~~~~~~~~~~~~~~~~~~ } ~ else ~~~~ { ~ c1++; ~~~~~ while (c1--) ~~~~~~~~~~~~ PATUNFETCH; ~~~~~~~~~~~ SET_LIST_BIT ('['); ~~~~~~~~~~~~~~~~~~~ SET_LIST_BIT (':'); ~~~~~~~~~~~~~~~~~~~ had_char_class = false; ~~~~~~~~~~~~~~~~~~~~~~~ } ~ } ~ else ~~~~ { ~ had_char_class = false; ~~~~~~~~~~~~~~~~~~~~~~~ SET_LIST_BIT (c); ~~~~~~~~~~~~~~~~~ } ~ } ~ /* Discard any (non)matching list bytes that are all 0 at the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ end of the map. Decrease the map-length byte too. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ while ((int) buf_end[-1] > 0 && buf_end[buf_end[-1] - 1] == 0) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ buf_end[-1]--; ~~~~~~~~~~~~~~ buf_end += buf_end[-1]; ~~~~~~~~~~~~~~~~~~~~~~~ } ~ break; ~~~~~~ #ifdef MULE ~~~~~~~~~~~ start_over_with_extended: ~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ REGISTER Lisp_Object rtab = Qnil; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Bitbyte flags = 0; ~~~~~~~~~~~~~~~~~~ int bytes_needed = sizeof (flags); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_bool had_char_class = false; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* There are extended chars here, which means we need to use the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ unified range-table format. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (buf_end[-2] == charset) ~~~~~~~~~~~~~~~~~~~~~~~~~~~ buf_end[-2] = charset_mule; ~~~~~~~~~~~~~~~~~~~~~~~~~~~ else ~~~~ buf_end[-2] = charset_mule_not; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ buf_end--; ~~~~~~~~~~ p = p1; /* go back to the beginning of the charset, after ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ a possible ^. */ ~~~~~~~~~~~~~~~~ rtab = Vthe_lisp_rangetab; ~~~~~~~~~~~~~~~~~~~~~~~~~~ Fclear_range_table (rtab); ~~~~~~~~~~~~~~~~~~~~~~~~~~ /* charset_not matches newline according to a syntax bit. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if ((re_opcode_t) buf_end[-1] == charset_mule_not ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ && (syntax & RE_HAT_LISTS_NOT_NEWLINE)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ SET_RANGETAB_BIT ('\n'); ~~~~~~~~~~~~~~~~~~~~~~~~ /* Read in characters and ranges, setting map bits. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (;;) ~~~~~~~~ { ~ if (p == pend) FREE_STACK_RETURN (REG_EBRACK); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PATFETCH (c); ~~~~~~~~~~~~~ /* \ might escape characters inside [...] and [^...]. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if ((syntax & RE_BACKSLASH_ESCAPE_IN_LISTS) && c == '\\') ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ if (p == pend) FREE_STACK_RETURN (REG_EESCAPE); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PATFETCH (c1); ~~~~~~~~~~~~~~ SET_RANGETAB_BIT (c1); ~~~~~~~~~~~~~~~~~~~~~~ continue; ~~~~~~~~~ } ~ /* Could be the end of the bracket expression. If it's ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ not (i.e., when the bracket expression is `[]' so ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ far), the ']' character bit gets set way below. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (c == ']' && p != p1 + 1) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ /* Look ahead to see if it's a range when the last thing ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ was a character class. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~ if (had_char_class && c == '-' && *p != ']') ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ FREE_STACK_RETURN (REG_ERANGE); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Look ahead to see if it's a range when the last thing ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ was a character: if this is a hyphen not at the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ beginning or the end of a list, then it's the range ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ operator. */ ~~~~~~~~~~~~~ if (c == '-' ~~~~~~~~~~~~ && !(p - 2 >= pattern && p[-2] == '[') ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ && !(p - 3 >= pattern && p[-3] == '[' && p[-2] == '^') ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ && *p != ']') ~~~~~~~~~~~~~ { ~ reg_errcode_t ret; ~~~~~~~~~~~~~~~~~~ ret = compile_extended_range (&p, pend, translate, syntax, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ rtab); ~~~~~~ if (ret != REG_NOERROR) FREE_STACK_RETURN (ret); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ else if (p[0] == '-' && p[1] != ']') ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { /* This handles ranges made up of characters only. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ reg_errcode_t ret; ~~~~~~~~~~~~~~~~~~ /* Move past the `-'. */ ~~~~~~~~~~~~~~~~~~~~~~~~~ PATFETCH (c1); ~~~~~~~~~~~~~~ ret = compile_extended_range (&p, pend, translate, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ syntax, rtab); ~~~~~~~~~~~~~~ if (ret != REG_NOERROR) FREE_STACK_RETURN (ret); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* See if we're at the beginning of a possible character ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ class. */ ~~~~~~~~~~ else if (syntax & RE_CHAR_CLASSES && c == '[' && *p == ':') ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ re_char *str = p + 1; ~~~~~~~~~~~~~~~~~~~~~ PATFETCH (c); ~~~~~~~~~~~~~ c1 = 0; ~~~~~~~ /* If pattern is `[[:'. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (p == pend) FREE_STACK_RETURN (REG_EBRACK); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (;;) ~~~~~~~~ { ~ PATFETCH (c); ~~~~~~~~~~~~~ if ((c == ':' && *p == ']') || p == pend) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ break; ~~~~~~ } ~ c1++; ~~~~~ } ~ /* If isn't a word bracketed by `[:' and `:]': ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ undo the ending character, the letters, and leave ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the leading `:' and `[' (but set bits for them). */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (c == ':' && *p == ']') ~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ re_wctype_t cc = re_wctype (str, c1); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ reg_errcode_t ret = REG_NOERROR; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (cc == RECC_ERROR) ~~~~~~~~~~~~~~~~~~~~~ FREE_STACK_RETURN (REG_ECTYPE); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Throw away the ] at the end of the character ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ class. */ ~~~~~~~~~~ PATFETCH (c); ~~~~~~~~~~~~~ if (p == pend) FREE_STACK_RETURN (REG_EBRACK); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ret = compile_char_class (cc, rtab, &flags); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (ret != REG_NOERROR) FREE_STACK_RETURN (ret); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ had_char_class = true; ~~~~~~~~~~~~~~~~~~~~~~ } ~ else ~~~~ { ~ c1++; ~~~~~ while (c1--) ~~~~~~~~~~~~ PATUNFETCH; ~~~~~~~~~~~ SET_RANGETAB_BIT ('['); ~~~~~~~~~~~~~~~~~~~~~~~ SET_RANGETAB_BIT (':'); ~~~~~~~~~~~~~~~~~~~~~~~ had_char_class = false; ~~~~~~~~~~~~~~~~~~~~~~~ } ~ } ~ else ~~~~ { ~ had_char_class = false; ~~~~~~~~~~~~~~~~~~~~~~~ SET_RANGETAB_BIT (c); ~~~~~~~~~~~~~~~~~~~~~ } ~ } ~ bytes_needed += unified_range_table_bytes_needed (rtab); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ GET_BUFFER_SPACE (bytes_needed); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ *buf_end++ = flags; ~~~~~~~~~~~~~~~~~~~ unified_range_table_copy_data (rtab, buf_end); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ buf_end += unified_range_table_bytes_used (buf_end); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ } ~ #endif /* MULE */ ~~~~~~~~~~~~~~~~~ case '(': ~~~~~~~~~ if (syntax & RE_NO_BK_PARENS) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto handle_open; ~~~~~~~~~~~~~~~~~ else ~~~~ goto normal_char; ~~~~~~~~~~~~~~~~~ case ')': ~~~~~~~~~ if (syntax & RE_NO_BK_PARENS) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto handle_close; ~~~~~~~~~~~~~~~~~~ else ~~~~ goto normal_char; ~~~~~~~~~~~~~~~~~ case '\n': ~~~~~~~~~~ if (syntax & RE_NEWLINE_ALT) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto handle_alt; ~~~~~~~~~~~~~~~~ else ~~~~ goto normal_char; ~~~~~~~~~~~~~~~~~ case '|': ~~~~~~~~~ if (syntax & RE_NO_BK_VBAR) ~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto handle_alt; ~~~~~~~~~~~~~~~~ else ~~~~ goto normal_char; ~~~~~~~~~~~~~~~~~ case '{': ~~~~~~~~~ if (syntax & RE_INTERVALS && syntax & RE_NO_BK_BRACES) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto handle_interval; ~~~~~~~~~~~~~~~~~~~~~ else ~~~~ goto normal_char; ~~~~~~~~~~~~~~~~~ case '\\': ~~~~~~~~~~ if (p == pend) FREE_STACK_RETURN (REG_EESCAPE); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Do not translate the character after the \, so that we can ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ distinguish, e.g., \B from \b, even if we normally would ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ translate, e.g., B to b. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PATFETCH_RAW (c); ~~~~~~~~~~~~~~~~~ switch (c) ~~~~~~~~~~ { ~ case '(': ~~~~~~~~~ if (syntax & RE_NO_BK_PARENS) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto normal_backslash; ~~~~~~~~~~~~~~~~~~~~~~ handle_open: ~~~~~~~~~~~~ { ~ regnum_t r = 0; ~~~~~~~~~~~~~~~ re_bool shy = 0, named_nonshy = 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!(syntax & RE_NO_SHY_GROUPS) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ && p != pend && itext_ichar_eql (p, '?')) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ INC_IBYTEPTR (p); /* Gobble up the '?'. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PATFETCH (c); /* Fetch the next character, which may be a ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ digit. */ ~~~~~~~~~ switch (c) ~~~~~~~~~~ { ~ case ':': /* shy groups */ ~~~~~~~~~~~~~~~~~~~~~~~~~~ shy = 1; ~~~~~~~~ break; ~~~~~~ case '1': case '2': case '3': case '4': ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case '5': case '6': case '7': case '8': case '9': ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PATUNFETCH; ~~~~~~~~~~~ GET_UNSIGNED_NUMBER (r); ~~~~~~~~~~~~~~~~~~~~~~~~ if (itext_ichar_eql (p, ':')) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ named_nonshy = 1; ~~~~~~~~~~~~~~~~~ INC_IBYTEPTR (p); /* Gobble up the ':'. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ /* Otherwise, fall through and error. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* An explicitly specified regnum must start with ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ non-0. */ ~~~~~~~~~ case '0': ~~~~~~~~~ default: ~~~~~~~~ FREE_STACK_RETURN (REG_BADPAT); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ } ~ ++regnum; ~~~~~~~~~ bufp->re_ngroups++; ~~~~~~~~~~~~~~~~~~~ if (bufp->re_ngroups > MAX_REGNUM) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ FREE_STACK_RETURN (REG_ESUBREG); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ if (!shy) ~~~~~~~~~ { ~ if (named_nonshy) ~~~~~~~~~~~~~~~~~ { ~ if (r < bufp->external_to_internal_register_size) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ if (group_in_compile_stack ~~~~~~~~~~~~~~~~~~~~~~~~~~ (compile_stack, ~~~~~~~~~~~~~~~ bufp->external_to_internal_register[r])) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ /* GNU errors in this context, which is ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ inconsistent; it otherwise has no problem ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ with named non-shy groups overriding ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ previously-assigned group numbers. I choose ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ to error here for consistency with GNU for ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ those writing code that should target ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ both. */ ~~~~~~~~ FREE_STACK_RETURN (REG_ESUBREG); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ } ~ if (r > bufp->re_nsub) ~~~~~~~~~~~~~~~~~~~~~~ { ~ bufp->re_nsub = r; ~~~~~~~~~~~~~~~~~~ } ~ } ~ else ~~~~ { ~ r = ++(bufp->re_nsub); ~~~~~~~~~~~~~~~~~~~~~~ } ~ while (bufp->external_to_internal_register_size <= ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->re_nsub) ~~~~~~~~~~~~~~ { ~ int i; ~~~~~~ int old_size = ~~~~~~~~~~~~~~ bufp->external_to_internal_register_size; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->external_to_internal_register_size ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ += max (old_size + 5, bufp->re_nsub + 5); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RETALLOC (bufp->external_to_internal_register, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->external_to_internal_register_size, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int); ~~~~~ for (i = old_size; ~~~~~~~~~~~~~~~~~~ i < bufp->external_to_internal_register_size; i++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->external_to_internal_register[i] = ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (int) 0xDEADBEEF; ~~~~~~~~~~~~~~~~~ } ~ /* This is explicitly [r] rather than [bufp->re_nsub] for ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the case that the named nonshy group references an ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ unused register number less than bufp->re_nsub. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->external_to_internal_register[r] = ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->re_ngroups; ~~~~~~~~~~~~~~~~~ } ~ if (COMPILE_STACK_FULL) ~~~~~~~~~~~~~~~~~~~~~~~ { ~ RETALLOC (compile_stack.stack, compile_stack.size << 1, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ compile_stack_elt_t); ~~~~~~~~~~~~~~~~~~~~~ if (compile_stack.stack == NULL) return REG_ESPACE; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ compile_stack.size <<= 1; ~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* These are the values to restore when we hit end of this ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ group. They are all relative offsets, so that if the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ whole pattern moves because of realloc, they will still ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ be valid. */ ~~~~~~~~~~~~~ COMPILE_STACK_TOP.begalt_offset = begalt - bufp->buffer; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ COMPILE_STACK_TOP.fixup_alt_jump ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ = fixup_alt_jump ? fixup_alt_jump - bufp->buffer + 1 : 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ COMPILE_STACK_TOP.laststart_offset = buf_end - bufp->buffer; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ COMPILE_STACK_TOP.regnum = bufp->re_ngroups; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ COMPILE_STACK_TOP.inner_group_offset ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ = buf_end - bufp->buffer + 3; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* We will eventually replace the 0 with the number of ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ groups inner to this one, using inner_group_offset, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ above. */ ~~~~~~~~~ GET_BUFFER_SPACE (5); ~~~~~~~~~~~~~~~~~~~~~ store_op2 (start_memory, buf_end, bufp->re_ngroups, 0); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ buf_end += 5; ~~~~~~~~~~~~~ compile_stack.avail++; ~~~~~~~~~~~~~~~~~~~~~~ fixup_alt_jump = 0; ~~~~~~~~~~~~~~~~~~~ laststart = 0; ~~~~~~~~~~~~~~ begalt = buf_end; ~~~~~~~~~~~~~~~~~ pending_exact = 0; ~~~~~~~~~~~~~~~~~~ } ~ break; ~~~~~~ case ')': ~~~~~~~~~ if (syntax & RE_NO_BK_PARENS) goto normal_backslash; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (COMPILE_STACK_EMPTY) ~~~~~~~~~~~~~~~~~~~~~~~~ { ~ if (syntax & RE_UNMATCHED_RIGHT_PAREN_ORD) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto normal_backslash; ~~~~~~~~~~~~~~~~~~~~~~ else ~~~~ FREE_STACK_RETURN (REG_ERPAREN); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ handle_close: ~~~~~~~~~~~~~ if (fixup_alt_jump) ~~~~~~~~~~~~~~~~~~~ { /* Push a dummy failure point at the end of the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ alternative for a possible future ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `pop_failure_jump' to pop. See comments at ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `push_dummy_failure' in `re_match_2'. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ BUF_PUSH (push_dummy_failure); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* We allocated space for this jump when we assigned ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ to `fixup_alt_jump', in the `handle_alt' case below. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ STORE_JUMP (jump_past_alt, fixup_alt_jump, buf_end - 1); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* See similar code for backslashed left paren above. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (COMPILE_STACK_EMPTY) ~~~~~~~~~~~~~~~~~~~~~~~~ { ~ if (syntax & RE_UNMATCHED_RIGHT_PAREN_ORD) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto normal_char; ~~~~~~~~~~~~~~~~~ else ~~~~ FREE_STACK_RETURN (REG_ERPAREN); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* Since we just checked for an empty stack above, this ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ``can't happen''. */ ~~~~~~~~~~~~~~~~~~~~~ assert (compile_stack.avail != 0); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ /* We don't just want to restore into `regnum', because ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ later groups should continue to be numbered higher, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ as in `(ab)c(de)' -- the second group is #2. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regnum_t this_group_regnum; ~~~~~~~~~~~~~~~~~~~~~~~~~~~ unsigned char *inner_group_loc; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ compile_stack.avail--; ~~~~~~~~~~~~~~~~~~~~~~ begalt = bufp->buffer + COMPILE_STACK_TOP.begalt_offset; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fixup_alt_jump ~~~~~~~~~~~~~~ = COMPILE_STACK_TOP.fixup_alt_jump ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ? bufp->buffer + COMPILE_STACK_TOP.fixup_alt_jump - 1 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ : 0; ~~~~ laststart = bufp->buffer + COMPILE_STACK_TOP.laststart_offset; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ this_group_regnum = COMPILE_STACK_TOP.regnum; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ pending_exact = 0; ~~~~~~~~~~~~~~~~~~ /* We're at the end of the group, so now we know how many ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ groups were inside this one. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ inner_group_loc ~~~~~~~~~~~~~~~ = bufp->buffer + COMPILE_STACK_TOP.inner_group_offset; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ STORE_NUMBER (inner_group_loc, regnum - this_group_regnum); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ GET_BUFFER_SPACE (5); ~~~~~~~~~~~~~~~~~~~~~ store_op2 (stop_memory, buf_end, this_group_regnum, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regnum - this_group_regnum); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ buf_end += 5; ~~~~~~~~~~~~~ } ~ break; ~~~~~~ case '|': /* `\|'. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~ if (syntax & RE_LIMITED_OPS || syntax & RE_NO_BK_VBAR) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto normal_backslash; ~~~~~~~~~~~~~~~~~~~~~~ handle_alt: ~~~~~~~~~~~ if (syntax & RE_LIMITED_OPS) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto normal_char; ~~~~~~~~~~~~~~~~~ /* Insert before the previous alternative a jump which ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ jumps to this alternative if the former fails. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ GET_BUFFER_SPACE (3); ~~~~~~~~~~~~~~~~~~~~~ INSERT_JUMP (on_failure_jump, begalt, buf_end + 6); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ pending_exact = 0; ~~~~~~~~~~~~~~~~~~ buf_end += 3; ~~~~~~~~~~~~~ /* The alternative before this one has a jump after it ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ which gets executed if it gets matched. Adjust that ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ jump so it will jump to this alternative's analogous ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ jump (put in below, which in turn will jump to the next ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (if any) alternative's such jump, etc.). The last such ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ jump jumps to the correct final destination. A picture: ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ _____ _____ ~~~~~~~~~~~ | | | | ~~~~~~~~~~~ | v | v ~~~~~~~~~~~ a | b | c ~~~~~~~~~~~ If we are at `b', then fixup_alt_jump right now points to a ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ three-byte space after `a'. We'll put in the jump, set ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fixup_alt_jump to right after `b', and leave behind three ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bytes which we'll fill in when we get to after `c'. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (fixup_alt_jump) ~~~~~~~~~~~~~~~~~~~ STORE_JUMP (jump_past_alt, fixup_alt_jump, buf_end); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Mark and leave space for a jump after this alternative, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ to be filled in later either by next alternative or ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ when know we're at the end of a series of alternatives. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fixup_alt_jump = buf_end; ~~~~~~~~~~~~~~~~~~~~~~~~~ GET_BUFFER_SPACE (3); ~~~~~~~~~~~~~~~~~~~~~ buf_end += 3; ~~~~~~~~~~~~~ laststart = 0; ~~~~~~~~~~~~~~ begalt = buf_end; ~~~~~~~~~~~~~~~~~ break; ~~~~~~ case '{': ~~~~~~~~~ /* If \{ is a literal. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!(syntax & RE_INTERVALS) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* If we're at `\{' and it's not the open-interval ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ operator. */ ~~~~~~~~~~~~~ || ((syntax & RE_INTERVALS) && (syntax & RE_NO_BK_BRACES)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || (p - 2 == pattern && p == pend)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto normal_backslash; ~~~~~~~~~~~~~~~~~~~~~~ #define BAD_INTERVAL(errnum) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ do \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (syntax & RE_NO_BK_BRACES) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto unfetch_interval; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ else \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ FREE_STACK_RETURN (errnum); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } while (0) ~~~~~~~~~~~ handle_interval: ~~~~~~~~~~~~~~~~ { ~ /* If got here, then the syntax allows intervals. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* At least (most) this many matches must be made. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int lower_bound = 0, upper_bound = -1; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ beg_interval = p - 1; ~~~~~~~~~~~~~~~~~~~~~ if (p == pend || itext_ichar_eql (p, '+')) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ BAD_INTERVAL (REG_EBRACE); ~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ GET_UNSIGNED_NUMBER (lower_bound); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (p == pend) BAD_INTERVAL (REG_EBRACE); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PATFETCH (c); ~~~~~~~~~~~~~ if (c == ',') ~~~~~~~~~~~~~ { ~ if (p == pend || itext_ichar_eql (p, '+')) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ BAD_INTERVAL (REG_EBRACE); ~~~~~~~~~~~~~~~~~~~~~~~~~~ GET_UNSIGNED_NUMBER (upper_bound); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (upper_bound < 0) upper_bound = RE_DUP_MAX; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (p == pend) BAD_INTERVAL (REG_EBRACE); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PATFETCH (c); ~~~~~~~~~~~~~ } ~ else ~~~~ { ~ /* Interval such as `{1}' => match exactly once. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ upper_bound = lower_bound; ~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ if (lower_bound > upper_bound) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ BAD_INTERVAL (REG_EBRACE); ~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ if (upper_bound > RE_DUP_MAX) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ BAD_INTERVAL (REG_ESIZEBR); ~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ if (!(syntax & RE_NO_BK_BRACES)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ if (c != '\\') ~~~~~~~~~~~~~~ FREE_STACK_RETURN (REG_BADBR); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (p == pend) ~~~~~~~~~~~~~~ FREE_STACK_RETURN (REG_EESCAPE); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PATFETCH (c); ~~~~~~~~~~~~~ } ~ if (c != '}') ~~~~~~~~~~~~~ { ~ BAD_INTERVAL (REG_EBRACE); ~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* We just parsed a valid interval. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* It's invalid to have no preceding RE. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!laststart) ~~~~~~~~~~~~~~~ { ~ if (syntax & RE_CONTEXT_INVALID_OPS) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ FREE_STACK_RETURN (REG_BADRPT); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ else if (syntax & RE_CONTEXT_INDEP_OPS) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ laststart = buf_end; ~~~~~~~~~~~~~~~~~~~~ else ~~~~ goto unfetch_interval; ~~~~~~~~~~~~~~~~~~~~~~ } ~ /* If the upper bound is zero, don't want to succeed at ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ all; jump from `laststart' to `b + 3', which will be ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the end of the buffer after we insert the jump. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (upper_bound == 0) ~~~~~~~~~~~~~~~~~~~~~ { ~ GET_BUFFER_SPACE (3); ~~~~~~~~~~~~~~~~~~~~~ INSERT_JUMP (jump, laststart, buf_end + 3); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ buf_end += 3; ~~~~~~~~~~~~~ } ~ /* Otherwise, we have a nontrivial interval. When ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ we're all done, the pattern will look like: ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ set_number_at ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ set_number_at ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ succeed_n ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ~~~~~~~~~~~~~~ jump_n ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (The upper bound and `jump_n' are omitted if ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `upper_bound' is 1, though.) */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ else ~~~~ { /* If the upper bound is > 1, we need to insert ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ more at the end of the loop. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int nbytes = 10 + (upper_bound > 1) * 10; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ GET_BUFFER_SPACE (nbytes); ~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Initialize lower bound of the `succeed_n', even ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ though it will be set during matching by its ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ attendant `set_number_at' (inserted next), ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ because `re_compile_fastmap' needs to know. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Jump to the `jump_n' we might insert below. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ INSERT_JUMP2 (succeed_n, laststart, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ buf_end + 5 + (upper_bound > 1) * 5, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ lower_bound); ~~~~~~~~~~~~~ buf_end += 5; ~~~~~~~~~~~~~ /* Code to initialize the lower bound. Insert ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ before the `succeed_n'. The `5' is the last two ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bytes of this `set_number_at', plus 3 bytes of ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the following `succeed_n'. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ insert_op2 (set_number_at, laststart, 5, lower_bound, buf_end); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ buf_end += 5; ~~~~~~~~~~~~~ if (upper_bound > 1) ~~~~~~~~~~~~~~~~~~~~ { /* More than one repetition is allowed, so ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ append a backward jump to the `succeed_n' ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ that starts this interval. ~~~~~~~~~~~~~~~~~~~~~~~~~~ When we've reached this during matching, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ we'll have matched the interval once, so ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ jump back only `upper_bound - 1' times. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ STORE_JUMP2 (jump_n, buf_end, laststart + 5, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ upper_bound - 1); ~~~~~~~~~~~~~~~~~ buf_end += 5; ~~~~~~~~~~~~~ /* The location we want to set is the second ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ parameter of the `jump_n'; that is `b-2' as ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ an absolute address. `laststart' will be ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the `set_number_at' we're about to insert; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `laststart+3' the number to set, the source ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for the relative address. But we are ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ inserting into the middle of the pattern -- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ so everything is getting moved up by 5. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Conclusion: (b - 2) - (laststart + 3) + 5, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ i.e., b - laststart. ~~~~~~~~~~~~~~~~~~~~ We insert this at the beginning of the loop ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ so that if we fail during matching, we'll ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ reinitialize the bounds. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ insert_op2 (set_number_at, laststart, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ buf_end - laststart, ~~~~~~~~~~~~~~~~~~~~ upper_bound - 1, buf_end); ~~~~~~~~~~~~~~~~~~~~~~~~~~ buf_end += 5; ~~~~~~~~~~~~~ } ~ } ~ pending_exact = 0; ~~~~~~~~~~~~~~~~~~ beg_interval = NULL; ~~~~~~~~~~~~~~~~~~~~ } ~ break; ~~~~~~ #undef BAD_INTERVAL ~~~~~~~~~~~~~~~~~~~ unfetch_interval: ~~~~~~~~~~~~~~~~~ /* If an invalid interval, match the characters as literals. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ assert (beg_interval); ~~~~~~~~~~~~~~~~~~~~~~ p = beg_interval; ~~~~~~~~~~~~~~~~~ beg_interval = NULL; ~~~~~~~~~~~~~~~~~~~~ /* normal_char and normal_backslash need `c'. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PATFETCH (c); ~~~~~~~~~~~~~ if (!(syntax & RE_NO_BK_BRACES)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ if (p > pattern && p[-1] == '\\') ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto normal_backslash; ~~~~~~~~~~~~~~~~~~~~~~ } ~ goto normal_char; ~~~~~~~~~~~~~~~~~ #ifdef emacs ~~~~~~~~~~~~ /* There is no way to specify the before_dot and after_dot ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ operators. rms says this is ok. --karl */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case '=': ~~~~~~~~~ BUF_PUSH (at_dot); ~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case 's': ~~~~~~~~~ laststart = buf_end; ~~~~~~~~~~~~~~~~~~~~ PATFETCH (c); ~~~~~~~~~~~~~ /* XEmacs addition */ ~~~~~~~~~~~~~~~~~~~~~ if (c >= 0x80 || syntax_spec_code[c] == 0377) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ FREE_STACK_RETURN (REG_ESYNTAX); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ BUF_PUSH_2 (syntaxspec, syntax_spec_code[c]); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case 'S': ~~~~~~~~~ laststart = buf_end; ~~~~~~~~~~~~~~~~~~~~ PATFETCH (c); ~~~~~~~~~~~~~ /* XEmacs addition */ ~~~~~~~~~~~~~~~~~~~~~ if (c >= 0x80 || syntax_spec_code[c] == 0377) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ FREE_STACK_RETURN (REG_ESYNTAX); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ BUF_PUSH_2 (notsyntaxspec, syntax_spec_code[c]); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ #ifdef MULE ~~~~~~~~~~~ /* 97.2.17 jhod merged in to XEmacs from mule-2.3 */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case 'c': ~~~~~~~~~ laststart = buf_end; ~~~~~~~~~~~~~~~~~~~~ PATFETCH_RAW (c); ~~~~~~~~~~~~~~~~~ if (c < 32 || c > 127) ~~~~~~~~~~~~~~~~~~~~~~ FREE_STACK_RETURN (REG_ECATEGORY); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ BUF_PUSH_2 (categoryspec, c); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case 'C': ~~~~~~~~~ laststart = buf_end; ~~~~~~~~~~~~~~~~~~~~ PATFETCH_RAW (c); ~~~~~~~~~~~~~~~~~ if (c < 32 || c > 127) ~~~~~~~~~~~~~~~~~~~~~~ FREE_STACK_RETURN (REG_ECATEGORY); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ BUF_PUSH_2 (notcategoryspec, c); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ /* end of category patch */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif /* MULE */ ~~~~~~~~~~~~~~~~~ #endif /* emacs */ ~~~~~~~~~~~~~~~~~~ case 'w': ~~~~~~~~~ laststart = buf_end; ~~~~~~~~~~~~~~~~~~~~ BUF_PUSH (wordchar); ~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case 'W': ~~~~~~~~~ laststart = buf_end; ~~~~~~~~~~~~~~~~~~~~ BUF_PUSH (notwordchar); ~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case '<': ~~~~~~~~~ BUF_PUSH (wordbeg); ~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case '>': ~~~~~~~~~ BUF_PUSH (wordend); ~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case 'b': ~~~~~~~~~ BUF_PUSH (wordbound); ~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case 'B': ~~~~~~~~~ BUF_PUSH (notwordbound); ~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case '`': ~~~~~~~~~ BUF_PUSH (begbuf); ~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case '\'': ~~~~~~~~~~ BUF_PUSH (endbuf); ~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case '1': case '2': case '3': case '4': case '5': ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case '6': case '7': case '8': case '9': ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ regnum_t reg = -1, regint; ~~~~~~~~~~~~~~~~~~~~~~~~~~ if (syntax & RE_NO_BK_REFS) ~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto normal_char; ~~~~~~~~~~~~~~~~~ PATUNFETCH; ~~~~~~~~~~~ GET_UNSIGNED_NUMBER (reg); ~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Progressively divide down the backreference until we find ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ one that corresponds to an existing register. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ while (reg > 10 && ~~~~~~~~~~~~~~~~~~ (syntax & RE_NO_MULTI_DIGIT_BK_REFS ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || reg > bufp->re_nsub ~~~~~~~~~~~~~~~~~~~~~~ || (bufp->external_to_internal_register[reg] ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ == (int) 0xDEADBEEF))) ~~~~~~~~~~~~~~~~~~~~~~ { ~ PATUNFETCH; ~~~~~~~~~~~ reg /= 10; ~~~~~~~~~~ } ~ if (reg > bufp->re_nsub ~~~~~~~~~~~~~~~~~~~~~~~ || (bufp->external_to_internal_register[reg] ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ == (int) 0xDEADBEEF)) ~~~~~~~~~~~~~~~~~~~~~ { ~ /* \N with one digit with a non-existing group has always ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ been a syntax error. ~~~~~~~~~~~~~~~~~~~~ GNU as of Fr 27 Mär 2020 16:24:07 GMT do not accept ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ multidigit backreferences; if they did there would be ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ an argument for this not being an error for those ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ backreferences that are less than some known named ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ backreference. As it is currently we should error, this ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ will give those writing code for XEmacs better ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ feedback. */ ~~~~~~~~~~~~ FREE_STACK_RETURN (REG_ESUBREG); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ regint = bufp->external_to_internal_register[reg]; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Can't back reference to a subexpression if inside of it. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (group_in_compile_stack (compile_stack, regint)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ /* Check REG, not REGINT. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ while (reg > 10) ~~~~~~~~~~~~~~~~ { ~ PATUNFETCH; ~~~~~~~~~~~ reg = reg / 10; ~~~~~~~~~~~~~~~ } ~ goto normal_char; ~~~~~~~~~~~~~~~~~ } ~ #ifdef emacs ~~~~~~~~~~~~ if (reg > 9 && ~~~~~~~~~~~~~~ bufp->warned_about_incompatible_back_references == 0) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ bufp->warned_about_incompatible_back_references = 1; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ warn_when_safe (intern ("regex"), Qinfo, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ "Back reference \\%d now has new " ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ "semantics in %s", reg, pattern); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ #endif ~~~~~~ laststart = buf_end; ~~~~~~~~~~~~~~~~~~~~ GET_BUFFER_SPACE (3); ~~~~~~~~~~~~~~~~~~~~~ store_op1 (duplicate, buf_end, regint); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ buf_end += 3; ~~~~~~~~~~~~~ } ~ break; ~~~~~~ case '+': ~~~~~~~~~ case '?': ~~~~~~~~~ if (syntax & RE_BK_PLUS_QM) ~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto handle_plus; ~~~~~~~~~~~~~~~~~ else ~~~~ goto normal_backslash; ~~~~~~~~~~~~~~~~~~~~~~ default: ~~~~~~~~ normal_backslash: ~~~~~~~~~~~~~~~~~ /* You might think it would be useful for \ to mean ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ not to translate; but if we don't translate it, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ it will never match anything. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ c = RE_TRANSLATE (c); ~~~~~~~~~~~~~~~~~~~~~ goto normal_char; ~~~~~~~~~~~~~~~~~ } ~ break; ~~~~~~ default: ~~~~~~~~ /* Expects the character in `c'. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* `p' points to the location after where `c' came from. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ normal_char: ~~~~~~~~~~~~ { ~ /* The following conditional synced to GNU Emacs 22.1. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* If no exactn currently being built. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!pending_exact ~~~~~~~~~~~~~~~~~~ /* If last exactn not at current position. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || pending_exact + *pending_exact + 1 != buf_end ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* We have only one byte following the exactn for the count. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || *pending_exact >= (1 << BYTEWIDTH) - MAX_ICHAR_LEN ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* If followed by a repetition operator. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ If the lookahead fails because of end of pattern, any ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ trailing backslash will get caught later. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || (p != pend && (*p == '*' || *p == '^')) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || ((syntax & RE_BK_PLUS_QM) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ? p + 1 < pend && *p == '\\' && (p[1] == '+' || p[1] == '?') ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ : p != pend && (*p == '+' || *p == '?')) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || ((syntax & RE_INTERVALS) ~~~~~~~~~~~~~~~~~~~~~~~~~~~ && ((syntax & RE_NO_BK_BRACES) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ? p != pend && *p == '{' ~~~~~~~~~~~~~~~~~~~~~~~~ : p + 1 < pend && (p[0] == '\\' && p[1] == '{')))) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ /* Start building a new exactn. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ laststart = buf_end; ~~~~~~~~~~~~~~~~~~~~ BUF_PUSH_2 (exactn, 0); ~~~~~~~~~~~~~~~~~~~~~~~ pending_exact = buf_end - 1; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ #ifndef MULE ~~~~~~~~~~~~ BUF_PUSH (c); ~~~~~~~~~~~~~ (*pending_exact)++; ~~~~~~~~~~~~~~~~~~~ #else ~~~~~ { ~ Bytecount bt_count; ~~~~~~~~~~~~~~~~~~~ Ibyte tmp_buf[MAX_ICHAR_LEN]; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int i; ~~~~~~ bt_count = set_itext_ichar (tmp_buf, c); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (i = 0; i < bt_count; i++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ BUF_PUSH (tmp_buf[i]); ~~~~~~~~~~~~~~~~~~~~~~ (*pending_exact)++; ~~~~~~~~~~~~~~~~~~~ } ~ } ~ #endif ~~~~~~ break; ~~~~~~ } ~ } /* switch (c) */ ~~~~~~~~~~~~~~~~~~ } /* while p != pend */ ~~~~~~~~~~~~~~~~~~~~~~~ /* Through the pattern now. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (fixup_alt_jump) ~~~~~~~~~~~~~~~~~~~ STORE_JUMP (jump_past_alt, fixup_alt_jump, buf_end); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!COMPILE_STACK_EMPTY) ~~~~~~~~~~~~~~~~~~~~~~~~~ FREE_STACK_RETURN (REG_EPAREN); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* If we don't want backtracking, force success ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the first time we reach the end of the compiled pattern. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (syntax & RE_NO_POSIX_BACKTRACKING) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ BUF_PUSH (succeed); ~~~~~~~~~~~~~~~~~~~ xfree (compile_stack.stack); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* We have succeeded; set the length of the buffer. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->used = buf_end - bufp->buffer; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef DEBUG ~~~~~~~~~~~~ if (debug_regexps & RE_DEBUG_COMPILATION) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ DEBUG_PRINT1 ("\nCompiled pattern: \n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ print_compiled_pattern (bufp); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ #endif /* DEBUG */ ~~~~~~~~~~~~~~~~~~ #ifndef MATCH_MAY_ALLOCATE ~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Initialize the failure stack to the largest possible stack. This ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ isn't necessary unless we're trying to avoid calling alloca in ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the search and match routines. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ int num_regs = bufp->re_ngroups + 1; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Since DOUBLE_FAIL_STACK refuses to double only if the current size ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ is strictly greater than re_max_failures, the largest possible stack ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ is 2 * re_max_failures failure points. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (fail_stack.size < (2 * re_max_failures * MAX_FAILURE_ITEMS)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ fail_stack.size = (2 * re_max_failures * MAX_FAILURE_ITEMS); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (! fail_stack.stack) ~~~~~~~~~~~~~~~~~~~~~~~ fail_stack.stack ~~~~~~~~~~~~~~~~ = (fail_stack_elt_t *) xmalloc (fail_stack.size ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ * sizeof (fail_stack_elt_t)); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ else ~~~~ fail_stack.stack ~~~~~~~~~~~~~~~~ = (fail_stack_elt_t *) xrealloc (fail_stack.stack, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (fail_stack.size ~~~~~~~~~~~~~~~~ * sizeof (fail_stack_elt_t))); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ regex_grow_registers (num_regs); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ #endif /* not MATCH_MAY_ALLOCATE */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return REG_NOERROR; ~~~~~~~~~~~~~~~~~~~ } /* regex_compile */ ~~~~~~~~~~~~~~~~~~~~~ ~ /* Subroutines for `regex_compile'. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Store OP at LOC followed by two-byte integer parameter ARG. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ static void ~~~~~~~~~~~ store_op1 (re_opcode_t op, unsigned char *loc, int arg) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ *loc = (unsigned char) op; ~~~~~~~~~~~~~~~~~~~~~~~~~~ STORE_NUMBER (loc + 1, arg); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* Like `store_op1', but for two two-byte parameters ARG1 and ARG2. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ static void ~~~~~~~~~~~ store_op2 (re_opcode_t op, unsigned char *loc, int arg1, int arg2) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ *loc = (unsigned char) op; ~~~~~~~~~~~~~~~~~~~~~~~~~~ STORE_NUMBER (loc + 1, arg1); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ STORE_NUMBER (loc + 3, arg2); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* Copy the bytes from LOC to END to open up three bytes of space at LOC ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for OP followed by two-byte integer parameter ARG. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ static void ~~~~~~~~~~~ insert_op1 (re_opcode_t op, unsigned char *loc, int arg, unsigned char *end) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ REGISTER unsigned char *pfrom = end; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ REGISTER unsigned char *pto = end + 3; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ while (pfrom != loc) ~~~~~~~~~~~~~~~~~~~~ *--pto = *--pfrom; ~~~~~~~~~~~~~~~~~~ store_op1 (op, loc, arg); ~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* Like `insert_op1', but for two two-byte parameters ARG1 and ARG2. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ static void ~~~~~~~~~~~ insert_op2 (re_opcode_t op, unsigned char *loc, int arg1, int arg2, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ unsigned char *end) ~~~~~~~~~~~~~~~~~~~ { ~ REGISTER unsigned char *pfrom = end; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ REGISTER unsigned char *pto = end + 5; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ while (pfrom != loc) ~~~~~~~~~~~~~~~~~~~~ *--pto = *--pfrom; ~~~~~~~~~~~~~~~~~~ store_op2 (op, loc, arg1, arg2); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* P points to just after a ^ in PATTERN. Return true if that ^ comes ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ after an alternative or a begin-subexpression. We assume there is at ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ least one character before the ^. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ static re_bool ~~~~~~~~~~~~~~ at_begline_loc_p (re_char *pattern, re_char *p, reg_syntax_t syntax) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ re_char *prev = p - 2; ~~~~~~~~~~~~~~~~~~~~~~ re_bool prev_prev_backslash = prev > pattern && prev[-1] == '\\'; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return ~~~~~~ /* After a subexpression? */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (*prev == '(' && (syntax & RE_NO_BK_PARENS || prev_prev_backslash)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* After an alternative? */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || (*prev == '|' && (syntax & RE_NO_BK_VBAR || prev_prev_backslash)); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* The dual of at_begline_loc_p. This one is for $. We assume there is ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ at least one character after the $, i.e., `P < PEND'. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ static re_bool ~~~~~~~~~~~~~~ at_endline_loc_p (re_char *p, re_char *pend, int syntax) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ re_char *next = p; ~~~~~~~~~~~~~~~~~~ re_bool next_backslash = *next == '\\'; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_char *next_next = p + 1 < pend ? p + 1 : 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return ~~~~~~ /* Before a subexpression? */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (syntax & RE_NO_BK_PARENS ? *next == ')' ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ : next_backslash && next_next && *next_next == ')') ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Before an alternative? */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || (syntax & RE_NO_BK_VBAR ? *next == '|' ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ : next_backslash && next_next && *next_next == '|'); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* Returns true if REGNUM is in one of COMPILE_STACK's elements and ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ false if it's not. */ ~~~~~~~~~~~~~~~~~~~~~~ static re_bool ~~~~~~~~~~~~~~ group_in_compile_stack (compile_stack_type compile_stack, regnum_t regnum) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ int this_element; ~~~~~~~~~~~~~~~~~ for (this_element = compile_stack.avail - 1; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ this_element >= 0; ~~~~~~~~~~~~~~~~~~ this_element--) ~~~~~~~~~~~~~~~ if (compile_stack.stack[this_element].regnum == regnum) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return true; ~~~~~~~~~~~~ return false; ~~~~~~~~~~~~~ } ~ /* Read the ending character of a range (in a bracket expression) from the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ uncompiled pattern *P_PTR (which ends at PEND). We assume the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ starting character is in `P[-2]'. (`P[-1]' is the character `-'.) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Then we set the translation of all bits between the starting and ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ending characters (inclusive) in the compiled pattern B. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Return an error code. ~~~~~~~~~~~~~~~~~~~~~ We use these short variable names so we can use the same macros as ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `regex_compile' itself. ~~~~~~~~~~~~~~~~~~~~~~~ Under Mule, this is only called when both chars of the range are ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ASCII. */ ~~~~~~~~~ static reg_errcode_t ~~~~~~~~~~~~~~~~~~~~ compile_range (re_char **p_ptr, re_char *pend, RE_TRANSLATE_TYPE translate, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ reg_syntax_t syntax, unsigned char *buf_end) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ Ichar this_char; ~~~~~~~~~~~~~~~~ re_char *p = *p_ptr; ~~~~~~~~~~~~~~~~~~~~ int range_start, range_end; ~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (p == pend) ~~~~~~~~~~~~~~ return REG_ERANGE; ~~~~~~~~~~~~~~~~~~ /* Even though the pattern is a signed `char *', we need to fetch ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ with unsigned char *'s; if the high bit of the pattern character ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ is set, the range endpoints will be negative if we fetch using a ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ signed char *. ~~~~~~~~~~~~~~ We also want to fetch the endpoints without translating them; the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ appropriate translation is done in the bit-setting loop below. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* The SVR4 compiler on the 3B2 had trouble with unsigned const char *. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ range_start = ((const unsigned char *) p)[-2]; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ range_end = ((const unsigned char *) p)[0]; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Have to increment the pointer into the pattern string, so the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ caller isn't still at the ending character. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (*p_ptr)++; ~~~~~~~~~~~ /* If the start is after the end, the range is empty. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (range_start > range_end) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return syntax & RE_NO_EMPTY_RANGES ? REG_ERANGE : REG_NOERROR; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Here we see why `this_char' has to be larger than an `unsigned ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ char' -- the range is inclusive, so if `range_end' == 0xff ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (assuming 8-bit characters), we would otherwise go into an infinite ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ loop, since all characters <= 0xff. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (this_char = range_start; this_char <= range_end; this_char++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ SET_LIST_BIT (RE_TRANSLATE (this_char)); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ return REG_NOERROR; ~~~~~~~~~~~~~~~~~~~ } ~ #ifdef MULE ~~~~~~~~~~~ static reg_errcode_t ~~~~~~~~~~~~~~~~~~~~ compile_extended_range (re_char **p_ptr, re_char *pend, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RE_TRANSLATE_TYPE translate, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ reg_syntax_t syntax, Lisp_Object rtab) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ Ichar this_char, range_start, range_end; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ const Ibyte *p; ~~~~~~~~~~~~~~~ if (*p_ptr == pend) ~~~~~~~~~~~~~~~~~~~ return REG_ERANGE; ~~~~~~~~~~~~~~~~~~ p = (const Ibyte *) *p_ptr; ~~~~~~~~~~~~~~~~~~~~~~~~~~~ range_end = itext_ichar (p); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ p--; /* back to '-' */ ~~~~~~~~~~~~~~~~~~~~~~ DEC_IBYTEPTR (p); /* back to start of range */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* We also want to fetch the endpoints without translating them; the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ appropriate translation is done in the bit-setting loop below. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ range_start = itext_ichar (p); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ INC_IBYTEPTR (*p_ptr); ~~~~~~~~~~~~~~~~~~~~~~ /* If the start is after the end, the range is empty. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (range_start > range_end) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return syntax & RE_NO_EMPTY_RANGES ? REG_ERANGE : REG_NOERROR; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifndef UNICODE_INTERNAL ~~~~~~~~~~~~~~~~~~~~~~~~ /* Can't have ranges spanning different charsets, except maybe for ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ranges entirely within the first 256 chars. (The intent of this is that ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the effect of such a range would be unpredictable, since there is no ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ well-defined ordering over charsets and the particular assignment of ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ charset ID's is arbitrary.) This does not apply to Unicode, with ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ well-defined character values. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if ((range_start >= 0x100 || range_end >= 0x100) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ && !EQ (old_mule_ichar_charset (range_start), ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ old_mule_ichar_charset (range_end))) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return REG_ERANGESPAN; ~~~~~~~~~~~~~~~~~~~~~~ #endif /* not UNICODE_INTERNAL */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (TRANSLATE_P (translate)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ /* #### This might be way inefficient if the range encompasses 10,000 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ chars or something. To be efficient, you'd have to do something like ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ this: ~~~~~ range_table a ~~~~~~~~~~~~~ range_table b; ~~~~~~~~~~~~~~ map_char_table (translation table, [range_start, range_end]) of ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ lambda (ch, translation): ~~~~~~~~~~~~~~~~~~~~~~~~~ put (ch, Qt) in a ~~~~~~~~~~~~~~~~~ put (translation, Qt) in b ~~~~~~~~~~~~~~~~~~~~~~~~~~ invert the range in a and truncate to [range_start, range_end] ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ put the union of a, b in rtab ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ This is to say, we want to map every character that has a translation ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ to its translation, and other characters to themselves. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ This assumes, as is reasonable in practice, that a translation ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ table maps individual characters to their translation, and does ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ not generally map multiple characters to the same translation. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ ~~ for (this_char = range_start; this_char <= range_end; this_char++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ SET_RANGETAB_BIT (RE_TRANSLATE (this_char)); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ } ~ else ~~~~ put_range_table (rtab, range_start, range_end, Qt); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return REG_NOERROR; ~~~~~~~~~~~~~~~~~~~ } ~ #endif /* MULE */ ~~~~~~~~~~~~~~~~~ #ifdef emacs ~~~~~~~~~~~~ reg_errcode_t ~~~~~~~~~~~~~ compile_char_class (re_wctype_t cc, Lisp_Object rtab, Bitbyte *flags_out) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ *flags_out |= re_wctype_to_bit (cc); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ switch (cc) ~~~~~~~~~~~ { ~ case RECC_ASCII: ~~~~~~~~~~~~~~~~ put_range_table (rtab, 0, 0x7f, Qt); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case RECC_XDIGIT: ~~~~~~~~~~~~~~~~~ put_range_table (rtab, 'a', 'f', Qt); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ put_range_table (rtab, 'A', 'f', Qt); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* fallthrough */ ~~~~~~~~~~~~~~~~~ case RECC_DIGIT: ~~~~~~~~~~~~~~~~ put_range_table (rtab, '0', '9', Qt); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case RECC_BLANK: ~~~~~~~~~~~~~~~~ put_range_table (rtab, ' ', ' ', Qt); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ put_range_table (rtab, '\t', '\t', Qt); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case RECC_PRINT: ~~~~~~~~~~~~~~~~ put_range_table (rtab, ' ', 0x7e, Qt); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ put_range_table (rtab, 0x80, CHAR_CODE_LIMIT, Qt); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case RECC_GRAPH: ~~~~~~~~~~~~~~~~ put_range_table (rtab, '!', 0x7e, Qt); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ put_range_table (rtab, 0x80, CHAR_CODE_LIMIT, Qt); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case RECC_NONASCII: ~~~~~~~~~~~~~~~~~~~ case RECC_MULTIBYTE: ~~~~~~~~~~~~~~~~~~~~ put_range_table (rtab, 0x80, CHAR_CODE_LIMIT, Qt); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case RECC_CNTRL: ~~~~~~~~~~~~~~~~ put_range_table (rtab, 0x00, 0x1f, Qt); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case RECC_UNIBYTE: ~~~~~~~~~~~~~~~~~~ /* Never true in XEmacs. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ /* The following all have their own bits in the class_bits argument to ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ charset_mule and charset_mule_not, they don't use the range table ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ information. */ ~~~~~~~~~~~~~~~ case RECC_ALPHA: ~~~~~~~~~~~~~~~~ case RECC_WORD: ~~~~~~~~~~~~~~~ case RECC_ALNUM: /* Equivalent to RECC_WORD */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_LOWER: ~~~~~~~~~~~~~~~~ case RECC_PUNCT: ~~~~~~~~~~~~~~~~ case RECC_SPACE: ~~~~~~~~~~~~~~~~ case RECC_UPPER: ~~~~~~~~~~~~~~~~ break; ~~~~~~ } ~ return REG_NOERROR; ~~~~~~~~~~~~~~~~~~~ } ~ #endif /* MULE */ ~~~~~~~~~~~~~~~~~ ~ /* re_compile_fastmap computes a ``fastmap'' for the compiled pattern in ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ BUFP. A fastmap records which of the (1 << BYTEWIDTH) possible ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ characters can start a string that matches the pattern. This fastmap ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ is used by re_search to skip quickly over impossible starting points. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ The caller must supply the address of a (1 << BYTEWIDTH)-byte data ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ area as BUFP->fastmap. ~~~~~~~~~~~~~~~~~~~~~~ We set the `fastmap', `fastmap_accurate', and `can_be_null' fields in ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the pattern buffer. ~~~~~~~~~~~~~~~~~~~ Returns 0 if we succeed, -2 if an internal error. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int ~~~ re_compile_fastmap (struct re_pattern_buffer *bufp ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RE_LISP_SHORT_CONTEXT_ARGS_DECL) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ int j, k; ~~~~~~~~~ #ifdef MATCH_MAY_ALLOCATE ~~~~~~~~~~~~~~~~~~~~~~~~~ fail_stack_type fail_stack; ~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ /* We don't push any register information onto the failure stack. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* &&#### this should be changed for 8-bit-fixed, for efficiency. see ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ comment marked with &&#### in re_search_2. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ REGISTER char *fastmap = bufp->fastmap; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ unsigned char *pattern = bufp->buffer; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ long size = bufp->used; ~~~~~~~~~~~~~~~~~~~~~~~ re_char *p = pattern; ~~~~~~~~~~~~~~~~~~~~~ REGISTER re_char *pend = pattern + size; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef REGEX_REL_ALLOC ~~~~~~~~~~~~~~~~~~~~~~ /* This holds the pointer to the failure stack, when ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ it is allocated relocatably. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fail_stack_elt_t *failure_stack_ptr; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ /* Assume that each path through the pattern can be null until ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ proven otherwise. We set this false at the bottom of switch ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ statement, to which we get only if a particular path doesn't ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ match the empty string. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_bool path_can_be_null = true; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* We aren't doing a `succeed_n' to begin with. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_bool succeed_n_p = false; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef ERROR_CHECK_MALLOC ~~~~~~~~~~~~~~~~~~~~~~~~~ /* The pattern comes from string data, not buffer data. We don't access ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ any buffer data, so we don't have to worry about malloc() (but the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ disallowed flag may have been set by a caller). */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int depth = bind_regex_malloc_disallowed (0); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ assert (fastmap != NULL && p != NULL); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ INIT_FAIL_STACK (); ~~~~~~~~~~~~~~~~~~~ memset (fastmap, 0, 1 << BYTEWIDTH); /* Assume nothing's valid. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->fastmap_accurate = 1; /* It will be when we're done. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->can_be_null = 0; ~~~~~~~~~~~~~~~~~~~~~~ while (1) ~~~~~~~~~ { ~ if (p == pend || *p == succeed) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ /* We have reached the (effective) end of pattern. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!FAIL_STACK_EMPTY ()) ~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ bufp->can_be_null |= path_can_be_null; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Reset for next path. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~ path_can_be_null = true; ~~~~~~~~~~~~~~~~~~~~~~~~ p = (unsigned char *) fail_stack.stack[--fail_stack.avail].pointer; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ continue; ~~~~~~~~~ } ~ else ~~~~ break; ~~~~~~ } ~ /* We should never be about to go beyond the end of the pattern. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ assert (p < pend); ~~~~~~~~~~~~~~~~~~ switch ((re_opcode_t) *p++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ /* I guess the idea here is to simply not bother with a fastmap ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if a backreference is used, since it's too hard to figure out ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the fastmap for the corresponding group. Setting ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `can_be_null' stops `re_search_2' from using the fastmap, so ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ that is all we do. */ ~~~~~~~~~~~~~~~~~~~~~~ case duplicate: ~~~~~~~~~~~~~~~ bufp->can_be_null = 1; ~~~~~~~~~~~~~~~~~~~~~~ goto done; ~~~~~~~~~~ /* Following are the cases which match a character. These end ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ with `break'. */ ~~~~~~~~~~~~~~~~~ case exactn: ~~~~~~~~~~~~ fastmap[p[1]] = 1; ~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case charset: ~~~~~~~~~~~~~ /* XEmacs: Under Mule, these bit vectors will ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ only contain values for characters below 0x80. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH))) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[j] = 1; ~~~~~~~~~~~~~~~ break; ~~~~~~ case charset_not: ~~~~~~~~~~~~~~~~~ /* Chars beyond end of map must be allowed. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef MULE ~~~~~~~~~~~ for (j = *p * BYTEWIDTH; j < 0x80; j++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[j] = 1; ~~~~~~~~~~~~~~~ /* And all extended characters must be allowed, too. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (j = 0x80; j < 0x100; j++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (ibyte_first_byte_p (j)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[j] = 1; ~~~~~~~~~~~~~~~ #else /* not MULE */ ~~~~~~~~~~~~~~~~~~~~ for (j = *p * BYTEWIDTH; j < (1 << BYTEWIDTH); j++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[j] = 1; ~~~~~~~~~~~~~~~ #endif /* MULE */ ~~~~~~~~~~~~~~~~~ for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!(p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH)))) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[j] = 1; ~~~~~~~~~~~~~~~ break; ~~~~~~ #ifdef MULE ~~~~~~~~~~~ case charset_mule: ~~~~~~~~~~~~~~~~~~ { ~ int nentries; ~~~~~~~~~~~~~ Bitbyte flags = *p++; ~~~~~~~~~~~~~~~~~~~~~ if (flags) ~~~~~~~~~~ { ~ /* We need to consult the syntax table, fastmap won't ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ work. */ ~~~~~~~~ bufp->can_be_null = 1; ~~~~~~~~~~~~~~~~~~~~~~ goto done; ~~~~~~~~~~ } ~ nentries = unified_range_table_nentries ((void *) p); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (j = 0; j < nentries; j++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ EMACS_INT first, last; ~~~~~~~~~~~~~~~~~~~~~~ Lisp_Object dummy_val; ~~~~~~~~~~~~~~~~~~~~~~ int jj; ~~~~~~~ Ibyte strr[MAX_ICHAR_LEN]; ~~~~~~~~~~~~~~~~~~~~~~~~~~ unified_range_table_get_range ((void *) p, j, &first, &last, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ &dummy_val); ~~~~~~~~~~~~ #ifndef UNICODE_INTERNAL ~~~~~~~~~~~~~~~~~~~~~~~~ for (jj = first; jj <= last && jj < 0x80; jj++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[jj] = 1; ~~~~~~~~~~~~~~~~ /* Ranges below 0x100 can span charsets, but there ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ are only two (Control-1 and Latin-1), and ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ either first or last has to be in them. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ set_itext_ichar (strr, first); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[*strr] = 1; ~~~~~~~~~~~~~~~~~~~ if (last < 0x100) ~~~~~~~~~~~~~~~~~ { ~ set_itext_ichar (strr, last); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[*strr] = 1; ~~~~~~~~~~~~~~~~~~~ } ~ else if (CHAR_CODE_LIMIT == last) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ /* This is RECC_MULTIBYTE or RECC_NONASCII; true for all ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ non-ASCII characters. */ ~~~~~~~~~~~~~~~~~~~~~~~~ jj = 0x80; ~~~~~~~~~~ while (jj < 0xA0) ~~~~~~~~~~~~~~~~~ { ~ fastmap[jj++] = 1; ~~~~~~~~~~~~~~~~~~ } ~ } ~ #else ~~~~~ /* Ranges can span charsets. We depend on the fact that ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ lead bytes are monotonically non-decreasing as ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ character values increase. @@#### This is a fairly ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ reasonable assumption in general (but DOES NOT WORK in ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ old Mule due to the ordering of private dimension-1 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ chars before official dimension-2 chars), and introduces ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ a dependency on the particular representation. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ Ibyte strrlast[MAX_ICHAR_LEN]; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ set_itext_ichar (strr, first); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ set_itext_ichar (strrlast, min (last, CHAR_CODE_LIMIT - 1)); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (jj = *strr; jj <= *strrlast; jj++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[jj] = 1; ~~~~~~~~~~~~~~~~ } ~ #endif /* not UNICODE_INTERNAL */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* If it's not a possible first byte, it can't be in the fastmap. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ In UTF-8, lead bytes are not contiguous with ASCII, so a ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ range spanning the ASCII/non-ASCII boundary will put ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ extraneous bytes in the range [0x80 - 0xBF] in the fastmap. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (j = 0x80; j < 0x100; j++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!ibyte_first_byte_p (j)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[j] = 0; ~~~~~~~~~~~~~~~ } ~ break; ~~~~~~ case charset_mule_not: ~~~~~~~~~~~~~~~~~~~~~~ { ~ int nentries; ~~~~~~~~~~~~~ int smallest_prev = 0; ~~~~~~~~~~~~~~~~~~~~~~ Bitbyte flags = *p++; ~~~~~~~~~~~~~~~~~~~~~ if (flags) ~~~~~~~~~~ { ~ /* We need to consult the syntax table, fastmap won't ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ work. */ ~~~~~~~~ bufp->can_be_null = 1; ~~~~~~~~~~~~~~~~~~~~~~ goto done; ~~~~~~~~~~ } ~ nentries = unified_range_table_nentries ((void *) p); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifndef UNICODE_INTERNAL ~~~~~~~~~~~~~~~~~~~~~~~~ for (j = 0; j < nentries; j++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ EMACS_INT first, last; ~~~~~~~~~~~~~~~~~~~~~~ Lisp_Object dummy_val; ~~~~~~~~~~~~~~~~~~~~~~ int jj; ~~~~~~~ unified_range_table_get_range ((void *) p, j, &first, &last, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ &dummy_val); ~~~~~~~~~~~~ for (jj = smallest_prev; jj < first && jj < 0x80; jj++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[jj] = 1; ~~~~~~~~~~~~~~~~ smallest_prev = last + 1; ~~~~~~~~~~~~~~~~~~~~~~~~~ if (smallest_prev >= 0x80) ~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ } ~ /* Also set lead bytes after the end */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (j = smallest_prev; j < 0x80; j++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[j] = 1; ~~~~~~~~~~~~~~~ /* Calculating which lead bytes are actually allowed ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ here is rather difficult, so we just punt and allow ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ all of them. ~~~~~~~~~~~~ */ ~~ for (j = 0x80; j < 0x100; j++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (ibyte_first_byte_p (j)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[j] = 1; ~~~~~~~~~~~~~~~ #else ~~~~~ for (j = 0; j < nentries; j++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ EMACS_INT first, last; ~~~~~~~~~~~~~~~~~~~~~~ /* This denotes a range of lead bytes that are not ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ in the fastmap. */ ~~~~~~~~~~~~~~~~~~ int firstlead, lastlead; ~~~~~~~~~~~~~~~~~~~~~~~~ Lisp_Object dummy_val; ~~~~~~~~~~~~~~~~~~~~~~ int jj; ~~~~~~~ unified_range_table_get_range ((void *) p, j, &first, &last, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ &dummy_val); ~~~~~~~~~~~~ /* With Unicode-internal, lead bytes that are entirely ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ within the range and not including the beginning or end ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ are definitely not in the fastmap. Leading bytes that ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ include the beginning or ending characters will be in ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the fastmap unless the beginning or ending characters ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ are the first or last character, respectively, that uses ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ this lead byte. ~~~~~~~~~~~~~~~ @@#### WARNING! In order to determine whether we are the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ first or last character using a lead byte we use and ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ embed in the code some knowledge of how UTF-8 works -- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ at least, the fact that the the first character using a ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ particular lead byte has the minimum-numbered trailing ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ byte in all its trailing bytes, and the last character ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ using a particular lead byte has the maximum-numbered ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ trailing byte in all its trailing bytes. We abstract ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ away the actual minimum/maximum trailing byte numbers, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ at least. We could perhaps do this more portably by ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ just looking at the representation of the character one ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ higher or lower and seeing if the lead byte changes, but ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ you'd run into the problem of invalid characters, e.g. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if you're at the edge of the range of surrogates or are ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the top-most allowed character. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ ~~ if (first < 0x80) ~~~~~~~~~~~~~~~~~ firstlead = first; ~~~~~~~~~~~~~~~~~~ else ~~~~ { ~ Ibyte strr[MAX_ICHAR_LEN]; ~~~~~~~~~~~~~~~~~~~~~~~~~~ Bytecount slen = set_itext_ichar (strr, first); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int kk; ~~~~~~~ /* Determine if we're the first character using our ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ leading byte. */ ~~~~~~~~~~~~~~~~ for (kk = 1; kk < slen; kk++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (strr[kk] != FIRST_TRAILING_BYTE) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ /* If not, this leading byte might occur, so ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ make sure it gets added to the fastmap. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ firstlead = *strr + 1; ~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ } ~ /* Otherwise, we're the first character using our ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ leading byte, and we don't need to add the leading ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ byte to the fastmap. (If our range doesn't ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ completely cover the leading byte, it will get added ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ anyway by the code handling the other end of the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ range.) */ ~~~~~~~~~~ firstlead = *strr; ~~~~~~~~~~~~~~~~~~ } ~ if (last < 0x80) ~~~~~~~~~~~~~~~~ lastlead = last; ~~~~~~~~~~~~~~~~ else ~~~~ { ~ Ibyte strr[MAX_ICHAR_LEN]; ~~~~~~~~~~~~~~~~~~~~~~~~~~ Bytecount slen ~~~~~~~~~~~~~~ = set_itext_ichar (strr, ~~~~~~~~~~~~~~~~~~~~~~~~ min (last, ~~~~~~~~~~ CHAR_CODE_LIMIT - 1)); ~~~~~~~~~~~~~~~~~~~~~~ int kk; ~~~~~~~ /* Same as above but for the last character using ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ our leading byte. */ ~~~~~~~~~~~~~~~~~~~~ for (kk = 1; kk < slen; kk++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (strr[kk] != LAST_TRAILING_BYTE) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ lastlead = *strr - 1; ~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ } ~ lastlead = *strr; ~~~~~~~~~~~~~~~~~ } ~ /* Now, FIRSTLEAD and LASTLEAD are set to the beginning and ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ end, inclusive, of a range of lead bytes that cannot be ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ in the fastmap. Essentially, we want to set all the other ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bytes to be in the fastmap. Here we handle those after ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the previous range and before this one. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (jj = smallest_prev; jj < firstlead; jj++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[jj] = 1; ~~~~~~~~~~~~~~~~ smallest_prev = lastlead + 1; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* Also set lead bytes after the end of the final range. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (j = smallest_prev; j < 0x100; j++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[j] = 1; ~~~~~~~~~~~~~~~ /* If it's not a possible first byte, it can't be in the fastmap. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ In UTF-8, lead bytes are not contiguous with ASCII, so a ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ range spanning the ASCII/non-ASCII boundary will put ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ extraneous bytes in the range [0x80 - 0xBF] in the fastmap. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (j = 0x80; j < 0x100; j++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!ibyte_first_byte_p (j)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[j] = 0; ~~~~~~~~~~~~~~~ #endif /* UNICODE_INTERNAL */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ break; ~~~~~~ #endif /* MULE */ ~~~~~~~~~~~~~~~~~ case anychar: ~~~~~~~~~~~~~ { ~ int fastmap_newline = fastmap['\n']; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* `.' matches anything ... */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef MULE ~~~~~~~~~~~ /* "anything" only includes bytes that can be the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ first byte of a character. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (j = 0; j < 0x100; j++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (ibyte_first_byte_p (j)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[j] = 1; ~~~~~~~~~~~~~~~ #else ~~~~~ for (j = 0; j < (1 << BYTEWIDTH); j++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[j] = 1; ~~~~~~~~~~~~~~~ #endif ~~~~~~ /* ... except perhaps newline. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!(bufp->syntax & RE_DOT_NEWLINE)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap['\n'] = fastmap_newline; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Return if we have already set `can_be_null'; if we have, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ then the fastmap is irrelevant. Something's wrong here. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ else if (bufp->can_be_null) ~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto done; ~~~~~~~~~~ /* Otherwise, have to check alternative paths. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ } ~ #ifndef emacs ~~~~~~~~~~~~~ case wordchar: ~~~~~~~~~~~~~~ for (j = 0; j < (1 << BYTEWIDTH); j++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (SYNTAX (ignored, j) == Sword) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[j] = 1; ~~~~~~~~~~~~~~~ break; ~~~~~~ case notwordchar: ~~~~~~~~~~~~~~~~~ for (j = 0; j < (1 << BYTEWIDTH); j++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (SYNTAX (ignored, j) != Sword) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[j] = 1; ~~~~~~~~~~~~~~~ break; ~~~~~~ #else /* emacs */ ~~~~~~~~~~~~~~~~~ case wordchar: ~~~~~~~~~~~~~~ case notwordchar: ~~~~~~~~~~~~~~~~~ case wordbound: ~~~~~~~~~~~~~~~ case notwordbound: ~~~~~~~~~~~~~~~~~~ case wordbeg: ~~~~~~~~~~~~~ case wordend: ~~~~~~~~~~~~~ case notsyntaxspec: ~~~~~~~~~~~~~~~~~~~ case syntaxspec: ~~~~~~~~~~~~~~~~ /* This match depends on text properties. These end with ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ aborting optimizations. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->can_be_null = 1; ~~~~~~~~~~~~~~~~~~~~~~ goto done; ~~~~~~~~~~ #if 0 /* all of the following code is unused now that the `syntax-table' ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ property exists -- it's trickier to do this than just look in ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the buffer. &&#### but we could just use the syntax-cache stuff ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ instead; why don't we? --ben */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case wordchar: ~~~~~~~~~~~~~~ k = (int) Sword; ~~~~~~~~~~~~~~~~ goto matchsyntax; ~~~~~~~~~~~~~~~~~ case notwordchar: ~~~~~~~~~~~~~~~~~ k = (int) Sword; ~~~~~~~~~~~~~~~~ goto matchnotsyntax; ~~~~~~~~~~~~~~~~~~~~ case syntaxspec: ~~~~~~~~~~~~~~~~ k = *p++; ~~~~~~~~~ matchsyntax: ~~~~~~~~~~~~ #ifdef MULE ~~~~~~~~~~~ for (j = 0; j < 0x80; j++) ~~~~~~~~~~~~~~~~~~~~~~~~~~ if (SYNTAX ~~~~~~~~~~ (XCHAR_TABLE (BUFFER_MIRROR_SYNTAX_TABLE (lispbuf)), j) == ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (enum syntaxcode) k) ~~~~~~~~~~~~~~~~~~~~ fastmap[j] = 1; ~~~~~~~~~~~~~~~ /* @@#### To be correct, we need to set the fastmap for any ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ lead byte any of whose characters can have this syntax code. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ This is hard to calculate so we just punt for now. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (j = 0x80; j < 0x100; j++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (ibyte_first_byte_p (j)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[j] = 1; ~~~~~~~~~~~~~~~ #else /* not MULE */ ~~~~~~~~~~~~~~~~~~~~ for (j = 0; j < (1 << BYTEWIDTH); j++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (SYNTAX ~~~~~~~~~~ (XCHAR_TABLE (BUFFER_MIRROR_SYNTAX_TABLE (lispbuf)), j) == ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (enum syntaxcode) k) ~~~~~~~~~~~~~~~~~~~~ fastmap[j] = 1; ~~~~~~~~~~~~~~~ #endif /* MULE */ ~~~~~~~~~~~~~~~~~ break; ~~~~~~ case notsyntaxspec: ~~~~~~~~~~~~~~~~~~~ k = *p++; ~~~~~~~~~ matchnotsyntax: ~~~~~~~~~~~~~~~ #ifdef MULE ~~~~~~~~~~~ for (j = 0; j < 0x80; j++) ~~~~~~~~~~~~~~~~~~~~~~~~~~ if (SYNTAX ~~~~~~~~~~ (XCHAR_TABLE ~~~~~~~~~~~~ (BUFFER_MIRROR_SYNTAX_TABLE (lispbuf)), j) != ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (enum syntaxcode) k) ~~~~~~~~~~~~~~~~~~~~ fastmap[j] = 1; ~~~~~~~~~~~~~~~ /* @@#### To be correct, we need to set the fastmap for any ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ lead byte all of whose characters do not have this syntax code. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ This is hard to calculate so we just punt for now. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (j = 0x80; j < 0x100; j++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (ibyte_first_byte_p (j)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[j] = 1; ~~~~~~~~~~~~~~~ #else /* not MULE */ ~~~~~~~~~~~~~~~~~~~~ for (j = 0; j < (1 << BYTEWIDTH); j++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (SYNTAX ~~~~~~~~~~ (XCHAR_TABLE ~~~~~~~~~~~~ (BUFFER_MIRROR_SYNTAX_TABLE (lispbuf)), j) != ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (enum syntaxcode) k) ~~~~~~~~~~~~~~~~~~~~ fastmap[j] = 1; ~~~~~~~~~~~~~~~ #endif /* MULE */ ~~~~~~~~~~~~~~~~~ break; ~~~~~~ #endif /* 0 */ ~~~~~~~~~~~~~~ #ifdef MULE ~~~~~~~~~~~ /* 97/2/17 jhod category patch */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case categoryspec: ~~~~~~~~~~~~~~~~~~ case notcategoryspec: ~~~~~~~~~~~~~~~~~~~~~ bufp->can_be_null = 1; ~~~~~~~~~~~~~~~~~~~~~~ UNBIND_REGEX_MALLOC_CHECK (); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return 0; ~~~~~~~~~ /* end if category patch */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif /* MULE */ ~~~~~~~~~~~~~~~~~ /* All cases after this match the empty string. These end with ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `continue'. */ ~~~~~~~~~~~~~~~ case before_dot: ~~~~~~~~~~~~~~~~ case at_dot: ~~~~~~~~~~~~ case after_dot: ~~~~~~~~~~~~~~~ continue; ~~~~~~~~~ #endif /* emacs */ ~~~~~~~~~~~~~~~~~~ case no_op: ~~~~~~~~~~~ case begline: ~~~~~~~~~~~~~ case endline: ~~~~~~~~~~~~~ case begbuf: ~~~~~~~~~~~~ case endbuf: ~~~~~~~~~~~~ #ifndef emacs ~~~~~~~~~~~~~ case wordbound: ~~~~~~~~~~~~~~~ case notwordbound: ~~~~~~~~~~~~~~~~~~ case wordbeg: ~~~~~~~~~~~~~ case wordend: ~~~~~~~~~~~~~ #endif ~~~~~~ case push_dummy_failure: ~~~~~~~~~~~~~~~~~~~~~~~~ continue; ~~~~~~~~~ case jump_n: ~~~~~~~~~~~~ case pop_failure_jump: ~~~~~~~~~~~~~~~~~~~~~~ case maybe_pop_jump: ~~~~~~~~~~~~~~~~~~~~ case jump: ~~~~~~~~~~ case jump_past_alt: ~~~~~~~~~~~~~~~~~~~ case dummy_failure_jump: ~~~~~~~~~~~~~~~~~~~~~~~~ EXTRACT_NUMBER_AND_INCR (j, p); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ p += j; ~~~~~~~ if (j > 0) ~~~~~~~~~~ continue; ~~~~~~~~~ /* Jump backward implies we just went through the body of a ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ loop and matched nothing. Opcode jumped to should be ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `on_failure_jump' or `succeed_n'. Just treat it like an ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ordinary jump. For a * loop, it has pushed its failure ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ point already; if so, discard that as redundant. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if ((re_opcode_t) *p != on_failure_jump ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ && (re_opcode_t) *p != succeed_n) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ continue; ~~~~~~~~~ p++; ~~~~ EXTRACT_NUMBER_AND_INCR (j, p); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ p += j; ~~~~~~~ /* If what's on the stack is where we are now, pop it. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!FAIL_STACK_EMPTY () ~~~~~~~~~~~~~~~~~~~~~~~~ && fail_stack.stack[fail_stack.avail - 1].pointer == p) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fail_stack.avail--; ~~~~~~~~~~~~~~~~~~~ continue; ~~~~~~~~~ case on_failure_jump: ~~~~~~~~~~~~~~~~~~~~~ case on_failure_keep_string_jump: ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ handle_on_failure_jump: ~~~~~~~~~~~~~~~~~~~~~~~ EXTRACT_NUMBER_AND_INCR (j, p); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* For some patterns, e.g., `(a?)?', `p+j' here points to the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ end of the pattern. We don't want to push such a point, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ since when we restore it above, entering the switch will ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ increment `p' past the end of the pattern. We don't need ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ to push such a point since we obviously won't find any more ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap entries beyond `pend'. Such a pattern can match ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the null string, though. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (p + j < pend) ~~~~~~~~~~~~~~~~~ { ~ if (!PUSH_PATTERN_OP (p + j, fail_stack)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ RESET_FAIL_STACK (); ~~~~~~~~~~~~~~~~~~~~ UNBIND_REGEX_MALLOC_CHECK (); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return -2; ~~~~~~~~~~ } ~ } ~ else ~~~~ bufp->can_be_null = 1; ~~~~~~~~~~~~~~~~~~~~~~ if (succeed_n_p) ~~~~~~~~~~~~~~~~ { ~ EXTRACT_NUMBER_AND_INCR (k, p); /* Skip the n. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ succeed_n_p = false; ~~~~~~~~~~~~~~~~~~~~ } ~ continue; ~~~~~~~~~ case succeed_n: ~~~~~~~~~~~~~~~ /* Get to the number of times to succeed. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ p += 2; ~~~~~~~ /* Increment p past the n for when k != 0. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ EXTRACT_NONNEGATIVE_AND_INCR (k, p); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (k == 0) ~~~~~~~~~~~ { ~ p -= 4; ~~~~~~~ succeed_n_p = true; /* Spaghetti code alert. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto handle_on_failure_jump; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ continue; ~~~~~~~~~ case set_number_at: ~~~~~~~~~~~~~~~~~~~ p += 4; ~~~~~~~ continue; ~~~~~~~~~ case start_memory: ~~~~~~~~~~~~~~~~~~ case stop_memory: ~~~~~~~~~~~~~~~~~ p += 4; ~~~~~~~ continue; ~~~~~~~~~ default: ~~~~~~~~ ABORT (); /* We have listed all the cases. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } /* switch *p++ */ ~~~~~~~~~~~~~~~~~~~ /* Getting here means we have found the possible starting ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ characters for one path of the pattern -- and that the empty ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ string does not match. We need not follow this path further. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Instead, look at the next alternative (remembered on the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ stack), or quit if no more. The test at the top of the loop ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ does these things. */ ~~~~~~~~~~~~~~~~~~~~~~ path_can_be_null = false; ~~~~~~~~~~~~~~~~~~~~~~~~~ p = pend; ~~~~~~~~~ } /* while p */ ~~~~~~~~~~~~~~~ /* Set `can_be_null' for the last path (also the first path, if the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ pattern is empty). */ ~~~~~~~~~~~~~~~~~~~~~~ bufp->can_be_null |= path_can_be_null; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ done: ~~~~~ RESET_FAIL_STACK (); ~~~~~~~~~~~~~~~~~~~~ UNBIND_REGEX_MALLOC_CHECK (); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return 0; ~~~~~~~~~ } /* re_compile_fastmap */ ~~~~~~~~~~~~~~~~~~~~~~~~~~ ~ /* Set REGS to hold NUM_REGS registers, storing them in STARTS and ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ENDS. Subsequent matches using PATTERN_BUFFER and REGS will use ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ this memory for recording register information. STARTS and ENDS ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ must be allocated using the malloc library routine, and must each ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ be at least NUM_REGS * sizeof (regoff_t) bytes long. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ If NUM_REGS == 0, then subsequent matches should allocate their own ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ register data. ~~~~~~~~~~~~~~ Unless this function is called, the first search or match using ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PATTERN_BUFFER will allocate its own register data, without ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ freeing the old data. */ ~~~~~~~~~~~~~~~~~~~~~~~~~ void ~~~~ re_set_registers (struct re_pattern_buffer *bufp, struct re_registers *regs, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int num_regs, regoff_t *starts, regoff_t *ends) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ if (num_regs) ~~~~~~~~~~~~~ { ~ bufp->regs_allocated = REGS_REALLOCATE; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regs->num_regs = num_regs; ~~~~~~~~~~~~~~~~~~~~~~~~~~ regs->start = starts; ~~~~~~~~~~~~~~~~~~~~~ regs->end = ends; ~~~~~~~~~~~~~~~~~ } ~ else ~~~~ { ~ bufp->regs_allocated = REGS_UNALLOCATED; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regs->num_regs = 0; ~~~~~~~~~~~~~~~~~~~ regs->start = regs->end = (regoff_t *) 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ } ~ ~ /* Searching routines. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Like re_search_2, below, but only one string is specified, and ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ doesn't let you say where to stop matching. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int ~~~ re_search (struct re_pattern_buffer *bufp, const char *string, int size, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int startpos, int range, struct re_registers *regs ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RE_LISP_CONTEXT_ARGS_DECL) ~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ return re_search_2 (bufp, NULL, 0, string, size, startpos, range, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regs, size RE_LISP_CONTEXT_ARGS); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* Using the compiled pattern in BUFP->buffer, first tries to match the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ virtual concatenation of STRING1 and STRING2, starting first at index ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ STARTPOS, then at STARTPOS + 1, and so on. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ STRING1 and STRING2 have length SIZE1 and SIZE2, respectively. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RANGE is how far to scan while trying to match. RANGE = 0 means try ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ only at STARTPOS; in general, the last start tried is STARTPOS + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RANGE. ~~~~~~ All sizes and positions refer to bytes (not chars); under Mule, the code ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ knows about the format of the text and will only check at positions ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ where a character starts. ~~~~~~~~~~~~~~~~~~~~~~~~~ With MULE, RANGE is a byte position, not a char position. The last ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ start tried is the character starting <= STARTPOS + RANGE. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ In REGS, return the indices of the virtual concatenation of STRING1 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ and STRING2 that matched the entire BUFP->buffer and its contained ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ subexpressions. ~~~~~~~~~~~~~~~ Do not consider matching one past the index STOP in the virtual ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ concatenation of STRING1 and STRING2. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ We return either the position in the strings at which the match was ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ found, -1 if no match, or -2 if error (such as failure ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ stack overflow). */ ~~~~~~~~~~~~~~~~~~~~ int ~~~ re_search_2 (struct re_pattern_buffer *bufp, const char *str1, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int size1, const char *str2, int size2, int startpos, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int range, struct re_registers *regs, int stop ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RE_LISP_CONTEXT_ARGS_DECL) ~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ int val; ~~~~~~~~ re_char *string1 = (re_char *) str1; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_char *string2 = (re_char *) str2; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ REGISTER char *fastmap = bufp->fastmap; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ REGISTER RE_TRANSLATE_TYPE translate = bufp->translate; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int total_size = size1 + size2; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int endpos = startpos + range; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef REGEX_BEGLINE_CHECK ~~~~~~~~~~~~~~~~~~~~~~~~~~ int anchored_at_begline = 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ re_char *d; ~~~~~~~~~~~ #ifdef emacs ~~~~~~~~~~~~ Internal_Format fmt = buffer_or_other_internal_format (lispobj); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef REL_ALLOC ~~~~~~~~~~~~~~~~ const Ibyte *orig_buftext = ~~~~~~~~~~~~~~~~~~~~~~~~~~~ BUFFERP (lispobj) ? ~~~~~~~~~~~~~~~~~~~ BYTE_BUF_BYTE_ADDRESS (XBUFFER (lispobj), ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ BYTE_BUF_BEG (XBUFFER (lispobj))) : ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 0; ~~ #endif ~~~~~~ #ifdef ERROR_CHECK_MALLOC ~~~~~~~~~~~~~~~~~~~~~~~~~ int depth; ~~~~~~~~~~ #endif ~~~~~~ #endif /* emacs */ ~~~~~~~~~~~~~~~~~~ int forward_search_p; ~~~~~~~~~~~~~~~~~~~~~ /* Check for out-of-range STARTPOS. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (startpos < 0 || startpos > total_size) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return -1; ~~~~~~~~~~ /* Fix up RANGE if it might eventually take us outside ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the virtual concatenation of STRING1 and STRING2. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (endpos < 0) ~~~~~~~~~~~~~~~ range = 0 - startpos; ~~~~~~~~~~~~~~~~~~~~~ else if (endpos > total_size) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ range = total_size - startpos; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ forward_search_p = range > 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (void) (forward_search_p); /* This is only used with assertions, silence the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ compiler warning when they're turned off. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* If the search isn't to be a backwards one, don't waste time in a ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ search for a pattern that must be anchored. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (bufp->used > 0 && (re_opcode_t) bufp->buffer[0] == begbuf && range > 0) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ if (startpos > 0) ~~~~~~~~~~~~~~~~~ return -1; ~~~~~~~~~~ else ~~~~ { ~ d = ((const unsigned char *) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (startpos >= size1 ? string2 - size1 : string1) + startpos); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ range = itext_ichar_len_fmt (d, fmt); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ } ~ #ifdef emacs ~~~~~~~~~~~~ /* In a forward search for something that starts with \=. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ don't keep searching past point. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (bufp->used > 0 && (re_opcode_t) bufp->buffer[0] == at_dot && range > 0) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ if (!BUFFERP (lispobj)) ~~~~~~~~~~~~~~~~~~~~~~~ return -1; ~~~~~~~~~~ range = (BYTE_BUF_PT (XBUFFER (lispobj)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - BYTE_BUF_BEGV (XBUFFER (lispobj)) - startpos); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (range < 0) ~~~~~~~~~~~~~~ return -1; ~~~~~~~~~~ } ~ #endif /* emacs */ ~~~~~~~~~~~~~~~~~~ #ifdef ERROR_CHECK_MALLOC ~~~~~~~~~~~~~~~~~~~~~~~~~ /* Do this after the above return()s. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ depth = bind_regex_malloc_disallowed (1); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ /* Update the fastmap now if not correct already. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ BEGIN_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~~~ if (fastmap && !bufp->fastmap_accurate) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (re_compile_fastmap (bufp RE_LISP_SHORT_CONTEXT_ARGS) == -2) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ END_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~ UNBIND_REGEX_MALLOC_CHECK (); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return -2; ~~~~~~~~~~ } ~ END_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~ RE_SEARCH_RELOCATE_MOVEABLE_DATA_POINTERS (); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef REGEX_BEGLINE_CHECK ~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ long i = 0; ~~~~~~~~~~~ while (i < bufp->used) ~~~~~~~~~~~~~~~~~~~~~~ { ~ if (bufp->buffer[i] == start_memory || ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->buffer[i] == stop_memory) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ i += 4; ~~~~~~~ else ~~~~ break; ~~~~~~ } ~ anchored_at_begline = i < bufp->used && bufp->buffer[i] == begline; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ #endif ~~~~~~ #ifdef emacs ~~~~~~~~~~~~ BEGIN_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~~~ /* Update the mirror syntax table if it's used and dirty. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ SYNTAX_CODE (BUFFER_MIRROR_SYNTAX_TABLE (lispbuf), 'a'); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ scache = setup_syntax_cache (scache, lispobj, lispbuf, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ offset_to_bytexpos (lispobj, startpos), ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 1); ~~~ END_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~ RE_SEARCH_RELOCATE_MOVEABLE_DATA_POINTERS (); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ /* Loop through the string, looking for a place to start matching. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (;;) ~~~~~~~~ { ~ #ifdef REGEX_BEGLINE_CHECK ~~~~~~~~~~~~~~~~~~~~~~~~~~ /* If the regex is anchored at the beginning of a line (i.e. with a ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ^), then we can speed things up by skipping to the next ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ beginning-of-line. However, to determine "beginning of line" we ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ need to look at the previous char, so can't do this check if at ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ beginning of either string. (Well, we could if at the beginning of ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the second string, but it would require additional code, and this ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ is just an optimization.) */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (anchored_at_begline && startpos > 0 && startpos != size1) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ if (range > 0) ~~~~~~~~~~~~~~ { ~ /* whose stupid idea was it anyway to make this ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ function take two strings to match?? */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int lim = 0; ~~~~~~~~~~~~ re_char *orig_d; ~~~~~~~~~~~~~~~~ re_char *stop_d; ~~~~~~~~~~~~~~~~ /* Compute limit as below in fastmap code, so we are guaranteed ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ to remain within a single string. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (startpos < size1 && startpos + range >= size1) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ lim = range - (size1 - startpos); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ d = ((const unsigned char *) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (startpos >= size1 ? string2 - size1 : string1) + startpos); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ orig_d = d; ~~~~~~~~~~~ stop_d = d + range - lim; ~~~~~~~~~~~~~~~~~~~~~~~~~ /* We want to find the next location (including the current ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ one) where the previous char is a newline, so back up one ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ and search forward for a newline. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEC_IBYTEPTR_FMT (d, fmt); /* Ok, since startpos != size1. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Written out as an if-else to avoid testing `translate' ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ inside the loop. */ ~~~~~~~~~~~~~~~~~~~~ if (TRANSLATE_P (translate)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ while (d < stop_d && ~~~~~~~~~~~~~~~~~~~~ RE_TRANSLATE_1 (itext_ichar_fmt (d, fmt, lispobj)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ != '\n') ~~~~~~~~ INC_IBYTEPTR_FMT (d, fmt); ~~~~~~~~~~~~~~~~~~~~~~~~~~ else ~~~~ while (d < stop_d && ~~~~~~~~~~~~~~~~~~~~ itext_ichar_ascii_fmt (d, fmt, lispobj) != '\n') ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ INC_IBYTEPTR_FMT (d, fmt); ~~~~~~~~~~~~~~~~~~~~~~~~~~ /* If we were stopped by a newline, skip forward over it. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Otherwise we will get in an infloop when our start position ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ was at begline. */ ~~~~~~~~~~~~~~~~~~ if (d < stop_d) ~~~~~~~~~~~~~~~ INC_IBYTEPTR_FMT (d, fmt); ~~~~~~~~~~~~~~~~~~~~~~~~~~ range -= d - orig_d; ~~~~~~~~~~~~~~~~~~~~ startpos += d - orig_d; ~~~~~~~~~~~~~~~~~~~~~~~ assert (!forward_search_p || range >= 0); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ else if (range < 0) ~~~~~~~~~~~~~~~~~~~ { ~ /* We're lazy, like in the fastmap code below */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Ichar c; ~~~~~~~~ d = ((const unsigned char *) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (startpos >= size1 ? string2 - size1 : string1) + startpos); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEC_IBYTEPTR_FMT (d, fmt); ~~~~~~~~~~~~~~~~~~~~~~~~~~ c = itext_ichar_fmt (d, fmt, lispobj); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ c = RE_TRANSLATE (c); ~~~~~~~~~~~~~~~~~~~~~ if (c != '\n') ~~~~~~~~~~~~~~ goto advance; ~~~~~~~~~~~~~ } ~ } ~ #endif /* REGEX_BEGLINE_CHECK */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* If a fastmap is supplied, skip quickly over characters that ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ cannot be the start of a match. If the pattern can match the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ null string, however, we don't need to skip characters; we want ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the first null string. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~ if (fastmap && startpos < total_size && !bufp->can_be_null) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ /* For the moment, fastmap always works as if buffer ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ is in default format, so convert chars in the search strings ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ into default format as we go along, if necessary. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ &&#### fastmap needs rethinking for 8-bit-fixed so ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ it's faster. We need it to reflect the raw ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 8-bit-fixed values. That isn't so hard if we assume ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ that the top 96 bytes represent a single 1-byte ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ charset. For 16-bit/32-bit stuff it's probably not ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ worth it to make the fastmap represent the raw, due to ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ its nature -- we'd have to use the LSB for the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap, and that causes lots of problems with Mule ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ chars, where it essentially wipes out the usefulness ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ of the fastmap entirely. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (range > 0) /* Searching forwards. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ int lim = 0; ~~~~~~~~~~~~ int irange = range; ~~~~~~~~~~~~~~~~~~~ if (startpos < size1 && startpos + range >= size1) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ lim = range - (size1 - startpos); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ d = ((const unsigned char *) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (startpos >= size1 ? string2 - size1 : string1) + startpos); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Written out as an if-else to avoid testing `translate' ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ inside the loop. */ ~~~~~~~~~~~~~~~~~~~~ if (TRANSLATE_P (translate)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ while (range > lim) ~~~~~~~~~~~~~~~~~~~ { ~ re_char *old_d = d; ~~~~~~~~~~~~~~~~~~~ #ifdef MULE ~~~~~~~~~~~ Ibyte tempch[MAX_ICHAR_LEN]; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Ichar buf_ch = ~~~~~~~~~~~~~~ RE_TRANSLATE_1 (itext_ichar_fmt (d, fmt, lispobj)); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ set_itext_ichar (tempch, buf_ch); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (fastmap[*tempch]) ~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ #else ~~~~~ if (fastmap[(unsigned char) RE_TRANSLATE_1 (*d)]) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ #endif /* MULE */ ~~~~~~~~~~~~~~~~~ INC_IBYTEPTR_FMT (d, fmt); ~~~~~~~~~~~~~~~~~~~~~~~~~~ range -= (d - old_d); ~~~~~~~~~~~~~~~~~~~~~ assert (!forward_search_p || range >= 0); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ } ~ #ifdef MULE ~~~~~~~~~~~ else if (fmt != FORMAT_DEFAULT) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ while (range > lim) ~~~~~~~~~~~~~~~~~~~ { ~ re_char *old_d = d; ~~~~~~~~~~~~~~~~~~~ Ibyte tempch[MAX_ICHAR_LEN]; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Ichar buf_ch = itext_ichar_fmt (d, fmt, lispobj); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ set_itext_ichar (tempch, buf_ch); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (fastmap[*tempch]) ~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ INC_IBYTEPTR_FMT (d, fmt); ~~~~~~~~~~~~~~~~~~~~~~~~~~ range -= (d - old_d); ~~~~~~~~~~~~~~~~~~~~~ assert (!forward_search_p || range >= 0); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ } ~ #endif /* MULE */ ~~~~~~~~~~~~~~~~~ else ~~~~ { ~ while (range > lim && !fastmap[*d]) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ re_char *old_d = d; ~~~~~~~~~~~~~~~~~~~ INC_IBYTEPTR (d); ~~~~~~~~~~~~~~~~~ range -= (d - old_d); ~~~~~~~~~~~~~~~~~~~~~ assert (!forward_search_p || range >= 0); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ } ~ startpos += irange - range; ~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ else /* Searching backwards. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ /* #### It's not clear why we don't just write a loop, like ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for the moving-forward case. Perhaps the writer got lazy, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ since backward searches aren't so common. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ d = ((const unsigned char *) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (startpos >= size1 ? string2 - size1 : string1) + startpos); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef MULE ~~~~~~~~~~~ { ~ Ibyte tempch[MAX_ICHAR_LEN]; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Ichar buf_ch = ~~~~~~~~~~~~~~ RE_TRANSLATE (itext_ichar_fmt (d, fmt, lispobj)); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ set_itext_ichar (tempch, buf_ch); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!fastmap[*tempch]) ~~~~~~~~~~~~~~~~~~~~~~ goto advance; ~~~~~~~~~~~~~ } ~ #else ~~~~~ if (!fastmap[(unsigned char) RE_TRANSLATE (*d)]) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto advance; ~~~~~~~~~~~~~ #endif /* MULE */ ~~~~~~~~~~~~~~~~~ } ~ } ~ /* If can't match the null string, and that's all we have left, fail. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (range >= 0 && startpos == total_size && fastmap ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ && !bufp->can_be_null) ~~~~~~~~~~~~~~~~~~~~~~ { ~ UNBIND_REGEX_MALLOC_CHECK (); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return -1; ~~~~~~~~~~ } ~ #ifdef emacs /* XEmacs added, w/removal of immediate_quit */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!no_quit_in_re_search) ~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ BEGIN_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~~~ QUIT; ~~~~~ END_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~ RE_SEARCH_RELOCATE_MOVEABLE_DATA_POINTERS (); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ #endif ~~~~~~ BEGIN_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~~~ val = re_match_2_internal (bufp, string1, size1, string2, size2, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ startpos, regs, stop ~~~~~~~~~~~~~~~~~~~~ RE_LISP_CONTEXT_ARGS); ~~~~~~~~~~~~~~~~~~~~~~ #ifndef REGEX_MALLOC ~~~~~~~~~~~~~~~~~~~~ ALLOCA_GARBAGE_COLLECT (); ~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ END_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~ RE_SEARCH_RELOCATE_MOVEABLE_DATA_POINTERS (); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (val >= 0) ~~~~~~~~~~~~~ { ~ UNBIND_REGEX_MALLOC_CHECK (); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return startpos; ~~~~~~~~~~~~~~~~ } ~ if (val == -2) ~~~~~~~~~~~~~~ { ~ UNBIND_REGEX_MALLOC_CHECK (); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return -2; ~~~~~~~~~~ } ~ RE_SEARCH_RELOCATE_MOVEABLE_DATA_POINTERS (); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ advance: ~~~~~~~~ if (!range) ~~~~~~~~~~~ break; ~~~~~~ else if (range > 0) ~~~~~~~~~~~~~~~~~~~ { ~ Bytecount d_size; ~~~~~~~~~~~~~~~~~ d = ((const unsigned char *) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (startpos >= size1 ? string2 - size1 : string1) + startpos); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ d_size = itext_ichar_len_fmt (d, fmt); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ range -= d_size; ~~~~~~~~~~~~~~~~ assert (!forward_search_p || range >= 0); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ startpos += d_size; ~~~~~~~~~~~~~~~~~~~ } ~ else ~~~~ { ~ Bytecount d_size; ~~~~~~~~~~~~~~~~~ /* Note startpos > size1 not >=. If we are on the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ string1/string2 boundary, we want to backup into string1. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ d = ((const unsigned char *) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (startpos > size1 ? string2 - size1 : string1) + startpos); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEC_IBYTEPTR_FMT (d, fmt); ~~~~~~~~~~~~~~~~~~~~~~~~~~ d_size = itext_ichar_len_fmt (d, fmt); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ range += d_size; ~~~~~~~~~~~~~~~~ assert (!forward_search_p || range >= 0); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ startpos -= d_size; ~~~~~~~~~~~~~~~~~~~ } ~ } ~ UNBIND_REGEX_MALLOC_CHECK (); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return -1; ~~~~~~~~~~ } /* re_search_2 */ ~~~~~~~~~~~~~~~~~~~ ~ /* Declarations and macros for re_match_2. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* This converts PTR, a pointer into one of the search strings `string1' ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ and `string2' into an offset from the beginning of that string. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define POINTER_TO_OFFSET(ptr) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (FIRST_STRING_P (ptr) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~ ? ((regoff_t) ((ptr) - string1)) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ : ((regoff_t) ((ptr) - string2 + size1))) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Macros for dealing with the split strings in re_match_2. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define MATCHING_IN_FIRST_STRING (dend == end_match_1) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Call before fetching a character with *d. This switches over to ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ string2 if necessary. */ ~~~~~~~~~~~~~~~~~~~~~~~~~ #define REGEX_PREFETCH() \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ while (d == dend) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { \ ~~~~~~~~~~~ /* End of string2 => fail. */ \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (dend == end_match_2) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto fail; \ ~~~~~~~~~~~~~~~~~~ /* End of string1 => advance to string2. */ \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ d = string2; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~ dend = end_match_2; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* Test if at very beginning or at very end of the virtual concatenation ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ of `string1' and `string2'. If only one string, it's `string2'. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define AT_STRINGS_BEG(d) ((d) == (size1 ? string1 : string2) || !size2) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define AT_STRINGS_END(d) ((d) == end2) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* XEmacs change: ~~~~~~~~~~~~~~~~~ If the given position straddles the string gap, return the equivalent ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ position that is before or after the gap, respectively; otherwise, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return the same position. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define POS_BEFORE_GAP_UNSAFE(d) ((d) == string2 ? end1 : (d)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define POS_AFTER_GAP_UNSAFE(d) ((d) == end1 ? string2 : (d)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Test if CH is a word-constituent character. (XEmacs change) */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define WORDCHAR_P(ch) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (SYNTAX (BUFFER_MIRROR_SYNTAX_TABLE (lispbuf), ch) == Sword) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Free everything we malloc. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef MATCH_MAY_ALLOCATE ~~~~~~~~~~~~~~~~~~~~~~~~~ #define FREE_VAR(var,type) if (var) REGEX_FREE (var, type); var = NULL ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define FREE_VARIABLES() \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ do { \ ~~~~~~~~~~~~~~ UNBIND_REGEX_MALLOC_CHECK (); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ REGEX_FREE_STACK (fail_stack.stack); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ FREE_VAR (regstart, re_char **); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ FREE_VAR (regend, re_char **); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ FREE_VAR (old_regstart, re_char **); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ FREE_VAR (old_regend, re_char **); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ FREE_VAR (best_regstart, re_char **); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ FREE_VAR (best_regend, re_char **); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ FREE_VAR (reg_info, register_info_type *); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ FREE_VAR (reg_dummy, re_char **); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ FREE_VAR (reg_info_dummy, register_info_type *); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } while (0) ~~~~~~~~~~~ #else /* not MATCH_MAY_ALLOCATE */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define FREE_VARIABLES() \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ do { \ ~~~~~~~~~~~ UNBIND_REGEX_MALLOC_CHECK (); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } while (0) ~~~~~~~~~~~ #endif /* MATCH_MAY_ALLOCATE */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* These values must meet several constraints. They must not be valid ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ register values, which means we can use numbers larger than MAX_REGNUM. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ They must differ by 1, because of NUM_FAILURE_ITEMS above. And the value ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for the lowest register must be larger than the value for the highest ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ register, so we do not try to actually save any registers when none are ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ active. */ ~~~~~~~~~~~ #define NO_HIGHEST_ACTIVE_REG (MAX_REGNUM + 1) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define NO_LOWEST_ACTIVE_REG (NO_HIGHEST_ACTIVE_REG + 1) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ~ /* Matching routines. */ ~~~~~~~~~~~~~~~~~~~~~~~~~ #ifndef emacs /* XEmacs never uses this. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* re_match is like re_match_2 except it takes only a single string. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int ~~~ re_match (struct re_pattern_buffer *bufp, const char *string, int size, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int pos, struct re_registers *regs ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RE_LISP_CONTEXT_ARGS_DECL) ~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ int result = re_match_2_internal (bufp, NULL, 0, (re_char *) string, size, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ pos, regs, size ~~~~~~~~~~~~~~~ RE_LISP_CONTEXT_ARGS); ~~~~~~~~~~~~~~~~~~~~~~ ALLOCA_GARBAGE_COLLECT (); ~~~~~~~~~~~~~~~~~~~~~~~~~~ return result; ~~~~~~~~~~~~~~ } ~ #endif /* not emacs */ ~~~~~~~~~~~~~~~~~~~~~~ /* re_match_2 matches the compiled pattern in BUFP against the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (virtual) concatenation of STRING1 and STRING2 (of length SIZE1 and ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ SIZE2, respectively). We start matching at POS, and stop matching ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ at STOP. ~~~~~~~~ If REGS is non-null and the `no_sub' field of BUFP is nonzero, we ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ store offsets for the substring each group matched in REGS. See the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ documentation for exactly how many groups we fill. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ We return -1 if no match, -2 if an internal error (such as the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ failure stack overflowing). Otherwise, we return the length of the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ matched substring. */ ~~~~~~~~~~~~~~~~~~~~~~ int ~~~ re_match_2 (struct re_pattern_buffer *bufp, const char *string1, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int size1, const char *string2, int size2, int pos, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ struct re_registers *regs, int stop ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RE_LISP_CONTEXT_ARGS_DECL) ~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ int result; ~~~~~~~~~~~ #ifdef emacs ~~~~~~~~~~~~ /* Update the mirror syntax table if it's dirty now, this would otherwise ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ cause a malloc() in charset_mule in re_match_2_internal() when checking ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ characters' syntax. */ ~~~~~~~~~~~~~~~~~~~~~~ SYNTAX_CODE (BUFFER_MIRROR_SYNTAX_TABLE (lispbuf), 'a'); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ scache = setup_syntax_cache (scache, lispobj, lispbuf, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ offset_to_bytexpos (lispobj, pos), ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 1); ~~~ #endif ~~~~~~ result = re_match_2_internal (bufp, (re_char *) string1, size1, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (re_char *) string2, size2, ~~~~~~~~~~~~~~~~~~~~~~~~~~~ pos, regs, stop ~~~~~~~~~~~~~~~ RE_LISP_CONTEXT_ARGS); ~~~~~~~~~~~~~~~~~~~~~~ ALLOCA_GARBAGE_COLLECT (); ~~~~~~~~~~~~~~~~~~~~~~~~~~ return result; ~~~~~~~~~~~~~~ } ~ /* This is a separate function so that we can force an alloca cleanup ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ afterwards. */ ~~~~~~~~~~~~~~~ static int ~~~~~~~~~~ re_match_2_internal (struct re_pattern_buffer *bufp, re_char *string1, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int size1, re_char *string2, int size2, int pos, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ struct re_registers *regs, int stop ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RE_LISP_CONTEXT_ARGS_MULE_DECL) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ /* General temporaries. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~ int mcnt; ~~~~~~~~~ re_char *p1; ~~~~~~~~~~~~ int should_succeed; /* XEmacs change */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Just past the end of the corresponding string. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_char *end1, *end2; ~~~~~~~~~~~~~~~~~~~~~ /* Pointers into string1 and string2, just past the last characters in ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ each to consider matching. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_char *end_match_1, *end_match_2; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Where we are in the data, and the end of the current string. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_char *d, *dend; ~~~~~~~~~~~~~~~~~~ /* Where we are in the pattern, and the end of the pattern. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ unsigned char *p; ~~~~~~~~~~~~~~~~~ re_char *pstart; ~~~~~~~~~~~~~~~~ REGISTER re_char *pend; ~~~~~~~~~~~~~~~~~~~~~~~ /* Mark the opcode just after a start_memory, so we can test for an ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ empty subpattern when we get to the stop_memory. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_char *just_past_start_mem = 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* We use this to map every character in the string. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RE_TRANSLATE_TYPE translate = bufp->translate; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Failure point stack. Each place that can handle a failure further ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ down the line pushes a failure point on this stack. It consists of ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ restart, regend, and reg_info for all registers corresponding to ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the subexpressions we're currently inside, plus the number of such ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ registers, and, finally, two char *'s. The first char * is where ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ to resume scanning the pattern; the second one is where to resume ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ scanning the strings. If the latter is zero, the failure point is ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ a ``dummy''; if a failure happens and the failure point is a dummy, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ it gets discarded and the next one is tried. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef MATCH_MAY_ALLOCATE /* otherwise, this is global. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fail_stack_type fail_stack; ~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ #ifdef DEBUG ~~~~~~~~~~~~ static int failure_id; ~~~~~~~~~~~~~~~~~~~~~~ int nfailure_points_pushed = 0, nfailure_points_popped = 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ #ifdef REGEX_REL_ALLOC ~~~~~~~~~~~~~~~~~~~~~~ /* This holds the pointer to the failure stack, when ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ it is allocated relocatably. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fail_stack_elt_t *failure_stack_ptr; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ /* We fill all the registers internally, independent of what we ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return, for use in backreferences. The number here includes ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ an element for register zero. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regnum_t num_regs = bufp->re_ngroups + 1; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* The currently active registers. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regnum_t lowest_active_reg = NO_LOWEST_ACTIVE_REG; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regnum_t highest_active_reg = NO_HIGHEST_ACTIVE_REG; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Information on the contents of registers. These are pointers into ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the input strings; they record just what was matched (on this ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ attempt) by a subexpression part of the pattern, that is, the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regnum-th regstart pointer points to where in the pattern we began ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ matching and the regnum-th regend points to right after where we ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ stopped matching the regnum-th subexpression. (The zeroth register ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ keeps track of what the whole pattern matches.) */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_char **regstart, **regend; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ /* If a group that's operated upon by a repetition operator fails to ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ match anything, then the register for its start will need to be ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ restored because it will have been set to wherever in the string we ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ are when we last see its open-group operator. Similarly for a ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ register's end. */ ~~~~~~~~~~~~~~~~~~~ #ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_char **old_regstart, **old_regend; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ /* The is_active field of reg_info helps us keep track of which (possibly ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ nested) subexpressions we are currently in. The matched_something ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ field of reg_info[reg_num] helps us tell whether or not we have ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ matched any of the pattern so far this time through the reg_num-th ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ subexpression. These two fields get reset each time through any ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ loop their register is in. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef MATCH_MAY_ALLOCATE /* otherwise, this is global. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ register_info_type *reg_info; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ /* The following record the register info as found in the above ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ variables when we find a match better than any we've seen before. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ This happens as we backtrack through the failure points, which in ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ turn happens only if we have not yet matched the entire string. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int best_regs_set = false; ~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_char **best_regstart, **best_regend; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ /* Logically, this is `best_regend[0]'. But we don't want to have to ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ allocate space for that if we're not allocating space for anything ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ else (see below). Also, we never need info about register 0 for ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ any of the other register vectors, and it seems rather a kludge to ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ treat `best_regend' differently than the rest. So we keep track of ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the end of the best match so far in a separate variable. We ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ initialize this to NULL so that when we backtrack the first time ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ and need to test it, it's not garbage. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_char *match_end = NULL; ~~~~~~~~~~~~~~~~~~~~~~~~~~ /* This helps SET_REGS_MATCHED avoid doing redundant work. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int set_regs_matched_done = 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Used when we pop values we don't care about. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_char **reg_dummy; ~~~~~~~~~~~~~~~~~~~~ register_info_type *reg_info_dummy; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ #ifdef DEBUG ~~~~~~~~~~~~ /* Counts the total number of registers pushed. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int num_regs_pushed = 0; ~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ /* 1 if this match ends in the same string (string1 or string2) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ as the best previous match. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_bool same_str_p; ~~~~~~~~~~~~~~~~~~~ /* 1 if this match is the best seen so far. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_bool best_match_p; ~~~~~~~~~~~~~~~~~~~~~ #ifdef emacs ~~~~~~~~~~~~ Internal_Format fmt = buffer_or_other_internal_format (lispobj); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef REL_ALLOC ~~~~~~~~~~~~~~~~ const Ibyte *orig_buftext = ~~~~~~~~~~~~~~~~~~~~~~~~~~~ BUFFERP (lispobj) ? ~~~~~~~~~~~~~~~~~~~ BYTE_BUF_BYTE_ADDRESS (XBUFFER (lispobj), ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ BYTE_BUF_BEG (XBUFFER (lispobj))) : ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 0; ~~ #endif ~~~~~~ #ifdef ERROR_CHECK_MALLOC ~~~~~~~~~~~~~~~~~~~~~~~~~ int depth = bind_regex_malloc_disallowed (1); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ #endif /* emacs */ ~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT1 ("\n\nEntering re_match_2.\n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ BEGIN_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~~~ INIT_FAIL_STACK (); ~~~~~~~~~~~~~~~~~~~ p = (unsigned char *) ALLOCA (bufp->used); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ END_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~ /* re_match_2_internal() modifies the compiled pattern (see the succeed_n, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ jump_n, set_number_at opcodes), make it re-entrant by working on a ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ copy. This should also give better locality of reference. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ memcpy (p, bufp->buffer, bufp->used); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ pstart = (re_char *) p; ~~~~~~~~~~~~~~~~~~~~~~~ pend = pstart + bufp->used; ~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef MATCH_MAY_ALLOCATE ~~~~~~~~~~~~~~~~~~~~~~~~~ /* Do not bother to initialize all the register variables if there are ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ no groups in the pattern, as it takes a fair amount of time. If ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ there are groups, we include space for register 0 (the whole ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ pattern), even though we never use it, since it simplifies the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ array indexing. We should fix this. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (bufp->re_ngroups) ~~~~~~~~~~~~~~~~~~~~~ { ~ BEGIN_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~~~ regstart = REGEX_TALLOC (num_regs, re_char *); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regend = REGEX_TALLOC (num_regs, re_char *); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ old_regstart = REGEX_TALLOC (num_regs, re_char *); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ old_regend = REGEX_TALLOC (num_regs, re_char *); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ best_regstart = REGEX_TALLOC (num_regs, re_char *); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ best_regend = REGEX_TALLOC (num_regs, re_char *); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ reg_info = REGEX_TALLOC (num_regs, register_info_type); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ reg_dummy = REGEX_TALLOC (num_regs, re_char *); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ reg_info_dummy = REGEX_TALLOC (num_regs, register_info_type); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ END_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~ if (!(regstart && regend && old_regstart && old_regend && reg_info ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ && best_regstart && best_regend && reg_dummy && reg_info_dummy)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ FREE_VARIABLES (); ~~~~~~~~~~~~~~~~~~ return -2; ~~~~~~~~~~ } ~ } ~ else ~~~~ { ~ /* We must initialize all our variables to NULL, so that ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `FREE_VARIABLES' doesn't try to free them. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regstart = regend = old_regstart = old_regend = best_regstart ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ = best_regend = reg_dummy = NULL; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ reg_info = reg_info_dummy = (register_info_type *) NULL; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ #endif /* MATCH_MAY_ALLOCATE */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #if defined (emacs) && defined (REL_ALLOC) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ /* If the allocations above (or the call to setup_syntax_cache() in ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_match_2) caused a rel-alloc relocation, then fix up the data ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ pointers */ ~~~~~~~~~~~ Bytecount offset = offset_post_relocation (lispobj, orig_buftext); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (offset) ~~~~~~~~~~~ { ~ string1 += offset; ~~~~~~~~~~~~~~~~~~ string2 += offset; ~~~~~~~~~~~~~~~~~~ } ~ } ~ #endif /* defined (emacs) && defined (REL_ALLOC) */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* The starting position is bogus. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (pos < 0 || pos > size1 + size2) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ FREE_VARIABLES (); ~~~~~~~~~~~~~~~~~~ return -1; ~~~~~~~~~~ } ~ /* Initialize subexpression text positions to our sentinel to mark ones that ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ no start_memory/stop_memory has been seen for. Also initialize the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ register information struct. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (mcnt = 1; mcnt < num_regs; mcnt++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ regstart[mcnt] = regend[mcnt] = old_regstart[mcnt] = old_regend[mcnt] ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ = best_regstart[mcnt] = best_regend[mcnt] = REG_UNSET_VALUE; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ REG_MATCH_NULL_STRING_P (reg_info[mcnt]) = MATCH_NULL_UNSET_VALUE; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ IS_ACTIVE (reg_info[mcnt]) = 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ MATCHED_SOMETHING (reg_info[mcnt]) = 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ EVER_MATCHED_SOMETHING (reg_info[mcnt]) = 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* We move `string1' into `string2' if the latter's empty -- but not if ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `string1' is null. */ ~~~~~~~~~~~~~~~~~~~~~~ if (size2 == 0 && string1 != NULL) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ string2 = string1; ~~~~~~~~~~~~~~~~~~ size2 = size1; ~~~~~~~~~~~~~~ string1 = 0; ~~~~~~~~~~~~ size1 = 0; ~~~~~~~~~~ } ~ end1 = string1 + size1; ~~~~~~~~~~~~~~~~~~~~~~~ end2 = string2 + size2; ~~~~~~~~~~~~~~~~~~~~~~~ /* Compute where to stop matching, within the two strings. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (stop <= size1) ~~~~~~~~~~~~~~~~~~ { ~ end_match_1 = string1 + stop; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ end_match_2 = string2; ~~~~~~~~~~~~~~~~~~~~~~ } ~ else ~~~~ { ~ end_match_1 = end1; ~~~~~~~~~~~~~~~~~~~ end_match_2 = string2 + stop - size1; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* `p' scans through the pattern as `d' scans through the data. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `dend' is the end of the input string that `d' points within. `d' ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ is advanced into the following input string whenever necessary, but ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ this happens before fetching; therefore, at the beginning of the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ loop, `d' can be pointing at the end of a string, but it cannot ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ equal `string2'. */ ~~~~~~~~~~~~~~~~~~~~ if (size1 > 0 && pos <= size1) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ d = string1 + pos; ~~~~~~~~~~~~~~~~~~ dend = end_match_1; ~~~~~~~~~~~~~~~~~~~ } ~ else ~~~~ { ~ d = string2 + pos - size1; ~~~~~~~~~~~~~~~~~~~~~~~~~~ dend = end_match_2; ~~~~~~~~~~~~~~~~~~~ } ~ DEBUG_MATCH_PRINT1 ("The compiled pattern is: \n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT_COMPILED_PATTERN (bufp, p, pend); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT1 ("The string to match is: `"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT_DOUBLE_STRING (d, string1, size1, string2, size2); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT1 ("'\n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* This loops over pattern commands. It exits by returning from the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ function if the match is complete, or it drops through if the match ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fails at this starting point in the input data. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (;;) ~~~~~~~~ { ~ DEBUG_MATCH_PRINT2 ("\n0x%zx: ", (Bytecount) p); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef emacs /* XEmacs added, w/removal of immediate_quit */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!no_quit_in_re_search) ~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ BEGIN_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~~~ QUIT; ~~~~~ END_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~ RE_MATCH_RELOCATE_MOVEABLE_DATA_POINTERS (); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ #endif ~~~~~~ if (p == pend) ~~~~~~~~~~~~~~ { /* End of pattern means we might have succeeded. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT1 ("end of pattern ... "); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* If we haven't matched the entire string, and we want the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ longest match, try backtracking. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (d != end_match_2) ~~~~~~~~~~~~~~~~~~~~~ { ~ same_str_p = (FIRST_STRING_P (match_end) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ == MATCHING_IN_FIRST_STRING); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* AIX compiler got confused when this was combined ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ with the previous declaration. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (same_str_p) ~~~~~~~~~~~~~~~ best_match_p = d > match_end; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ else ~~~~ best_match_p = !MATCHING_IN_FIRST_STRING; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT1 ("backtracking.\n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!FAIL_STACK_EMPTY ()) ~~~~~~~~~~~~~~~~~~~~~~~~~ { /* More failure points to try. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* If exceeds best match so far, save it. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!best_regs_set || best_match_p) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ best_regs_set = true; ~~~~~~~~~~~~~~~~~~~~~ match_end = d; ~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT1 ("\nSAVING match as best so far.\n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (mcnt = 1; mcnt < num_regs; mcnt++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ best_regstart[mcnt] = regstart[mcnt]; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ best_regend[mcnt] = regend[mcnt]; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ } ~ goto fail; ~~~~~~~~~~ } ~ /* If no failure points, don't restore garbage. And if ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ last match is real best match, don't restore second ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ best one. */ ~~~~~~~~~~~~ else if (best_regs_set && !best_match_p) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ restore_best_regs: ~~~~~~~~~~~~~~~~~~ /* Restore best match. It may happen that `dend == ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ end_match_1' while the restored d is in string2. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ For example, the pattern `x.*y.*z' against the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ strings `x-' and `y-z-', if the two strings are ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ not consecutive in memory. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT1 ("Restoring best registers.\n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ d = match_end; ~~~~~~~~~~~~~~ dend = ((d >= string1 && d <= end1) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ? end_match_1 : end_match_2); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (mcnt = 1; mcnt < num_regs; mcnt++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ regstart[mcnt] = best_regstart[mcnt]; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regend[mcnt] = best_regend[mcnt]; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ } ~ } /* d != end_match_2 */ ~~~~~~~~~~~~~~~~~~~~~~~~ succeed_label: ~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT1 ("Accepting match.\n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* If caller wants register contents data back, do it. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ int num_nonshy_regs = bufp->re_nsub + 1; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (regs && !bufp->no_sub) ~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ /* Have the register data arrays been allocated? */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (bufp->regs_allocated == REGS_UNALLOCATED) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { /* No. So allocate them with malloc. We need one ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ extra element beyond `num_regs' for the `-1' marker ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ GNU code uses. */ ~~~~~~~~~~~~~~~~~~ regs->num_regs = MAX (RE_NREGS, num_nonshy_regs + 1); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ BEGIN_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~~~ regs->start = TALLOC (regs->num_regs, regoff_t); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regs->end = TALLOC (regs->num_regs, regoff_t); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ END_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~ RE_MATCH_RELOCATE_MOVEABLE_DATA_POINTERS (); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (regs->start == NULL || regs->end == NULL) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ FREE_VARIABLES (); ~~~~~~~~~~~~~~~~~~ return -2; ~~~~~~~~~~ } ~ bufp->regs_allocated = REGS_REALLOCATE; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ else if (bufp->regs_allocated == REGS_REALLOCATE) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { /* Yes. If we need more elements than were already ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ allocated, reallocate them. If we need fewer, just ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ leave it alone. */ ~~~~~~~~~~~~~~~~~~~ if (regs->num_regs < num_nonshy_regs + 1) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ regs->num_regs = num_nonshy_regs + 1; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ BEGIN_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~~~ RETALLOC (regs->start, regs->num_regs, regoff_t); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RETALLOC (regs->end, regs->num_regs, regoff_t); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ END_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~ RE_MATCH_RELOCATE_MOVEABLE_DATA_POINTERS (); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (regs->start == NULL || regs->end == NULL) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ FREE_VARIABLES (); ~~~~~~~~~~~~~~~~~~ return -2; ~~~~~~~~~~ } ~ } ~ } ~ else ~~~~ { ~ /* The braces fend off a "empty body in an else-statement" ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ warning under GCC when assert expands to nothing. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ assert (bufp->regs_allocated == REGS_FIXED); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* Convert the pointer data in `regstart' and `regend' to ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ indices. Register zero has to be set differently, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ since we haven't kept track of any info for it. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (regs->num_regs > 0) ~~~~~~~~~~~~~~~~~~~~~~~ { ~ regs->start[0] = pos; ~~~~~~~~~~~~~~~~~~~~~ regs->end[0] = (MATCHING_IN_FIRST_STRING ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ? ((regoff_t) (d - string1)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ : ((regoff_t) (d - string2 + size1))); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* Map over the NUM_NONSHY_REGS non-shy internal registers. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Copy each into the corresponding external register. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ MCNT indexes external registers. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (mcnt = 1; mcnt < MIN (num_nonshy_regs, regs->num_regs); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ mcnt++) ~~~~~~~ { ~ int internal_reg = bufp->external_to_internal_register[mcnt]; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if ((int)0xDEADBEEF == internal_reg ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || REG_UNSET (regstart[internal_reg]) || ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ REG_UNSET (regend[internal_reg])) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regs->start[mcnt] = regs->end[mcnt] = -1; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ else ~~~~ { ~ regs->start[mcnt] = ~~~~~~~~~~~~~~~~~~~ (regoff_t) POINTER_TO_OFFSET (regstart[internal_reg]); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regs->end[mcnt] = ~~~~~~~~~~~~~~~~~ (regoff_t) POINTER_TO_OFFSET (regend[internal_reg]); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ } ~ } /* regs && !bufp->no_sub */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* If we have regs and the regs structure has more elements than ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ were in the pattern, set the extra elements starting with ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ NUM_NONSHY_REGS to -1. If we (re)allocated the registers, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ this is the case, because we always allocate enough to have ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ at least one -1 at the end. ~~~~~~~~~~~~~~~~~~~~~~~~~~~ We do this even when no_sub is set because some applications ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (XEmacs) reuse register structures which may contain stale ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ information, and permit attempts to access those registers. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ It would be possible to require the caller to do this, but we'd ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ have to change the API for this function to reflect that, and ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ audit all callers. Note: as of 2003-04-17 callers in XEmacs ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ do clear the registers, but it's safer to leave this code in ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ because of reallocation. ~~~~~~~~~~~~~~~~~~~~~~~~ */ ~~ if (regs && regs->num_regs > 0) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (mcnt = num_nonshy_regs; mcnt < regs->num_regs; mcnt++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regs->start[mcnt] = regs->end[mcnt] = -1; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ DEBUG_MATCH_PRINT4 ("%u failure points pushed, %u popped (%u remain).\n", ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ nfailure_points_pushed, nfailure_points_popped, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ nfailure_points_pushed - nfailure_points_popped); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT2 ("%u registers pushed.\n", num_regs_pushed); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ mcnt = d - pos - (MATCHING_IN_FIRST_STRING ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ? string1 ~~~~~~~~~ : string2 - size1); ~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT2 ("Returning %d from re_match_2.\n", mcnt); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ FREE_VARIABLES (); ~~~~~~~~~~~~~~~~~~ return mcnt; ~~~~~~~~~~~~ } ~ /* Otherwise match next pattern command. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ switch ((re_opcode_t) *p++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ /* Ignore these. Used to ignore the n of succeed_n's which ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ currently have n == 0. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~ case no_op: ~~~~~~~~~~~ DEBUG_MATCH_PRINT1 ("EXECUTING no_op.\n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case succeed: ~~~~~~~~~~~~~ DEBUG_MATCH_PRINT1 ("EXECUTING succeed.\n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto succeed_label; ~~~~~~~~~~~~~~~~~~~ /* Match exactly a string of length n in the pattern. The ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ following byte in the pattern defines n, and the n bytes after ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ that make up the string to match. (Under Mule, this will be in ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the default internal format.) */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case exactn: ~~~~~~~~~~~~ mcnt = *p++; ~~~~~~~~~~~~ DEBUG_MATCH_PRINT2 ("EXECUTING exactn %d.\n", mcnt); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* This is written out as an if-else so we don't waste time ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ testing `translate' inside the loop. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (TRANSLATE_P (translate)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ do ~~ { ~ #ifdef MULE ~~~~~~~~~~~ Bytecount pat_len; ~~~~~~~~~~~~~~~~~~ REGEX_PREFETCH (); ~~~~~~~~~~~~~~~~~~ if (RE_TRANSLATE_1 (itext_ichar_fmt (d, fmt, lispobj)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ != itext_ichar (p)) ~~~~~~~~~~~~~~~~~~~ goto fail; ~~~~~~~~~~ pat_len = itext_ichar_len (p); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ p += pat_len; ~~~~~~~~~~~~~ INC_IBYTEPTR_FMT (d, fmt); ~~~~~~~~~~~~~~~~~~~~~~~~~~ mcnt -= pat_len; ~~~~~~~~~~~~~~~~ #else /* not MULE */ ~~~~~~~~~~~~~~~~~~~~ REGEX_PREFETCH (); ~~~~~~~~~~~~~~~~~~ if ((unsigned char) RE_TRANSLATE_1 (*d++) != *p++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto fail; ~~~~~~~~~~ mcnt--; ~~~~~~~ #endif ~~~~~~ } ~ while (mcnt > 0); ~~~~~~~~~~~~~~~~~ } ~ else ~~~~ { ~ #ifdef MULE ~~~~~~~~~~~ /* If buffer format is default, then we can shortcut and just ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ compare the text directly, byte by byte. Otherwise, we ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ need to go character by character. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (fmt != FORMAT_DEFAULT) ~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ do ~~ { ~ Bytecount pat_len; ~~~~~~~~~~~~~~~~~~ REGEX_PREFETCH (); ~~~~~~~~~~~~~~~~~~ if (itext_ichar_fmt (d, fmt, lispobj) != ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ itext_ichar (p)) ~~~~~~~~~~~~~~~~ goto fail; ~~~~~~~~~~ pat_len = itext_ichar_len (p); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ p += pat_len; ~~~~~~~~~~~~~ INC_IBYTEPTR_FMT (d, fmt); ~~~~~~~~~~~~~~~~~~~~~~~~~~ mcnt -= pat_len; ~~~~~~~~~~~~~~~~ } ~ while (mcnt > 0); ~~~~~~~~~~~~~~~~~ } ~ else ~~~~ #endif ~~~~~~ { ~ do ~~ { ~ REGEX_PREFETCH (); ~~~~~~~~~~~~~~~~~~ if (*d++ != *p++) goto fail; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ mcnt--; ~~~~~~~ } ~ while (mcnt > 0); ~~~~~~~~~~~~~~~~~ } ~ } ~ SET_REGS_MATCHED (); ~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ /* Match any character except possibly a newline or a null. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case anychar: ~~~~~~~~~~~~~ DEBUG_MATCH_PRINT1 ("EXECUTING anychar.\n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ REGEX_PREFETCH (); ~~~~~~~~~~~~~~~~~~ if ((!(bufp->syntax & RE_DOT_NEWLINE) && ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RE_TRANSLATE (itext_ichar_fmt (d, fmt, lispobj)) == '\n') ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || (bufp->syntax & RE_DOT_NOT_NULL && ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RE_TRANSLATE (itext_ichar_fmt (d, fmt, lispobj)) == ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ '\000')) ~~~~~~~~ goto fail; ~~~~~~~~~~ SET_REGS_MATCHED (); ~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT2 (" Matched `%c'.\n", *d); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ INC_IBYTEPTR_FMT (d, fmt); /* XEmacs change */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case charset: ~~~~~~~~~~~~~ case charset_not: ~~~~~~~~~~~~~~~~~ { ~ REGISTER Ichar c; ~~~~~~~~~~~~~~~~~ re_bool not_p = (re_opcode_t) *(p - 1) == charset_not; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT2 ("EXECUTING charset%s.\n", not_p ? "_not" : ""); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ REGEX_PREFETCH (); ~~~~~~~~~~~~~~~~~~ c = itext_ichar_fmt (d, fmt, lispobj); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ c = RE_TRANSLATE (c); /* The character to match. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Cast to `unsigned int' instead of `unsigned char' in case the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bit list is a full 32 bytes long. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if ((unsigned int)c < (unsigned int) (*p * BYTEWIDTH) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ && p[1 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH))) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ not_p = !not_p; ~~~~~~~~~~~~~~~ p += 1 + *p; ~~~~~~~~~~~~ if (!not_p) goto fail; ~~~~~~~~~~~~~~~~~~~~~~ SET_REGS_MATCHED (); ~~~~~~~~~~~~~~~~~~~~ INC_IBYTEPTR_FMT (d, fmt); /* XEmacs change */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ } ~ #ifdef MULE ~~~~~~~~~~~ case charset_mule: ~~~~~~~~~~~~~~~~~~ case charset_mule_not: ~~~~~~~~~~~~~~~~~~~~~~ { ~ REGISTER Ichar c; ~~~~~~~~~~~~~~~~~ re_bool not_p = (re_opcode_t) *(p - 1) == charset_mule_not; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Bitbyte class_bits = *p++; ~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT2 ("EXECUTING charset_mule%s.\n", ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ not_p ? "_not" : ""); ~~~~~~~~~~~~~~~~~~~~~ REGEX_PREFETCH (); ~~~~~~~~~~~~~~~~~~ c = itext_ichar_fmt (d, fmt, lispobj); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ c = RE_TRANSLATE (c); /* The character to match. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if ((class_bits && ~~~~~~~~~~~~~~~~~~ ((class_bits & BIT_WORD && ISWORD (c)) /* = ALNUM */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || (class_bits & BIT_ALPHA && ISALPHA (c)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || (class_bits & BIT_SPACE && ISSPACE (c)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || (class_bits & BIT_PUNCT && ISPUNCT (c)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || (TRANSLATE_P (translate) ? ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (class_bits & (BIT_UPPER | BIT_LOWER) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ && !NOCASEP (lispbuf, c)) ~~~~~~~~~~~~~~~~~~~~~~~~~ : ((class_bits & BIT_UPPER && ISUPPER (c)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || (class_bits & BIT_LOWER && ISLOWER (c)))))) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || EQ (Qt, unified_range_table_lookup ((void *) p, c, Qnil))) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ not_p = !not_p; ~~~~~~~~~~~~~~~ } ~ p += unified_range_table_bytes_used ((void *) p); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!not_p) goto fail; ~~~~~~~~~~~~~~~~~~~~~~ SET_REGS_MATCHED (); ~~~~~~~~~~~~~~~~~~~~ INC_IBYTEPTR_FMT (d, fmt); ~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ } ~ #endif /* MULE */ ~~~~~~~~~~~~~~~~~ /* The beginning of a group is represented by start_memory. The ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ arguments are the register number in the next two bytes, and the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ number of groups inner to this one in the two bytes thereafter. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ The text matched within the group is recorded (in the internal ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ registers data structure) under the register number. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case start_memory: ~~~~~~~~~~~~~~~~~~ { ~ regnum_t regno; ~~~~~~~~~~~~~~~ /* Find out if this group can match the empty string. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ p1 = p; /* To send to group_match_null_string_p. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ EXTRACT_NONNEGATIVE_AND_INCR (regno, p); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT3 ("EXECUTING start_memory %d (%d):\n", ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regno, extract_number (p)); ~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (REG_MATCH_NULL_STRING_P (reg_info[regno]) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ == MATCH_NULL_UNSET_VALUE) ~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ REG_MATCH_NULL_STRING_P (reg_info[regno]) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ = group_match_null_string_p (&p1, pend, reg_info); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ DEBUG_MATCH_PRINT2 (" group CAN%s match null string\n", ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ REG_MATCH_NULL_STRING_P (reg_info[regno]) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ? "NOT" : ""); ~~~~~~~~~~~~~~ /* Save the position in the string where we were the last time ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ we were at this open-group operator in case the group is ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ operated upon by a repetition operator, e.g., with `(a*)*b' ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ against `ab'; then we want to ignore where we are now in the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ string in case this attempt to match fails. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ old_regstart[regno] = REG_MATCH_NULL_STRING_P (reg_info[regno]) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ? REG_UNSET (regstart[regno]) ? d : regstart[regno] ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ : regstart[regno]; ~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT2 (" old_regstart: %zd\n", ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ POINTER_TO_OFFSET (old_regstart[regno])); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regstart[regno] = d; ~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT2 (" regstart: %zd\n", ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ POINTER_TO_OFFSET (regstart[regno])); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ IS_ACTIVE (reg_info[regno]) = 1; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ MATCHED_SOMETHING (reg_info[regno]) = 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Clear this whenever we change the register activity status. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ set_regs_matched_done = 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~ /* This is the new highest active register. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ highest_active_reg = regno; ~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* If nothing was active before, this is the new lowest active ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ register. */ ~~~~~~~~~~~~~ if (lowest_active_reg == NO_LOWEST_ACTIVE_REG) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ lowest_active_reg = regno; ~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Move past the inner group count. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ p += 2; ~~~~~~~ just_past_start_mem = p; ~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ } ~ /* The stop_memory opcode represents the end of a group. Its ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ arguments are the same as start_memory's: the register ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ number, and the number of inner groups. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case stop_memory: ~~~~~~~~~~~~~~~~~ { ~ regnum_t regno, inner_groups; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ EXTRACT_NONNEGATIVE_AND_INCR (regno, p); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ EXTRACT_NONNEGATIVE_AND_INCR (inner_groups, p); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT3 ("EXECUTING stop_memory %d (%d):\n", ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regno, inner_groups); ~~~~~~~~~~~~~~~~~~~~~ /* We need to save the string position the last time we were at ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ this close-group operator in case the group is operated ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ upon by a repetition operator, e.g., with `((a*)*(b*)*)*' ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ against `aba'; then we want to ignore where we are now in ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the string in case this attempt to match fails. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ old_regend[regno] = REG_MATCH_NULL_STRING_P (reg_info[regno]) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ? REG_UNSET (regend[regno]) ? d : regend[regno] ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ : regend[regno]; ~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT2 (" old_regend: %zd\n", ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ POINTER_TO_OFFSET (old_regend[regno])); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regend[regno] = d; ~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT2 (" regend: %zd\n", ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ POINTER_TO_OFFSET (regend[regno])); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* This register isn't active anymore. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ IS_ACTIVE (reg_info[regno]) = 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Clear this whenever we change the register activity status. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ set_regs_matched_done = 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~ /* If this was the only register active, nothing is active ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ anymore. */ ~~~~~~~~~~~~ if (lowest_active_reg == highest_active_reg) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ lowest_active_reg = NO_LOWEST_ACTIVE_REG; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ highest_active_reg = NO_HIGHEST_ACTIVE_REG; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ else ~~~~ { ~ /* We must scan for the new highest active register, since it ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ isn't necessarily one less than now: consider ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (a(b)c(d(e)f)g). When group 3 ends, after the f), the new ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ highest active register is 1. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regnum_t r = regno - 1; ~~~~~~~~~~~~~~~~~~~~~~~ while (r > 0 && !IS_ACTIVE (reg_info[r])) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ r--; ~~~~ /* If we end up at register zero, that means that we saved ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the registers as the result of an `on_failure_jump', not ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ a `start_memory', and we jumped to past the innermost ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `stop_memory'. For example, in ((.)*) we save registers ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 1 and 2 as a result of the *, but when we pop back to the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ second ), we are at the stop_memory 1. Thus, nothing is ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ active. */ ~~~~~~~~~~~ if (r == 0) ~~~~~~~~~~~ { ~ lowest_active_reg = NO_LOWEST_ACTIVE_REG; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ highest_active_reg = NO_HIGHEST_ACTIVE_REG; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ else ~~~~ { ~ highest_active_reg = r; ~~~~~~~~~~~~~~~~~~~~~~~ /* 98/9/21 jhod: We've also gotta set lowest_active_reg, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ don't we? */ ~~~~~~~~~~~~ r = 1; ~~~~~~ while (r < highest_active_reg && !IS_ACTIVE(reg_info[r])) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ r++; ~~~~ lowest_active_reg = r; ~~~~~~~~~~~~~~~~~~~~~~ } ~ } ~ /* If just failed to match something this time around with a ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ group that's operated on by a repetition operator, try to ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ force exit from the ``loop'', and restore the register ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ information for this group that we had before trying this ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ last match. */ ~~~~~~~~~~~~~~~ if ((!MATCHED_SOMETHING (reg_info[regno]) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || just_past_start_mem == p - 4) && p < pend) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ re_bool is_a_jump_n = false; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ p1 = p; ~~~~~~~ mcnt = 0; ~~~~~~~~~ switch ((re_opcode_t) *p1++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ case jump_n: ~~~~~~~~~~~~ is_a_jump_n = true; ~~~~~~~~~~~~~~~~~~~ case pop_failure_jump: ~~~~~~~~~~~~~~~~~~~~~~ case maybe_pop_jump: ~~~~~~~~~~~~~~~~~~~~ case jump: ~~~~~~~~~~ case dummy_failure_jump: ~~~~~~~~~~~~~~~~~~~~~~~~ EXTRACT_NUMBER_AND_INCR (mcnt, p1); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (is_a_jump_n) ~~~~~~~~~~~~~~~~ p1 += 2; ~~~~~~~~ break; ~~~~~~ default: ~~~~~~~~ /* do nothing */ ; ~~~~~~~~~~~~~~~~~~ } ~ p1 += mcnt; ~~~~~~~~~~~ /* If the next operation is a jump backwards in the pattern ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ to an on_failure_jump right before the start_memory ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ corresponding to this stop_memory, exit from the loop ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ by forcing a failure after pushing on the stack the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ on_failure_jump's jump in the pattern, and d. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (mcnt < 0 && (re_opcode_t) *p1 == on_failure_jump ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ && (re_opcode_t) p1[3] == start_memory && ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regno == extract_nonnegative (p1 + 4)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ /* If this group ever matched anything, then restore ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ what its registers were before trying this last ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ failed match, e.g., with `(a*)*b' against `ab' for ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regstart[1], and, e.g., with `((a*)*(b*)*)*' against ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `aba' for regend[3]. ~~~~~~~~~~~~~~~~~~~~ Also restore the registers for inner groups for, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ e.g., `((a*)(b*))*' against `aba' (register 3 would ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ otherwise get trashed). */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (EVER_MATCHED_SOMETHING (reg_info[regno])) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ int r; ~~~~~~ EVER_MATCHED_SOMETHING (reg_info[regno]) = 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Restore this and inner groups' (if any) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ registers. */ ~~~~~~~~~~~~~~ for (r = regno; r < regno + inner_groups; r++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ regstart[r] = old_regstart[r]; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* xx why this test? */ ~~~~~~~~~~~~~~~~~~~~~~~~ if (old_regend[r] >= regstart[r]) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regend[r] = old_regend[r]; ~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ } ~ p1++; ~~~~~ EXTRACT_NUMBER_AND_INCR (mcnt, p1); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PUSH_FAILURE_POINT (p1 + mcnt, d, -2); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto fail; ~~~~~~~~~~ } ~ } ~ /* We used to move past the register number and inner group count ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ here, when registers were just one byte; that's no longer ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ necessary with EXTRACT_NUMBER_AND_INCR(), above. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ } ~ /* \ has been turned into a `duplicate' command which is ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ followed by the numeric value of as the register number. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (Already passed through external-to-internal-register mapping, so ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ it refers to the actual group number, not the non-shy-only ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ numbering used in the external world.) */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case duplicate: ~~~~~~~~~~~~~~~ { ~ REGISTER re_char *d2, *dend2; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Get which register to match against. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regnum_t regno; ~~~~~~~~~~~~~~~ EXTRACT_NONNEGATIVE_AND_INCR (regno, p); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT2 ("EXECUTING duplicate %d.\n", regno); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Can't back reference a group which we've never matched. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (REG_UNSET (regstart[regno]) || REG_UNSET (regend[regno])) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto fail; ~~~~~~~~~~ /* Where in input to try to start matching. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ d2 = regstart[regno]; ~~~~~~~~~~~~~~~~~~~~~ /* Where to stop matching; if both the place to start and ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the place to stop matching are in the same string, then ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ set to the place to stop, otherwise, for now have to use ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the end of the first string. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ dend2 = ((FIRST_STRING_P (regstart[regno]) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ == FIRST_STRING_P (regend[regno])) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ? regend[regno] : end_match_1); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (;;) ~~~~~~~~ { ~ /* If necessary, advance to next segment in register ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ contents. */ ~~~~~~~~~~~~~ while (d2 == dend2) ~~~~~~~~~~~~~~~~~~~ { ~ if (dend2 == end_match_2) break; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (dend2 == regend[regno]) break; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* End of string1 => advance to string2. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ d2 = string2; ~~~~~~~~~~~~~ dend2 = regend[regno]; ~~~~~~~~~~~~~~~~~~~~~~ } ~ /* At end of register contents => success */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (d2 == dend2) break; ~~~~~~~~~~~~~~~~~~~~~~~ /* If necessary, advance to next segment in data. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ REGEX_PREFETCH (); ~~~~~~~~~~~~~~~~~~ /* How many characters left in this segment to match. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ mcnt = dend - d; ~~~~~~~~~~~~~~~~ /* Want how many consecutive characters we can match in ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ one shot, so, if necessary, adjust the count. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (mcnt > dend2 - d2) ~~~~~~~~~~~~~~~~~~~~~~ mcnt = dend2 - d2; ~~~~~~~~~~~~~~~~~~ /* Compare that many; failure if mismatch, else move ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ past them. */ ~~~~~~~~~~~~~~ if (TRANSLATE_P (translate) ~~~~~~~~~~~~~~~~~~~~~~~~~~~ ? bcmp_translate (d, d2, mcnt, translate ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef emacs ~~~~~~~~~~~~ , fmt, lispobj ~~~~~~~~~~~~~~ #endif ~~~~~~ ) ~ : memcmp (d, d2, mcnt)) ~~~~~~~~~~~~~~~~~~~~~~~ goto fail; ~~~~~~~~~~ d += mcnt, d2 += mcnt; ~~~~~~~~~~~~~~~~~~~~~~ /* Do this because we've match some characters. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ SET_REGS_MATCHED (); ~~~~~~~~~~~~~~~~~~~~ } ~ } ~ break; ~~~~~~ /* begline matches the empty string at the beginning of the string ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (unless `not_bol' is set in `bufp'), and, if ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `newline_anchor' is set, after newlines. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case begline: ~~~~~~~~~~~~~ DEBUG_MATCH_PRINT1 ("EXECUTING begline.\n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (AT_STRINGS_BEG (d)) ~~~~~~~~~~~~~~~~~~~~~~~ { ~ if (!bufp->not_bol) break; ~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ else ~~~~ { ~ re_char *d2 = d; ~~~~~~~~~~~~~~~~ DEC_IBYTEPTR (d2); ~~~~~~~~~~~~~~~~~~ if (itext_ichar_ascii_fmt (d2, fmt, lispobj) == '\n' && ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->newline_anchor) ~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ } ~ /* In all other cases, we fail. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto fail; ~~~~~~~~~~ /* endline is the dual of begline. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case endline: ~~~~~~~~~~~~~ DEBUG_MATCH_PRINT1 ("EXECUTING endline.\n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (AT_STRINGS_END (d)) ~~~~~~~~~~~~~~~~~~~~~~~ { ~ if (!bufp->not_eol) break; ~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* We have to ``prefetch'' the next character. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ else if ((d == end1 ? ~~~~~~~~~~~~~~~~~~~~~ itext_ichar_ascii_fmt (string2, fmt, lispobj) : ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ itext_ichar_ascii_fmt (d, fmt, lispobj)) == '\n' ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ && bufp->newline_anchor) ~~~~~~~~~~~~~~~~~~~~~~~~ { ~ break; ~~~~~~ } ~ goto fail; ~~~~~~~~~~ /* Match at the very beginning of the data. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case begbuf: ~~~~~~~~~~~~ DEBUG_MATCH_PRINT1 ("EXECUTING begbuf.\n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (AT_STRINGS_BEG (d)) ~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ goto fail; ~~~~~~~~~~ /* Match at the very end of the data. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case endbuf: ~~~~~~~~~~~~ DEBUG_MATCH_PRINT1 ("EXECUTING endbuf.\n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (AT_STRINGS_END (d)) ~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ goto fail; ~~~~~~~~~~ /* on_failure_keep_string_jump is used to optimize `.*\n'. It ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ pushes NULL as the value for the string on the stack. Then ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `pop_failure_point' will keep the current value for the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ string, instead of restoring it. To see why, consider ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ matching `foo\nbar' against `.*\n'. The .* matches the foo; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ then the . fails against the \n. But the next thing we want ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ to do is match the \n against the \n; if we restored the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ string value, we would be back at the foo. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Because this is used only in specific cases, we don't need to ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ check all the things that `on_failure_jump' does, to make ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ sure the right things get saved on the stack. Hence we don't ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ share its code. The only reason to push anything on the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ stack at all is that otherwise we would have to change ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `anychar's code to do something besides goto fail in this ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case; that seems worse than this. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case on_failure_keep_string_jump: ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT1 ("EXECUTING on_failure_keep_string_jump"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ EXTRACT_NUMBER_AND_INCR (mcnt, p); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT3 (" %d (to 0x%zx):\n", mcnt, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (Bytecount) (p + mcnt)); ~~~~~~~~~~~~~~~~~~~~~~~~ PUSH_FAILURE_POINT (p + mcnt, (unsigned char *) 0, -2); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ /* Uses of on_failure_jump: ~~~~~~~~~~~~~~~~~~~~~~~~~~~ Each alternative starts with an on_failure_jump that points ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ to the beginning of the next alternative. Each alternative ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ except the last ends with a jump that in effect jumps past ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the rest of the alternatives. (They really jump to the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ending jump of the following alternative, because tensioning ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ these jumps is a hassle.) ~~~~~~~~~~~~~~~~~~~~~~~~~ Repeats start with an on_failure_jump that points past both ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the repetition text and either the following jump or ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ pop_failure_jump back to this on_failure_jump. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case on_failure_jump: ~~~~~~~~~~~~~~~~~~~~~ on_failure: ~~~~~~~~~~~ DEBUG_MATCH_PRINT1 ("EXECUTING on_failure_jump"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ EXTRACT_NUMBER_AND_INCR (mcnt, p); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT3 (" %d (to 0x%zx)", mcnt, (Bytecount) (p + mcnt)); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* If this on_failure_jump comes right before a group (i.e., ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the original * applied to a group), save the information ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for that group and all inner ones, so that if we fail back ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ to this point, the group's information will be correct. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ For example, in \(a*\)*\1, we need the preceding group, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ and in \(\(a*\)b*\)\2, we need the inner group. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* We can't use `p' to check ahead because we push ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ a failure point to `p + mcnt' after we do this. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ p1 = p; ~~~~~~~ /* We need to skip no_op's before we look for the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ start_memory in case this on_failure_jump is happening as ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the result of a completed succeed_n, as in \(a\)\{1,3\}b\1 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ against aba. */ ~~~~~~~~~~~~~~~~ while (p1 < pend && (re_opcode_t) *p1 == no_op) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ p1++; ~~~~~ if (p1 < pend && (re_opcode_t) *p1 == start_memory) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ /* We have a new highest active register now. This will ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ get reset at the start_memory we are about to get to, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ but we will have saved all the registers relevant to ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ this repetition op, as described above. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ highest_active_reg = *(p1 + 1) + *(p1 + 2); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (lowest_active_reg == NO_LOWEST_ACTIVE_REG) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ lowest_active_reg = *(p1 + 1); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ DEBUG_MATCH_PRINT1 (":\n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~ PUSH_FAILURE_POINT (p + mcnt, d, -2); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ /* A smart repeat ends with `maybe_pop_jump'. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ We change it to either `pop_failure_jump' or `jump'. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case maybe_pop_jump: ~~~~~~~~~~~~~~~~~~~~ EXTRACT_NUMBER_AND_INCR (mcnt, p); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT2 ("EXECUTING maybe_pop_jump %d.\n", mcnt); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ REGISTER const unsigned char *p2 = p; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Compare the beginning of the repeat with what in the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ pattern follows its end. If we can establish that there ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ is nothing that they would both match, i.e., that we ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ would have to backtrack because of (as in, e.g., `a*a') ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ then we can change to pop_failure_jump, because we'll ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ never have to backtrack. ~~~~~~~~~~~~~~~~~~~~~~~~ This is not true in the case of alternatives: in ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `(a|ab)*' we do need to backtrack to the `ab' alternative ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (e.g., if the string was `ab'). But instead of trying to ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ detect that here, the alternative has put on a dummy ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ failure point which is what we will end up popping. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Skip over open/close-group commands. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ If what follows this loop is a ...+ construct, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ look at what begins its body, since we will have to ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ match at least one of that. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ while (1) ~~~~~~~~~ { ~ if (p2 + 2 < pend ~~~~~~~~~~~~~~~~~ && ((re_opcode_t) *p2 == stop_memory ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || (re_opcode_t) *p2 == start_memory)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ p2 += 3; ~~~~~~~~ else if (p2 + 6 < pend ~~~~~~~~~~~~~~~~~~~~~~ && (re_opcode_t) *p2 == dummy_failure_jump) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ p2 += 6; ~~~~~~~~ else ~~~~ break; ~~~~~~ } ~ p1 = p + mcnt; ~~~~~~~~~~~~~~ /* p1[0] ... p1[2] are the `on_failure_jump' corresponding ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ to the `maybe_finalize_jump' of this case. Examine what ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ follows. */ ~~~~~~~~~~~~ /* If we're at the end of the pattern, we can change. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (p2 == pend) ~~~~~~~~~~~~~~~ { ~ /* Consider what happens when matching ":\(.*\)" ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ against ":/". I don't really understand this code ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ yet. */ ~~~~~~~~ ((unsigned char *)p)[-3] = (re_char) pop_failure_jump; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT1 ~~~~~~~~~~~~~~~~~~ (" End of pattern: change to `pop_failure_jump'.\n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ else if ((re_opcode_t) *p2 == exactn ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || (bufp->newline_anchor && (re_opcode_t) *p2 == endline)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ REGISTER unsigned char c ~~~~~~~~~~~~~~~~~~~~~~~~ = *p2 == (unsigned char) endline ? '\n' : p2[2]; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if ((re_opcode_t) p1[3] == exactn && p1[5] != c) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ ((unsigned char *)p)[-3] ~~~~~~~~~~~~~~~~~~~~~~~~ = (unsigned char) pop_failure_jump; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT3 (" %c != %c => pop_failure_jump.\n", ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ c, p1[5]); ~~~~~~~~~~ } ~ else if ((re_opcode_t) p1[3] == charset ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || (re_opcode_t) p1[3] == charset_not) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ int not_p = (re_opcode_t) p1[3] == charset_not; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (c < (unsigned char) (p1[4] * BYTEWIDTH) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ && p1[5 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH))) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ not_p = !not_p; ~~~~~~~~~~~~~~~ /* `not_p' is equal to 1 if c would match, which means ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ that we can't change to pop_failure_jump. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!not_p) ~~~~~~~~~~~ { ~ ((unsigned char *)p)[-3] ~~~~~~~~~~~~~~~~~~~~~~~~ = (unsigned char) pop_failure_jump; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT1 (" No match => pop_failure_jump.\n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ } ~ } ~ else if ((re_opcode_t) *p2 == charset) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ #ifdef DEBUG ~~~~~~~~~~~~ REGISTER unsigned char c ~~~~~~~~~~~~~~~~~~~~~~~~ = *p2 == (unsigned char) endline ? '\n' : p2[2]; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ if ((re_opcode_t) p1[3] == exactn ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ && ! ((int) p2[1] * BYTEWIDTH > (int) p1[5] ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ && (p2[2 + p1[5] / BYTEWIDTH] ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ & (1 << (p1[5] % BYTEWIDTH))))) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ unsigned char *p3 = (unsigned char *)p; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ p3[-3] = (unsigned char) pop_failure_jump; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT3 (" %c != %c => pop_failure_jump.\n", ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ c, p1[5]); ~~~~~~~~~~ } ~ else if ((re_opcode_t) p1[3] == charset_not) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ int idx; ~~~~~~~~ /* We win if the charset_not inside the loop ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ lists every character listed in the charset after. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (idx = 0; idx < (int) p2[1]; idx++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (! (p2[2 + idx] == 0 ~~~~~~~~~~~~~~~~~~~~~~~ || (idx < (int) p1[4] ~~~~~~~~~~~~~~~~~~~~~ && ((p2[2 + idx] & ~ p1[5 + idx]) == 0)))) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ if (idx == p2[1]) ~~~~~~~~~~~~~~~~~ { ~ unsigned char *p3 = (unsigned char *) p; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ p3[-3] = (unsigned char) pop_failure_jump; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT1 (" No match => pop_failure_jump.\n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ } ~ else if ((re_opcode_t) p1[3] == charset) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ int idx; ~~~~~~~~ /* We win if the charset inside the loop ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ has no overlap with the one after the loop. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (idx = 0; ~~~~~~~~~~~~~ idx < (int) p2[1] && idx < (int) p1[4]; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ idx++) ~~~~~~ if ((p2[2 + idx] & p1[5 + idx]) != 0) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ if (idx == p2[1] || idx == p1[4]) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ unsigned char *p3 = (unsigned char *)p; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ p3[-3] = (unsigned char) pop_failure_jump; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT1 (" No match => pop_failure_jump.\n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ } ~ } ~ } ~ p -= 2; /* Point at relative address again. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if ((re_opcode_t) p[-1] != pop_failure_jump) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ p[-1] = (unsigned char) jump; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT1 (" Match => jump.\n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto unconditional_jump; ~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* Note fall through. */ ~~~~~~~~~~~~~~~~~~~~~~~~~ /* The end of a simple repeat has a pop_failure_jump back to ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ its matching on_failure_jump, where the latter will push a ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ failure point. The pop_failure_jump takes off failure ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ points put on by this pop_failure_jump's matching ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ on_failure_jump; we got through the pattern to here from the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ matching on_failure_jump, so didn't fail. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case pop_failure_jump: ~~~~~~~~~~~~~~~~~~~~~~ { ~ /* We need to pass separate storage for the lowest and ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ highest registers, even though we don't care about the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ actual values. Otherwise, we will restore only one ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ register from the stack, since lowest will == highest in ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `pop_failure_point'. */ ~~~~~~~~~~~~~~~~~~~~~~~~ int dummy_low_reg, dummy_high_reg; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ unsigned char *pdummy; ~~~~~~~~~~~~~~~~~~~~~~ re_char *sdummy = NULL; ~~~~~~~~~~~~~~~~~~~~~~~ USED (sdummy); /* Silence warning. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT1 ("EXECUTING pop_failure_jump.\n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ POP_FAILURE_POINT (sdummy, pdummy, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ dummy_low_reg, dummy_high_reg, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ reg_dummy, reg_dummy, reg_info_dummy); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ USED (pdummy); ~~~~~~~~~~~~~~ } ~ /* Note fall through. */ ~~~~~~~~~~~~~~~~~~~~~~~~~ /* Unconditionally jump (without popping any failure points). */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case jump: ~~~~~~~~~~ unconditional_jump: ~~~~~~~~~~~~~~~~~~~ EXTRACT_NUMBER_AND_INCR (mcnt, p); /* Get the amount to jump. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT2 ("EXECUTING jump %d ", mcnt); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ p += mcnt; /* Do the jump. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT2 ("(to 0x%zx).\n", (Bytecount) p); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ /* We need this opcode so we can detect where alternatives end ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ in `group_match_null_string_p' et al. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case jump_past_alt: ~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT1 ("EXECUTING jump_past_alt.\n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto unconditional_jump; ~~~~~~~~~~~~~~~~~~~~~~~~ /* Normally, the on_failure_jump pushes a failure point, which ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ then gets popped at pop_failure_jump. We will end up at ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ pop_failure_jump, also, and with a pattern of, say, `a+', we ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ are skipping over the on_failure_jump, so we have to push ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ something meaningless for pop_failure_jump to pop. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case dummy_failure_jump: ~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT1 ("EXECUTING dummy_failure_jump.\n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* It doesn't matter what we push for the string here. What ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the code at `fail' tests is the value for the pattern. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PUSH_FAILURE_POINT ((unsigned char *) 0, (unsigned char *) 0, -2); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto unconditional_jump; ~~~~~~~~~~~~~~~~~~~~~~~~ /* At the end of an alternative, we need to push a dummy failure ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ point in case we are followed by a `pop_failure_jump', because ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ we don't want the failure point for the alternative to be ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ popped. For example, matching `(a|ab)*' against `aab' ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ requires that we match the `ab' alternative. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case push_dummy_failure: ~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT1 ("EXECUTING push_dummy_failure.\n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* See comments just above at `dummy_failure_jump' about the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ two zeroes. */ ~~~~~~~~~~~~~~~ PUSH_FAILURE_POINT ((re_char *) 0, (re_char *) 0, -2); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ /* Have to succeed matching what follows at least n times. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ After that, handle like `on_failure_jump'. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case succeed_n: ~~~~~~~~~~~~~~~ EXTRACT_NONNEGATIVE (mcnt, p + 2); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT2 ("EXECUTING succeed_n %d.\n", mcnt); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Originally, this is how many times we HAVE to succeed. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (mcnt) ~~~~~~~~~ { ~ mcnt--; ~~~~~~~ p += 2; ~~~~~~~ DEBUG_MATCH_PRINT3 (" Setting 0x%zx to %d.\n", (Bytecount) p, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ mcnt); ~~~~~~ STORE_MATCH_NUMBER_AND_INCR (p, mcnt); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ else ~~~~ { ~ DEBUG_MATCH_PRINT2 (" Setting two bytes from 0x%zx to no_op.\n", ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (Bytecount) (p+2)); ~~~~~~~~~~~~~~~~~~~ STORE_MATCH_NUMBER (p + 2, no_op); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto on_failure; ~~~~~~~~~~~~~~~~ } ~ break; ~~~~~~ case jump_n: ~~~~~~~~~~~~ EXTRACT_NONNEGATIVE (mcnt, p + 2); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT2 ("EXECUTING jump_n %d.\n", mcnt); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Originally, this is how many times we CAN jump. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (mcnt) ~~~~~~~~~ { ~ mcnt--; ~~~~~~~ STORE_MATCH_NUMBER (p + 2, mcnt); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto unconditional_jump; ~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* If don't have to jump any more, skip over the rest of command. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ else ~~~~ p += 4; ~~~~~~~ break; ~~~~~~ case set_number_at: ~~~~~~~~~~~~~~~~~~~ { ~ unsigned char *p2; /* Location of the counter. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT1 ("EXECUTING set_number_at.\n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ EXTRACT_NUMBER_AND_INCR (mcnt, p); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Discard 'const', making re_match_2_internal() ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ non-reentrant. */ ~~~~~~~~~~~~~~~~~~ p2 = (unsigned char *) p + mcnt; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ EXTRACT_NONNEGATIVE_AND_INCR (mcnt, p); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT3 (" Setting 0x%zx to %d.\n", (Bytecount) p2, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ mcnt); ~~~~~~ STORE_MATCH_NUMBER (p2, mcnt); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ } ~ case wordbound: ~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT1 ("EXECUTING wordbound.\n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ should_succeed = 1; ~~~~~~~~~~~~~~~~~~~ matchwordbound: ~~~~~~~~~~~~~~~ { ~ /* XEmacs change */ ~~~~~~~~~~~~~~~~~~~ /* Straightforward and (I hope) correct implementation. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* emch1 is the character before d, syn1 is the syntax of ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ emch1, emch2 is the character at d, and syn2 is the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ syntax of emch2. */ ~~~~~~~~~~~~~~~~~~~ Ichar emch1, emch2; ~~~~~~~~~~~~~~~~~~~ int syn1 = 0, ~~~~~~~~~~~~~ syn2 = 0; ~~~~~~~~~ re_char *d_before, *d_after; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int result, ~~~~~~~~~~~ at_beg = AT_STRINGS_BEG (d), ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ at_end = AT_STRINGS_END (d); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (at_beg && at_end) ~~~~~~~~~~~~~~~~~~~~~ { ~ result = 0; ~~~~~~~~~~~ } ~ else ~~~~ { ~ if (!at_beg) ~~~~~~~~~~~~ { ~ d_before = POS_BEFORE_GAP_UNSAFE (d); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEC_IBYTEPTR_FMT (d_before, fmt); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ emch1 = itext_ichar_fmt (d_before, fmt, lispobj); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef emacs ~~~~~~~~~~~~ BEGIN_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~~~ UPDATE_SYNTAX_CACHE (scache, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ offset_to_bytexpos ~~~~~~~~~~~~~~~~~~ (lispobj, PTR_TO_OFFSET (d_before))); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ syn1 = SYNTAX_FROM_CACHE (scache, emch1); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ END_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~ } ~ if (!at_end) ~~~~~~~~~~~~ { ~ d_after = POS_AFTER_GAP_UNSAFE (d); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ emch2 = itext_ichar_fmt (d_after, fmt, lispobj); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef emacs ~~~~~~~~~~~~ BEGIN_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~~~ UPDATE_SYNTAX_CACHE_FORWARD (scache, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ offset_to_bytexpos ~~~~~~~~~~~~~~~~~~ (lispobj, PTR_TO_OFFSET (d))); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ syn2 = SYNTAX_FROM_CACHE (scache, emch2); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ END_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~ } ~ RE_MATCH_RELOCATE_MOVEABLE_DATA_POINTERS (); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (at_beg) ~~~~~~~~~~~ result = (syn2 == Sword); ~~~~~~~~~~~~~~~~~~~~~~~~~ else if (at_end) ~~~~~~~~~~~~~~~~ result = (syn1 == Sword); ~~~~~~~~~~~~~~~~~~~~~~~~~ else ~~~~ result = ((syn1 == Sword) != (syn2 == Sword)); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ if (result == should_succeed) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ goto fail; ~~~~~~~~~~ } ~ case notwordbound: ~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT1 ("EXECUTING notwordbound.\n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ should_succeed = 0; ~~~~~~~~~~~~~~~~~~~ goto matchwordbound; ~~~~~~~~~~~~~~~~~~~~ case wordbeg: ~~~~~~~~~~~~~ DEBUG_MATCH_PRINT1 ("EXECUTING wordbeg.\n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (AT_STRINGS_END (d)) ~~~~~~~~~~~~~~~~~~~~~~~ goto fail; ~~~~~~~~~~ { ~ /* XEmacs: this originally read: ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (WORDCHAR_P (d) && (AT_STRINGS_BEG (d) || !WORDCHAR_P (d - 1))) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ */ ~~ re_char *dtmp = POS_AFTER_GAP_UNSAFE (d); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Ichar emch = itext_ichar_fmt (dtmp, fmt, lispobj); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int tempres; ~~~~~~~~~~~~ BEGIN_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef emacs ~~~~~~~~~~~~ UPDATE_SYNTAX_CACHE ~~~~~~~~~~~~~~~~~~~ (scache, ~~~~~~~~ offset_to_bytexpos (lispobj, PTR_TO_OFFSET (d))); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ tempres = (SYNTAX_FROM_CACHE (scache, emch) != Sword); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ END_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~ RE_MATCH_RELOCATE_MOVEABLE_DATA_POINTERS (); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (tempres) ~~~~~~~~~~~~ goto fail; ~~~~~~~~~~ if (AT_STRINGS_BEG (d)) ~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ dtmp = POS_BEFORE_GAP_UNSAFE (d); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEC_IBYTEPTR_FMT (dtmp, fmt); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ emch = itext_ichar_fmt (dtmp, fmt, lispobj); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ BEGIN_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef emacs ~~~~~~~~~~~~ UPDATE_SYNTAX_CACHE_BACKWARD ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (scache, ~~~~~~~~ offset_to_bytexpos (lispobj, PTR_TO_OFFSET (dtmp))); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ tempres = (SYNTAX_FROM_CACHE (scache, emch) != Sword); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ END_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~ RE_MATCH_RELOCATE_MOVEABLE_DATA_POINTERS (); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (tempres) ~~~~~~~~~~~~ break; ~~~~~~ goto fail; ~~~~~~~~~~ } ~ case wordend: ~~~~~~~~~~~~~ DEBUG_MATCH_PRINT1 ("EXECUTING wordend.\n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (AT_STRINGS_BEG (d)) ~~~~~~~~~~~~~~~~~~~~~~~ goto fail; ~~~~~~~~~~ { ~ /* XEmacs: this originally read: ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!AT_STRINGS_BEG (d) && WORDCHAR_P (d - 1) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ && (!WORDCHAR_P (d) || AT_STRINGS_END (d))) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ The or condition is incorrect (reversed). ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ ~~ re_char *dtmp; ~~~~~~~~~~~~~~ Ichar emch; ~~~~~~~~~~~ int tempres; ~~~~~~~~~~~~ #ifdef emacs ~~~~~~~~~~~~ BEGIN_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~~~ UPDATE_SYNTAX_CACHE ~~~~~~~~~~~~~~~~~~~ (scache, ~~~~~~~~ offset_to_bytexpos (lispobj, PTR_TO_OFFSET (d))); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ END_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~ RE_MATCH_RELOCATE_MOVEABLE_DATA_POINTERS (); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ dtmp = POS_BEFORE_GAP_UNSAFE (d); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEC_IBYTEPTR_FMT (dtmp, fmt); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ emch = itext_ichar_fmt (dtmp, fmt, lispobj); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ BEGIN_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~~~ tempres = (SYNTAX_FROM_CACHE (scache, emch) != Sword); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ END_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~ RE_MATCH_RELOCATE_MOVEABLE_DATA_POINTERS (); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (tempres) ~~~~~~~~~~~~ goto fail; ~~~~~~~~~~ if (AT_STRINGS_END (d)) ~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ dtmp = POS_AFTER_GAP_UNSAFE (d); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ emch = itext_ichar_fmt (dtmp, fmt, lispobj); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ BEGIN_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef emacs ~~~~~~~~~~~~ { ~ re_char *next = d; ~~~~~~~~~~~~~~~~~~ INC_IBYTEPTR_FMT (next, fmt); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ UPDATE_SYNTAX_CACHE_FORWARD ~~~~~~~~~~~~~~~~~~~~~~~~~~~ (scache, ~~~~~~~~ offset_to_bytexpos (lispobj, PTR_TO_OFFSET (next))); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ #endif ~~~~~~ tempres = (SYNTAX_FROM_CACHE (scache, emch) != Sword); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ END_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~ RE_MATCH_RELOCATE_MOVEABLE_DATA_POINTERS (); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (tempres) ~~~~~~~~~~~~ break; ~~~~~~ goto fail; ~~~~~~~~~~ } ~ #ifdef emacs ~~~~~~~~~~~~ case before_dot: ~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT1 ("EXECUTING before_dot.\n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!BUFFERP (lispobj) ~~~~~~~~~~~~~~~~~~~~~~ || (BUF_PTR_BYTE_POS (XBUFFER (lispobj), (unsigned char *) d) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ >= BUF_PT (XBUFFER (lispobj)))) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto fail; ~~~~~~~~~~ break; ~~~~~~ case at_dot: ~~~~~~~~~~~~ DEBUG_MATCH_PRINT1 ("EXECUTING at_dot.\n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!BUFFERP (lispobj) ~~~~~~~~~~~~~~~~~~~~~~ || (BUF_PTR_BYTE_POS (XBUFFER (lispobj), (unsigned char *) d) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ != BUF_PT (XBUFFER (lispobj)))) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto fail; ~~~~~~~~~~ break; ~~~~~~ case after_dot: ~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT1 ("EXECUTING after_dot.\n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!BUFFERP (lispobj) ~~~~~~~~~~~~~~~~~~~~~~ || (BUF_PTR_BYTE_POS (XBUFFER (lispobj), (unsigned char *) d) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ <= BUF_PT (XBUFFER (lispobj)))) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto fail; ~~~~~~~~~~ break; ~~~~~~ case syntaxspec: ~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT2 ("EXECUTING syntaxspec %d.\n", mcnt); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ mcnt = *p++; ~~~~~~~~~~~~ goto matchsyntax; ~~~~~~~~~~~~~~~~~ case wordchar: ~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT1 ("EXECUTING Emacs wordchar.\n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ mcnt = (int) Sword; ~~~~~~~~~~~~~~~~~~~ matchsyntax: ~~~~~~~~~~~~ should_succeed = 1; ~~~~~~~~~~~~~~~~~~~ matchornotsyntax: ~~~~~~~~~~~~~~~~~ { ~ int matches; ~~~~~~~~~~~~ Ichar emch; ~~~~~~~~~~~ REGEX_PREFETCH (); ~~~~~~~~~~~~~~~~~~ BEGIN_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~~~ UPDATE_SYNTAX_CACHE ~~~~~~~~~~~~~~~~~~~ (scache, ~~~~~~~~ offset_to_bytexpos (lispobj, PTR_TO_OFFSET (d))); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ END_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~ RE_MATCH_RELOCATE_MOVEABLE_DATA_POINTERS (); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ emch = itext_ichar_fmt (d, fmt, lispobj); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ BEGIN_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~~~ matches = (SYNTAX_FROM_CACHE (scache, emch) == ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (enum syntaxcode) mcnt); ~~~~~~~~~~~~~~~~~~~~~~~~ END_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~ RE_MATCH_RELOCATE_MOVEABLE_DATA_POINTERS (); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ INC_IBYTEPTR_FMT (d, fmt); ~~~~~~~~~~~~~~~~~~~~~~~~~~ if (matches != should_succeed) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto fail; ~~~~~~~~~~ SET_REGS_MATCHED (); ~~~~~~~~~~~~~~~~~~~~ } ~ break; ~~~~~~ case notsyntaxspec: ~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT2 ("EXECUTING notsyntaxspec %d.\n", mcnt); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ mcnt = *p++; ~~~~~~~~~~~~ goto matchnotsyntax; ~~~~~~~~~~~~~~~~~~~~ case notwordchar: ~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT1 ("EXECUTING Emacs notwordchar.\n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ mcnt = (int) Sword; ~~~~~~~~~~~~~~~~~~~ matchnotsyntax: ~~~~~~~~~~~~~~~ should_succeed = 0; ~~~~~~~~~~~~~~~~~~~ goto matchornotsyntax; ~~~~~~~~~~~~~~~~~~~~~~ #ifdef MULE ~~~~~~~~~~~ /* 97/2/17 jhod Mule category code patch */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case categoryspec: ~~~~~~~~~~~~~~~~~~ should_succeed = 1; ~~~~~~~~~~~~~~~~~~~ matchornotcategory: ~~~~~~~~~~~~~~~~~~~ { ~ Ichar emch; ~~~~~~~~~~~ mcnt = *p++; ~~~~~~~~~~~~ REGEX_PREFETCH (); ~~~~~~~~~~~~~~~~~~ emch = itext_ichar_fmt (d, fmt, lispobj); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ INC_IBYTEPTR_FMT (d, fmt); ~~~~~~~~~~~~~~~~~~~~~~~~~~ if (check_char_in_category (emch, BUFFER_CATEGORY_TABLE (lispbuf), ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ mcnt, should_succeed)) ~~~~~~~~~~~~~~~~~~~~~~ goto fail; ~~~~~~~~~~ SET_REGS_MATCHED (); ~~~~~~~~~~~~~~~~~~~~ } ~ break; ~~~~~~ case notcategoryspec: ~~~~~~~~~~~~~~~~~~~~~ should_succeed = 0; ~~~~~~~~~~~~~~~~~~~ goto matchornotcategory; ~~~~~~~~~~~~~~~~~~~~~~~~ /* end of category patch */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif /* MULE */ ~~~~~~~~~~~~~~~~~ #else /* not emacs */ ~~~~~~~~~~~~~~~~~~~~~ case wordchar: ~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT1 ("EXECUTING non-Emacs wordchar.\n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ REGEX_PREFETCH (); ~~~~~~~~~~~~~~~~~~ if (!WORDCHAR_P ((int) (*d))) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto fail; ~~~~~~~~~~ SET_REGS_MATCHED (); ~~~~~~~~~~~~~~~~~~~~ d++; ~~~~ break; ~~~~~~ case notwordchar: ~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT1 ("EXECUTING non-Emacs notwordchar.\n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ REGEX_PREFETCH (); ~~~~~~~~~~~~~~~~~~ if (!WORDCHAR_P ((int) (*d))) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto fail; ~~~~~~~~~~ SET_REGS_MATCHED (); ~~~~~~~~~~~~~~~~~~~~ d++; ~~~~ break; ~~~~~~ #endif /* emacs */ ~~~~~~~~~~~~~~~~~~ default: ~~~~~~~~ ABORT (); ~~~~~~~~~ } ~ continue; /* Successfully executed one pattern command; keep going. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* We goto here if a matching operation fails. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fail: ~~~~~ if (!FAIL_STACK_EMPTY ()) ~~~~~~~~~~~~~~~~~~~~~~~~~ { /* A restart point is known. Restore to that state. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT1 ("\nFAIL:\n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ POP_FAILURE_POINT (d, p, ~~~~~~~~~~~~~~~~~~~~~~~ regex.c:774:55: note: in definition of macro 'DEBUG_FAIL_PRINT2' if (debug_regexps & RE_DEBUG_FAILURE_POINT) printf (x1, x2) ^~ regex.c:7173:11: note: in expansion of macro 'POP_FAILURE_POINT' POP_FAILURE_POINT (d, p, ^~~~~~~~~~~~~~~~~ regex.c:1920:23: warning: format '%zx' expects argument of type 'size_t', but argument 2 has type 'Bytecount {aka long int}' [-Wformat=] DEBUG_FAIL_PRINT2 (" info: 0x%zx\n", \ ^ * (Bytecount *) ®_info[this_reg]); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regex.c:774:55: note: in definition of macro 'DEBUG_FAIL_PRINT2' if (debug_regexps & RE_DEBUG_FAILURE_POINT) printf (x1, x2) ^~ regex.c:7173:11: note: in expansion of macro 'POP_FAILURE_POINT' POP_FAILURE_POINT (d, p, ^~~~~~~~~~~~~~~~~ regex.c:1922:23: warning: format '%zx' expects argument of type 'size_t', but argument 2 has type 'long int' [-Wformat=] DEBUG_FAIL_PRINT2 (" end: 0x%zx\n", \ ^ (Bytecount) regend[this_reg]); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regex.c:774:55: note: in definition of macro 'DEBUG_FAIL_PRINT2' if (debug_regexps & RE_DEBUG_FAILURE_POINT) printf (x1, x2) ^~ regex.c:7173:11: note: in expansion of macro 'POP_FAILURE_POINT' POP_FAILURE_POINT (d, p, ^~~~~~~~~~~~~~~~~ regex.c:1924:23: warning: format '%zx' expects argument of type 'size_t', but argument 2 has type 'long int' [-Wformat=] DEBUG_FAIL_PRINT2 (" start: 0x%zx\n", \ ^ (Bytecount) regstart[this_reg]); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regex.c:774:55: note: in definition of macro 'DEBUG_FAIL_PRINT2' if (debug_regexps & RE_DEBUG_FAILURE_POINT) printf (x1, x2) ^~ regex.c:7173:11: note: in expansion of macro 'POP_FAILURE_POINT' POP_FAILURE_POINT (d, p, ^~~~~~~~~~~~~~~~~ --- sound.o --- gcc -c -Wall -Wno-switch -Wundef -Wsign-compare -Wno-char-subscripts -Wpacked -Wpointer-arith -Wshadow -Wmissing-declarations -Wmissing-prototypes -Wstrict-prototypes -Wdeclaration-after-statement -g -O2 -Dunix -no-pie -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -DTERMINFO -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -Demacs -I. -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/src -DHAVE_CONFIG_H -I/usr/pkg/include -I/usr/X11R7/include -I/usr/X11R7/include sound.c --- signal.o --- In file included from signal.c:22:0: ./config.h:868:16: warning: "__builtin_alloca" is not defined, evaluates to 0 [-Wundef] #define alloca __builtin_alloca ^ --- specifier.o --- gcc -c -Wall -Wno-switch -Wundef -Wsign-compare -Wno-char-subscripts -Wpacked -Wpointer-arith -Wshadow -Wmissing-declarations -Wmissing-prototypes -Wstrict-prototypes -Wdeclaration-after-statement -g -O2 -Dunix -no-pie -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -DTERMINFO -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -Demacs -I. -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/src -DHAVE_CONFIG_H -I/usr/pkg/include -I/usr/X11R7/include -I/usr/X11R7/include specifier.c --- sound.o --- In file included from sound.c:28:0: ./config.h:868:16: warning: "__builtin_alloca" is not defined, evaluates to 0 [-Wundef] #define alloca __builtin_alloca ^ --- specifier.o --- In file included from specifier.c:28:0: ./config.h:868:16: warning: "__builtin_alloca" is not defined, evaluates to 0 [-Wundef] #define alloca __builtin_alloca ^ --- strftime.o --- gcc -c -Wall -Wno-switch -Wundef -Wsign-compare -Wno-char-subscripts -Wpacked -Wpointer-arith -Wshadow -Wmissing-declarations -Wmissing-prototypes -Wstrict-prototypes -Wdeclaration-after-statement -g -O2 -Dunix -no-pie -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -DTERMINFO -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -Demacs -I. -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/src -DHAVE_CONFIG_H -I/usr/pkg/include -I/usr/X11R7/include -I/usr/X11R7/include strftime.c In file included from strftime.c:89:0: ./config.h:868:16: warning: "__builtin_alloca" is not defined, evaluates to 0 [-Wundef] #define alloca __builtin_alloca ^ --- symbols.o --- gcc -c -Wall -Wno-switch -Wundef -Wsign-compare -Wno-char-subscripts -Wpacked -Wpointer-arith -Wshadow -Wmissing-declarations -Wmissing-prototypes -Wstrict-prototypes -Wdeclaration-after-statement -g -O2 -Dunix -no-pie -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -DTERMINFO -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -Demacs -I. -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/src -DHAVE_CONFIG_H -I/usr/pkg/include -I/usr/X11R7/include -I/usr/X11R7/include symbols.c --- strftime.o --- strftime.c: In function 'add_num_time_t': strftime.c:199:49: warning: format '%zu' expects argument of type 'size_t', but argument 4 has type 'long unsigned int' [-Wformat=] emacs_snprintf_ascbyte (buf, sizeof (buf), "%zu", (EMACS_UINT) num); ~~^ ~~~~~~~~~~~~~~~~ %lu --- symbols.o --- In file included from symbols.c:52:0: ./config.h:868:16: warning: "__builtin_alloca" is not defined, evaluates to 0 [-Wundef] #define alloca __builtin_alloca ^ --- syntax.o --- gcc -c -Wall -Wno-switch -Wundef -Wsign-compare -Wno-char-subscripts -Wpacked -Wpointer-arith -Wshadow -Wmissing-declarations -Wmissing-prototypes -Wstrict-prototypes -Wdeclaration-after-statement -g -O2 -Dunix -no-pie -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -DTERMINFO -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -Demacs -I. -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/src -DHAVE_CONFIG_H -I/usr/pkg/include -I/usr/X11R7/include -I/usr/X11R7/include syntax.c In file included from syntax.c:25:0: ./config.h:868:16: warning: "__builtin_alloca" is not defined, evaluates to 0 [-Wundef] #define alloca __builtin_alloca ^ --- sysdep.o --- gcc -c -Wall -Wno-switch -Wundef -Wsign-compare -Wno-char-subscripts -Wpacked -Wpointer-arith -Wshadow -Wmissing-declarations -Wmissing-prototypes -Wstrict-prototypes -Wdeclaration-after-statement -g -O2 -Dunix -no-pie -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -DTERMINFO -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -Demacs -I. -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/src -DHAVE_CONFIG_H -I/usr/pkg/include -I/usr/X11R7/include -I/usr/X11R7/include sysdep.c In file included from sysdep.c:39:0: ./config.h:868:16: warning: "__builtin_alloca" is not defined, evaluates to 0 [-Wundef] #define alloca __builtin_alloca ^ --- sysdll.o --- gcc -c -Wall -Wno-switch -Wundef -Wsign-compare -Wno-char-subscripts -Wpacked -Wpointer-arith -Wshadow -Wmissing-declarations -Wmissing-prototypes -Wstrict-prototypes -Wdeclaration-after-statement -g -O2 -Dunix -no-pie -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -DTERMINFO -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -Demacs -I. -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/src -DHAVE_CONFIG_H -I/usr/pkg/include -I/usr/X11R7/include -I/usr/X11R7/include sysdll.c In file included from sysdll.c:25:0: ./config.h:868:16: warning: "__builtin_alloca" is not defined, evaluates to 0 [-Wundef] #define alloca __builtin_alloca ^ --- terminfo.o --- gcc -c -Wall -Wno-switch -Wundef -Wsign-compare -Wno-char-subscripts -Wpacked -Wpointer-arith -Wshadow -Wmissing-declarations -Wmissing-prototypes -Wstrict-prototypes -Wdeclaration-after-statement -g -O2 -Dunix -no-pie -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -DTERMINFO -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -Demacs -I. -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/src -DHAVE_CONFIG_H -I/usr/pkg/include -I/usr/X11R7/include -I/usr/X11R7/include terminfo.c --- tests.o --- gcc -c -Wall -Wno-switch -Wundef -Wsign-compare -Wno-char-subscripts -Wpacked -Wpointer-arith -Wshadow -Wmissing-declarations -Wmissing-prototypes -Wstrict-prototypes -Wdeclaration-after-statement -g -O2 -Dunix -no-pie -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -DTERMINFO -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -Demacs -I. -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/src -DHAVE_CONFIG_H -I/usr/pkg/include -I/usr/X11R7/include -I/usr/X11R7/include tests.c --- text.o --- gcc -c -Wall -Wno-switch -Wundef -Wsign-compare -Wno-char-subscripts -Wpacked -Wpointer-arith -Wshadow -Wmissing-declarations -Wmissing-prototypes -Wstrict-prototypes -Wdeclaration-after-statement -g -O2 -Dunix -no-pie -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -DTERMINFO -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -Demacs -I. -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/src -DHAVE_CONFIG_H -I/usr/pkg/include -I/usr/X11R7/include -I/usr/X11R7/include text.c --- tests.o --- In file included from tests.c:28:0: ./config.h:868:16: warning: "__builtin_alloca" is not defined, evaluates to 0 [-Wundef] #define alloca __builtin_alloca ^ --- text.o --- In file included from text.c:27:0: ./config.h:868:16: warning: "__builtin_alloca" is not defined, evaluates to 0 [-Wundef] #define alloca __builtin_alloca ^ --- tls.o --- gcc -c -Wall -Wno-switch -Wundef -Wsign-compare -Wno-char-subscripts -Wpacked -Wpointer-arith -Wshadow -Wmissing-declarations -Wmissing-prototypes -Wstrict-prototypes -Wdeclaration-after-statement -g -O2 -Dunix -no-pie -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -DTERMINFO -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -Demacs -I. -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/src -DHAVE_CONFIG_H -I/usr/pkg/include -I/usr/X11R7/include -I/usr/X11R7/include tls.c In file included from tls.c:23:0: ./config.h:868:16: warning: "__builtin_alloca" is not defined, evaluates to 0 [-Wundef] #define alloca __builtin_alloca ^ --- toolbar.o --- gcc -c -Wall -Wno-switch -Wundef -Wsign-compare -Wno-char-subscripts -Wpacked -Wpointer-arith -Wshadow -Wmissing-declarations -Wmissing-prototypes -Wstrict-prototypes -Wdeclaration-after-statement -g -O2 -Dunix -no-pie -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -DTERMINFO -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -Demacs -I. -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/src -DHAVE_CONFIG_H -I/usr/pkg/include -I/usr/X11R7/include -I/usr/X11R7/include toolbar.c --- undo.o --- gcc -c -Wall -Wno-switch -Wundef -Wsign-compare -Wno-char-subscripts -Wpacked -Wpointer-arith -Wshadow -Wmissing-declarations -Wmissing-prototypes -Wstrict-prototypes -Wdeclaration-after-statement -g -O2 -Dunix -no-pie -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -DTERMINFO -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -Demacs -I. -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/src -DHAVE_CONFIG_H -I/usr/pkg/include -I/usr/X11R7/include -I/usr/X11R7/include undo.c --- toolbar.o --- In file included from toolbar.c:27:0: ./config.h:868:16: warning: "__builtin_alloca" is not defined, evaluates to 0 [-Wundef] #define alloca __builtin_alloca ^ --- undo.o --- In file included from undo.c:23:0: ./config.h:868:16: warning: "__builtin_alloca" is not defined, evaluates to 0 [-Wundef] #define alloca __builtin_alloca ^ --- unicode.o --- gcc -c -Wall -Wno-switch -Wundef -Wsign-compare -Wno-char-subscripts -Wpacked -Wpointer-arith -Wshadow -Wmissing-declarations -Wmissing-prototypes -Wstrict-prototypes -Wdeclaration-after-statement -g -O2 -Dunix -no-pie -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -DTERMINFO -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -Demacs -I. -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/src -DHAVE_CONFIG_H -I/usr/pkg/include -I/usr/X11R7/include -I/usr/X11R7/include unicode.c In file included from unicode.c:35:0: ./config.h:868:16: warning: "__builtin_alloca" is not defined, evaluates to 0 [-Wundef] #define alloca __builtin_alloca ^ unicode.c: In function 'print_precedence_array': unicode.c:1643:46: warning: format '%zd' expects argument of type 'signed size_t', but argument 3 has type 'long int' [-Wformat=] write_fmt_string (printcharfun, " length=%zd", ~~^ %ld --- widget.o --- gcc -c -Wall -Wno-switch -Wundef -Wsign-compare -Wno-char-subscripts -Wpacked -Wpointer-arith -Wshadow -Wmissing-declarations -Wmissing-prototypes -Wstrict-prototypes -Wdeclaration-after-statement -g -O2 -Dunix -no-pie -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -DTERMINFO -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -Demacs -I. -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/src -DHAVE_CONFIG_H -I/usr/pkg/include -I/usr/X11R7/include -I/usr/X11R7/include widget.c In file included from widget.c:27:0: ./config.h:868:16: warning: "__builtin_alloca" is not defined, evaluates to 0 [-Wundef] #define alloca __builtin_alloca ^ --- window.o --- gcc -c -Wall -Wno-switch -Wundef -Wsign-compare -Wno-char-subscripts -Wpacked -Wpointer-arith -Wshadow -Wmissing-declarations -Wmissing-prototypes -Wstrict-prototypes -Wdeclaration-after-statement -g -O2 -Dunix -no-pie -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -DTERMINFO -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -Demacs -I. -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/src -DHAVE_CONFIG_H -I/usr/pkg/include -I/usr/X11R7/include -I/usr/X11R7/include window.c In file included from window.c:37:0: ./config.h:868:16: warning: "__builtin_alloca" is not defined, evaluates to 0 [-Wundef] #define alloca __builtin_alloca ^ --- event-Xt.o --- gcc -c -Wall -Wno-switch -Wundef -Wsign-compare -Wno-char-subscripts -Wpacked -Wpointer-arith -Wshadow -Wmissing-declarations -Wmissing-prototypes -Wstrict-prototypes -Wdeclaration-after-statement -g -O2 -Dunix -no-pie -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -DTERMINFO -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -Demacs -I. -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/src -DHAVE_CONFIG_H -I/usr/pkg/include -I/usr/X11R7/include -I/usr/X11R7/include event-Xt.c In file included from event-Xt.c:32:0: ./config.h:868:16: warning: "__builtin_alloca" is not defined, evaluates to 0 [-Wundef] #define alloca __builtin_alloca ^ event-Xt.c: In function 'x_reset_modifier_mapping': event-Xt.c:1494:2: warning: 'XKeycodeToKeysym' is deprecated [-Wdeprecated-declarations] KeySym sym = (code ? XKeycodeToKeysym (display, code, column) : 0); ^~~~~~ In file included from events.h:571:0, from event-Xt.c:41: /pbulk/work/editors/xemacs-current/work/.buildlink/include/X11/Xlib.h:1687:15: note: declared here extern KeySym XKeycodeToKeysym( ^~~~~~~~~~~~~~~~ --- TransientEmacsShell.o --- gcc -c -Wall -Wno-switch -Wundef -Wsign-compare -Wno-char-subscripts -Wpacked -Wpointer-arith -Wshadow -Wmissing-declarations -Wmissing-prototypes -Wstrict-prototypes -Wdeclaration-after-statement -g -O2 -Dunix -no-pie -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -DTERMINFO -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -Demacs -I. -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/src -DHAVE_CONFIG_H -I/usr/pkg/include -I/usr/X11R7/include -I/usr/X11R7/include -DDEFINE_TRANSIENT_EMACS_SHELL /pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/src/EmacsShell-sub.c In file included from /pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/src/EmacsShell-sub.c:79:0: ./config.h:868:16: warning: "__builtin_alloca" is not defined, evaluates to 0 [-Wundef] #define alloca __builtin_alloca ^ mv EmacsShell-sub.o TransientEmacsShell.o --- regex.o --- regex.c: In function 're_search_2': regex.c:1503:8: warning: 'd' may be used uninitialized in this function [-Wmaybe-uninitialized] && (re_char *) (val) <= (re_char *) string1 + size1) \ ^~ regex.c:5016:12: note: 'd' was declared here re_char *d; ^ --- dump-id.c --- ../lib-src/make-dump-id ../lib-src/make-dump-id: Shared object "libgmp.so.10" not found *** [dump-id.c] Error code 1 make[1]: stopped in /pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/src 1 error make[1]: stopped in /pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/src *** [src] Error code 2 make: stopped in /pbulk/work/editors/xemacs-current/work/xemacs-21.5.35 1 error make: stopped in /pbulk/work/editors/xemacs-current/work/xemacs-21.5.35 *** Error code 2 Stop. make[1]: stopped in /usr/pkgsrc/editors/xemacs-current *** Error code 1 Stop. make: stopped in /usr/pkgsrc/editors/xemacs-current