=> Bootstrap dependency digest>=20211023: found digest-20220214 ===> Skipping vulnerability checks. WARNING: No /usr/pkg/pkgdb/pkg-vulnerabilities file found. WARNING: To fix run: `/usr/sbin/pkg_admin -K /usr/pkg/pkgdb fetch-pkg-vulnerabilities'. ===> Building for xemacs-21.5.35nb6 Resetting `src/sheap-adjust.h'. Producing `src/Emacs.ad.h' from `etc/Emacs.ad'. cp /pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/PROBLEMS etc/PROBLEMS if test -n "/pbulk/work/editors/xemacs-current/work/.tools/bin/makeinfo"; then cd /pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/man && /usr/bin/make -j 8 CC='gcc' CFLAGS='-Wall -Wno-switch -Wundef -Wsign-compare -Wno-char-subscripts -Wpacked -Wpointer-arith -Wshadow -Wmissing-declarations -Wmissing-prototypes -Wstrict-prototypes -Wdeclaration-after-statement -g -O2 -Dunix -no-pie -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include' LDFLAGS='-Wl,-zrelro -L/usr/pkg/lib -Wl,-R/usr/pkg/lib -L/usr/lib -Wl,-R/usr/lib -L/usr/X11R7/lib -Wl,-R/usr/X11R7/lib' CPPFLAGS='-DTERMINFO -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include' MAKEINFO='/pbulk/work/editors/xemacs-current/work/.tools/bin/makeinfo' info; fi --- ../info/beta.info --- --- ../info/cl.info --- --- ../info/emodules.info --- --- ../info/external-widget.info --- --- ../info/info.info --- --- ../info/internals.info --- --- ../info/lispref.info --- --- ../info/new-users-guide.info --- --- ../info/beta.info --- /pbulk/work/editors/xemacs-current/work/.tools/bin/makeinfo -o ../info/beta.info beta.texi --- ../info/cl.info --- /pbulk/work/editors/xemacs-current/work/.tools/bin/makeinfo -o ../info/cl.info cl.texi --- ../info/emodules.info --- /pbulk/work/editors/xemacs-current/work/.tools/bin/makeinfo -o ../info/emodules.info emodules.texi --- ../info/external-widget.info --- /pbulk/work/editors/xemacs-current/work/.tools/bin/makeinfo -o ../info/external-widget.info external-widget.texi --- ../info/info.info --- /pbulk/work/editors/xemacs-current/work/.tools/bin/makeinfo -o ../info/info.info info.texi --- ../info/internals.info --- /pbulk/work/editors/xemacs-current/work/.tools/bin/makeinfo -P internals -o ../info/internals.info internals/internals.texi --- ../info/new-users-guide.info --- /pbulk/work/editors/xemacs-current/work/.tools/bin/makeinfo -P new-users-guide -o ../info/new-users-guide.info new-users-guide/new-users-guide.texi --- ../info/lispref.info --- /pbulk/work/editors/xemacs-current/work/.tools/bin/makeinfo -P lispref -o ../info/lispref.info lispref/lispref.texi --- ../info/standards.info --- /pbulk/work/editors/xemacs-current/work/.tools/bin/makeinfo -o ../info/standards.info standards.texi --- ../info/termcap.info --- /pbulk/work/editors/xemacs-current/work/.tools/bin/makeinfo -o ../info/termcap.info termcap.texi --- ../info/widget.info --- /pbulk/work/editors/xemacs-current/work/.tools/bin/makeinfo -o ../info/widget.info widget.texi --- ../info/xemacs.info --- /pbulk/work/editors/xemacs-current/work/.tools/bin/makeinfo -P xemacs -o ../info/xemacs.info xemacs/xemacs.texi --- ../info/xemacs-faq.info --- /pbulk/work/editors/xemacs-current/work/.tools/bin/makeinfo -o ../info/xemacs-faq.info xemacs-faq.texi xemacs-faq.texi:7: warning: unrecognized encoding name `UTF-8'. --- ../info/internals.info --- internals/internals.texi:7396: warning: `.' or `,' must follow @xref, not `)'. --- ../info/lispref.info --- /pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/man/lispref//searching.texi:1557: warning: unlikely character ( in @var. /pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/man/lispref//searching.texi:1557: warning: unlikely character ) in @var. /pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/man/lispref//mule.texi:6: warning: unrecognized encoding name `UTF-8'. cd ./lib-src && /usr/bin/make -j 8 CC='gcc' CFLAGS='-Wall -Wno-switch -Wundef -Wsign-compare -Wno-char-subscripts -Wpacked -Wpointer-arith -Wshadow -Wmissing-declarations -Wmissing-prototypes -Wstrict-prototypes -Wdeclaration-after-statement -g -O2 -Dunix -no-pie -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include' LDFLAGS='-Wl,-zrelro -L/usr/pkg/lib -Wl,-R/usr/pkg/lib -L/usr/lib -Wl,-R/usr/lib -L/usr/X11R7/lib -Wl,-R/usr/X11R7/lib' CPPFLAGS='-DTERMINFO -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include' MAKEINFO='/pbulk/work/editors/xemacs-current/work/.tools/bin/makeinfo' all --- gnuslib.o --- --- ellcc --- --- getopt.o --- --- getopt1.o --- --- regex.o --- --- b2m --- --- fakemail --- --- profile --- --- gnuslib.o --- gcc -c -Wall -Wno-switch -Wundef -Wsign-compare -Wno-char-subscripts -Wpacked -Wpointer-arith -Wshadow -Wmissing-declarations -Wmissing-prototypes -Wstrict-prototypes -Wdeclaration-after-statement -g -O2 -Dunix -no-pie -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -I. -I../src -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/lib-src -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/src -DTERMINFO -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -DHAVE_CONFIG_H -I/usr/pkg/include -I/usr/X11R7/include -I/usr/X11R7/include /pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/lib-src/gnuslib.c --- ellcc --- gcc -Wall -Wno-switch -Wundef -Wsign-compare -Wno-char-subscripts -Wpacked -Wpointer-arith -Wshadow -Wmissing-declarations -Wmissing-prototypes -Wstrict-prototypes -Wdeclaration-after-statement -g -O2 -Dunix -no-pie -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -I. -I../src -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/lib-src -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/src -DTERMINFO -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -DHAVE_CONFIG_H -I/usr/pkg/include -I/usr/X11R7/include /pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/lib-src/ellcc.c -Wl,-zrelro -L/usr/pkg/lib -Wl,-R/usr/pkg/lib -L/usr/lib -Wl,-R/usr/lib -L/usr/X11R7/lib -Wl,-R/usr/X11R7/lib -L/usr/pkg/lib -L/usr/X11R7/lib -Wl,-R/usr/pkg/lib:/usr/X11R7/lib -lgmp -lgdbm -ltermcap -lossaudio -lintl -lm -lkvm -lutil -o ellcc --- getopt.o --- gcc -c -Wall -Wno-switch -Wundef -Wsign-compare -Wno-char-subscripts -Wpacked -Wpointer-arith -Wshadow -Wmissing-declarations -Wmissing-prototypes -Wstrict-prototypes -Wdeclaration-after-statement -g -O2 -Dunix -no-pie -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -I. -I../src -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/lib-src -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/src -DTERMINFO -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -DHAVE_CONFIG_H -I/usr/pkg/include -I/usr/X11R7/include /pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/lib-src/getopt.c --- getopt1.o --- gcc -c -Wall -Wno-switch -Wundef -Wsign-compare -Wno-char-subscripts -Wpacked -Wpointer-arith -Wshadow -Wmissing-declarations -Wmissing-prototypes -Wstrict-prototypes -Wdeclaration-after-statement -g -O2 -Dunix -no-pie -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -I. -I../src -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/lib-src -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/src -DTERMINFO -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -DHAVE_CONFIG_H -I/usr/pkg/include -I/usr/X11R7/include /pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/lib-src/getopt1.c --- regex.o --- gcc -c -Wall -Wno-switch -Wundef -Wsign-compare -Wno-char-subscripts -Wpacked -Wpointer-arith -Wshadow -Wmissing-declarations -Wmissing-prototypes -Wstrict-prototypes -Wdeclaration-after-statement -g -O2 -Dunix -no-pie -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -I. -I../src -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/lib-src -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/src -DTERMINFO -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -DHAVE_CONFIG_H -I/usr/pkg/include -I/usr/X11R7/include -DINHIBIT_STRING_HEADER /pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/src/regex.c --- b2m --- gcc -Wall -Wno-switch -Wundef -Wsign-compare -Wno-char-subscripts -Wpacked -Wpointer-arith -Wshadow -Wmissing-declarations -Wmissing-prototypes -Wstrict-prototypes -Wdeclaration-after-statement -g -O2 -Dunix -no-pie -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -I. -I../src -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/lib-src -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/src -DTERMINFO -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -DHAVE_CONFIG_H -I/usr/pkg/include -I/usr/X11R7/include /pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/lib-src/b2m.c -Wl,-zrelro -L/usr/pkg/lib -Wl,-R/usr/pkg/lib -L/usr/lib -Wl,-R/usr/lib -L/usr/X11R7/lib -Wl,-R/usr/X11R7/lib -L/usr/pkg/lib -L/usr/X11R7/lib -Wl,-R/usr/pkg/lib:/usr/X11R7/lib -lgmp -lgdbm -ltermcap -lossaudio -lintl -lm -lkvm -lutil -o b2m --- fakemail --- gcc -Wall -Wno-switch -Wundef -Wsign-compare -Wno-char-subscripts -Wpacked -Wpointer-arith -Wshadow -Wmissing-declarations -Wmissing-prototypes -Wstrict-prototypes -Wdeclaration-after-statement -g -O2 -Dunix -no-pie -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -I. -I../src -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/lib-src -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/src -DTERMINFO -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -DHAVE_CONFIG_H -I/usr/pkg/include -I/usr/X11R7/include /pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/lib-src/fakemail.c -Wl,-zrelro -L/usr/pkg/lib -Wl,-R/usr/pkg/lib -L/usr/lib -Wl,-R/usr/lib -L/usr/X11R7/lib -Wl,-R/usr/X11R7/lib -L/usr/pkg/lib -L/usr/X11R7/lib -Wl,-R/usr/pkg/lib:/usr/X11R7/lib -lgmp -lgdbm -ltermcap -lossaudio -lintl -lm -lkvm -lutil -o fakemail --- profile --- gcc -Wall -Wno-switch -Wundef -Wsign-compare -Wno-char-subscripts -Wpacked -Wpointer-arith -Wshadow -Wmissing-declarations -Wmissing-prototypes -Wstrict-prototypes -Wdeclaration-after-statement -g -O2 -Dunix -no-pie -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -I. -I../src -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/lib-src -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/src -DTERMINFO -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -DHAVE_CONFIG_H -I/usr/pkg/include -I/usr/X11R7/include /pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/lib-src/profile.c -Wl,-zrelro -L/usr/pkg/lib -Wl,-R/usr/pkg/lib -L/usr/lib -Wl,-R/usr/lib -L/usr/X11R7/lib -Wl,-R/usr/X11R7/lib -L/usr/pkg/lib -L/usr/X11R7/lib -Wl,-R/usr/pkg/lib:/usr/X11R7/lib -lgmp -lgdbm -ltermcap -lossaudio -lintl -lm -lkvm -lutil -o profile In file included from /pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/lib-src/profile.c:34:0: ../src/config.h:868:16: warning: "__builtin_alloca" is not defined, evaluates to 0 [-Wundef] #define alloca __builtin_alloca ^ In file included from /usr/include/stdio.h:597:0, from /pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/lib-src/profile.c:35: /pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/lib-src/profile.c: In function 'get_time': /pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/lib-src/profile.c:86:25: warning: format '%lu' expects argument of type 'long unsigned int', but argument 5 has type 'time_t {aka long long int}' [-Wformat=] sprintf (time_string, "%lu.%06lu", ^ (unsigned long) TV2.tv_sec - TV1.tv_sec, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ --- getopt.o --- In file included from /pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/lib-src/getopt.c:32:0: ../src/config.h:868:16: warning: "__builtin_alloca" is not defined, evaluates to 0 [-Wundef] #define alloca __builtin_alloca ^ --- regex.o --- In file included from /pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/src/regex.c:27:0: ../src/config.h:868:16: warning: "__builtin_alloca" is not defined, evaluates to 0 [-Wundef] #define alloca __builtin_alloca ^ --- gnuslib.o --- In file included from /pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/lib-src/gnuserv.h:55:0, from /pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/lib-src/gnuslib.c:48: ../src/config.h:868:16: warning: "__builtin_alloca" is not defined, evaluates to 0 [-Wundef] #define alloca __builtin_alloca ^ --- ellcc --- In file included from /pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/lib-src/ellcc.c:59:0: ../src/config.h:868:16: warning: "__builtin_alloca" is not defined, evaluates to 0 [-Wundef] #define alloca __builtin_alloca ^ --- make-docfile --- --- b2m --- In file included from /pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/lib-src/b2m.c:23:0: ../src/config.h:868:16: warning: "__builtin_alloca" is not defined, evaluates to 0 [-Wundef] #define alloca __builtin_alloca ^ --- make-docfile --- gcc -Wall -Wno-switch -Wundef -Wsign-compare -Wno-char-subscripts -Wpacked -Wpointer-arith -Wshadow -Wmissing-declarations -Wmissing-prototypes -Wstrict-prototypes -Wdeclaration-after-statement -g -O2 -Dunix -no-pie -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -I. -I../src -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/lib-src -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/src -DTERMINFO -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -DHAVE_CONFIG_H -I/usr/pkg/include -I/usr/X11R7/include /pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/lib-src/make-docfile.c -Wl,-zrelro -L/usr/pkg/lib -Wl,-R/usr/pkg/lib -L/usr/lib -Wl,-R/usr/lib -L/usr/X11R7/lib -Wl,-R/usr/X11R7/lib -L/usr/pkg/lib -L/usr/X11R7/lib -Wl,-R/usr/pkg/lib:/usr/X11R7/lib -lgmp -lgdbm -ltermcap -lossaudio -lintl -lm -lkvm -lutil -o make-docfile --- digest-doc --- gcc -Demacs -Wall -Wno-switch -Wundef -Wsign-compare -Wno-char-subscripts -Wpacked -Wpointer-arith -Wshadow -Wmissing-declarations -Wmissing-prototypes -Wstrict-prototypes -Wdeclaration-after-statement -g -O2 -Dunix -no-pie -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -I. -I../src -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/lib-src -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/src -DTERMINFO -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -DHAVE_CONFIG_H -I/usr/pkg/include -I/usr/X11R7/include /pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/lib-src/digest-doc.c -Wl,-zrelro -L/usr/pkg/lib -Wl,-R/usr/pkg/lib -L/usr/lib -Wl,-R/usr/lib -L/usr/X11R7/lib -Wl,-R/usr/X11R7/lib -L/usr/pkg/lib -L/usr/X11R7/lib -Wl,-R/usr/pkg/lib:/usr/X11R7/lib -lgmp -lgdbm -ltermcap -lossaudio -lintl -lm -lkvm -lutil -o digest-doc --- make-docfile --- In file included from /pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/lib-src/make-docfile.c:42:0: ../src/config.h:868:16: warning: "__builtin_alloca" is not defined, evaluates to 0 [-Wundef] #define alloca __builtin_alloca ^ --- sorted-doc --- gcc -Wall -Wno-switch -Wundef -Wsign-compare -Wno-char-subscripts -Wpacked -Wpointer-arith -Wshadow -Wmissing-declarations -Wmissing-prototypes -Wstrict-prototypes -Wdeclaration-after-statement -g -O2 -Dunix -no-pie -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -I. -I../src -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/lib-src -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/src -DTERMINFO -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -DHAVE_CONFIG_H -I/usr/pkg/include -I/usr/X11R7/include /pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/lib-src/sorted-doc.c -Wl,-zrelro -L/usr/pkg/lib -Wl,-R/usr/pkg/lib -L/usr/lib -Wl,-R/usr/lib -L/usr/X11R7/lib -Wl,-R/usr/X11R7/lib -L/usr/pkg/lib -L/usr/X11R7/lib -Wl,-R/usr/pkg/lib:/usr/X11R7/lib -lgmp -lgdbm -ltermcap -lossaudio -lintl -lm -lkvm -lutil -o sorted-doc In file included from /pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/lib-src/sorted-doc.c:28:0: ../src/config.h:868:16: warning: "__builtin_alloca" is not defined, evaluates to 0 [-Wundef] #define alloca __builtin_alloca ^ --- cvtmail --- gcc -Wall -Wno-switch -Wundef -Wsign-compare -Wno-char-subscripts -Wpacked -Wpointer-arith -Wshadow -Wmissing-declarations -Wmissing-prototypes -Wstrict-prototypes -Wdeclaration-after-statement -g -O2 -Dunix -no-pie -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -I. -I../src -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/lib-src -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/src -DTERMINFO -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -DHAVE_CONFIG_H -I/usr/pkg/include -I/usr/X11R7/include /pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/lib-src/cvtmail.c -Wl,-zrelro -L/usr/pkg/lib -Wl,-R/usr/pkg/lib -L/usr/lib -Wl,-R/usr/lib -L/usr/X11R7/lib -Wl,-R/usr/X11R7/lib -L/usr/pkg/lib -L/usr/X11R7/lib -Wl,-R/usr/pkg/lib:/usr/X11R7/lib -lgmp -lgdbm -ltermcap -lossaudio -lintl -lm -lkvm -lutil -o cvtmail --- hexl --- gcc -Wall -Wno-switch -Wundef -Wsign-compare -Wno-char-subscripts -Wpacked -Wpointer-arith -Wshadow -Wmissing-declarations -Wmissing-prototypes -Wstrict-prototypes -Wdeclaration-after-statement -g -O2 -Dunix -no-pie -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -I. -I../src -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/lib-src -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/src -DTERMINFO -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -DHAVE_CONFIG_H -I/usr/pkg/include -I/usr/X11R7/include /pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/lib-src/hexl.c -Wl,-zrelro -L/usr/pkg/lib -Wl,-R/usr/pkg/lib -L/usr/lib -Wl,-R/usr/lib -L/usr/X11R7/lib -Wl,-R/usr/X11R7/lib -L/usr/pkg/lib -L/usr/X11R7/lib -Wl,-R/usr/pkg/lib:/usr/X11R7/lib -lgmp -lgdbm -ltermcap -lossaudio -lintl -lm -lkvm -lutil -o hexl --- mmencode --- gcc -Wall -Wno-switch -Wundef -Wsign-compare -Wno-char-subscripts -Wpacked -Wpointer-arith -Wshadow -Wmissing-declarations -Wmissing-prototypes -Wstrict-prototypes -Wdeclaration-after-statement -g -O2 -Dunix -no-pie -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -I. -I../src -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/lib-src -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/src -DTERMINFO -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -DHAVE_CONFIG_H -I/usr/pkg/include -I/usr/X11R7/include /pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/lib-src/mmencode.c -Wl,-zrelro -L/usr/pkg/lib -Wl,-R/usr/pkg/lib -L/usr/lib -Wl,-R/usr/lib -L/usr/X11R7/lib -Wl,-R/usr/X11R7/lib -L/usr/pkg/lib -L/usr/X11R7/lib -Wl,-R/usr/pkg/lib:/usr/X11R7/lib -lgmp -lgdbm -ltermcap -lossaudio -lintl -lm -lkvm -lutil -o mmencode --- hexl --- In file included from /pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/lib-src/hexl.c:28:0: ../src/config.h:868:16: warning: "__builtin_alloca" is not defined, evaluates to 0 [-Wundef] #define alloca __builtin_alloca ^ --- cvtmail --- In file included from /pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/lib-src/cvtmail.c:36:0: ../src/config.h:868:16: warning: "__builtin_alloca" is not defined, evaluates to 0 [-Wundef] #define alloca __builtin_alloca ^ --- make-path --- gcc -Demacs -Wall -Wno-switch -Wundef -Wsign-compare -Wno-char-subscripts -Wpacked -Wpointer-arith -Wshadow -Wmissing-declarations -Wmissing-prototypes -Wstrict-prototypes -Wdeclaration-after-statement -g -O2 -Dunix -no-pie -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -I. -I../src -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/lib-src -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/src -DTERMINFO -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -DHAVE_CONFIG_H -I/usr/pkg/include -I/usr/X11R7/include /pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/lib-src/make-path.c -Wl,-zrelro -L/usr/pkg/lib -Wl,-R/usr/pkg/lib -L/usr/lib -Wl,-R/usr/lib -L/usr/X11R7/lib -Wl,-R/usr/X11R7/lib -L/usr/pkg/lib -L/usr/X11R7/lib -Wl,-R/usr/pkg/lib:/usr/X11R7/lib -lgmp -lgdbm -ltermcap -lossaudio -lintl -lm -lkvm -lutil -o make-path --- mmencode --- In file included from /pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/lib-src/mmencode.c:17:0: ../src/config.h:868:16: warning: "__builtin_alloca" is not defined, evaluates to 0 [-Wundef] #define alloca __builtin_alloca ^ --- make-dump-id --- gcc -Wall -Wno-switch -Wundef -Wsign-compare -Wno-char-subscripts -Wpacked -Wpointer-arith -Wshadow -Wmissing-declarations -Wmissing-prototypes -Wstrict-prototypes -Wdeclaration-after-statement -g -O2 -Dunix -no-pie -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -I. -I../src -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/lib-src -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/src -DTERMINFO -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -DHAVE_CONFIG_H -I/usr/pkg/include -I/usr/X11R7/include /pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/lib-src/make-dump-id.c -Wl,-zrelro -L/usr/pkg/lib -Wl,-R/usr/pkg/lib -L/usr/lib -Wl,-R/usr/lib -L/usr/X11R7/lib -Wl,-R/usr/X11R7/lib -L/usr/pkg/lib -L/usr/X11R7/lib -Wl,-R/usr/pkg/lib:/usr/X11R7/lib -lgmp -lgdbm -ltermcap -lossaudio -lintl -lm -lkvm -lutil -o make-dump-id --- insert-data-in-exec --- gcc -Wall -Wno-switch -Wundef -Wsign-compare -Wno-char-subscripts -Wpacked -Wpointer-arith -Wshadow -Wmissing-declarations -Wmissing-prototypes -Wstrict-prototypes -Wdeclaration-after-statement -g -O2 -Dunix -no-pie -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -I. -I../src -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/lib-src -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/src -DTERMINFO -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -DHAVE_CONFIG_H -I/usr/pkg/include -I/usr/X11R7/include /pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/lib-src/insert-data-in-exec.c -Wl,-zrelro -L/usr/pkg/lib -Wl,-R/usr/pkg/lib -L/usr/lib -Wl,-R/usr/lib -L/usr/X11R7/lib -Wl,-R/usr/X11R7/lib -L/usr/pkg/lib -L/usr/X11R7/lib -Wl,-R/usr/pkg/lib:/usr/X11R7/lib -lgmp -lgdbm -ltermcap -lossaudio -lintl -lm -lkvm -lutil -o insert-data-in-exec --- make-dump-id --- In file included from /pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/lib-src/make-dump-id.c:19:0: ../src/config.h:868:16: warning: "__builtin_alloca" is not defined, evaluates to 0 [-Wundef] #define alloca __builtin_alloca ^ --- gnuclient --- gcc -Wall -Wno-switch -Wundef -Wsign-compare -Wno-char-subscripts -Wpacked -Wpointer-arith -Wshadow -Wmissing-declarations -Wmissing-prototypes -Wstrict-prototypes -Wdeclaration-after-statement -g -O2 -Dunix -no-pie -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -I. -I../src -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/lib-src -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/src -DTERMINFO -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -DHAVE_CONFIG_H -I/usr/pkg/include -I/usr/X11R7/include -I/usr/X11R7/include /pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/lib-src/gnuclient.c gnuslib.o -Wl,-zrelro -L/usr/pkg/lib -Wl,-R/usr/pkg/lib -L/usr/lib -Wl,-R/usr/lib -L/usr/X11R7/lib -Wl,-R/usr/X11R7/lib -L/usr/pkg/lib -L/usr/X11R7/lib -Wl,-R/usr/pkg/lib:/usr/X11R7/lib -L/usr/X11R7/lib -R/usr/X11R7/lib -lXau -lXmu -lXt -lXext -lX11 -lSM -lICE -lgmp -lgdbm -ltermcap -lossaudio -lintl -lm -lkvm -lutil -o gnuclient --- gnuserv --- gcc -Wall -Wno-switch -Wundef -Wsign-compare -Wno-char-subscripts -Wpacked -Wpointer-arith -Wshadow -Wmissing-declarations -Wmissing-prototypes -Wstrict-prototypes -Wdeclaration-after-statement -g -O2 -Dunix -no-pie -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -I. -I../src -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/lib-src -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/src -DTERMINFO -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -DHAVE_CONFIG_H -I/usr/pkg/include -I/usr/X11R7/include -I/usr/X11R7/include /pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/lib-src/gnuserv.c gnuslib.o -Wl,-zrelro -L/usr/pkg/lib -Wl,-R/usr/pkg/lib -L/usr/lib -Wl,-R/usr/lib -L/usr/X11R7/lib -Wl,-R/usr/X11R7/lib -L/usr/pkg/lib -L/usr/X11R7/lib -Wl,-R/usr/pkg/lib:/usr/X11R7/lib -L/usr/X11R7/lib -R/usr/X11R7/lib -lXau -lXmu -lXt -lXext -lX11 -lSM -lICE -lgmp -lgdbm -ltermcap -lossaudio -lintl -lm -lkvm -lutil -o gnuserv --- gnuclient --- In file included from /pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/lib-src/gnuserv.h:55:0, from /pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/lib-src/gnuclient.c:48: ../src/config.h:868:16: warning: "__builtin_alloca" is not defined, evaluates to 0 [-Wundef] #define alloca __builtin_alloca ^ --- gnuserv --- In file included from /pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/lib-src/gnuserv.h:55:0, from /pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/lib-src/gnuserv.c:42: ../src/config.h:868:16: warning: "__builtin_alloca" is not defined, evaluates to 0 [-Wundef] #define alloca __builtin_alloca ^ --- etags --- --- ootags --- --- movemail --- gcc -Wall -Wno-switch -Wundef -Wsign-compare -Wno-char-subscripts -Wpacked -Wpointer-arith -Wshadow -Wmissing-declarations -Wmissing-prototypes -Wstrict-prototypes -Wdeclaration-after-statement -g -O2 -Dunix -no-pie -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -I. -I../src -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/lib-src -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/src -DTERMINFO -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -DHAVE_CONFIG_H -I/usr/pkg/include -I/usr/X11R7/include /pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/lib-src/movemail.c /pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/lib-src/pop.c getopt.o getopt1.o regex.o -Wl,-zrelro -L/usr/pkg/lib -Wl,-R/usr/pkg/lib -L/usr/lib -Wl,-R/usr/lib -L/usr/X11R7/lib -Wl,-R/usr/X11R7/lib -L/usr/pkg/lib -L/usr/X11R7/lib -Wl,-R/usr/pkg/lib:/usr/X11R7/lib -lgmp -lgdbm -ltermcap -lossaudio -lintl -lm -lkvm -lutil -o movemail --- etags --- gcc -Wall -Wno-switch -Wundef -Wsign-compare -Wno-char-subscripts -Wpacked -Wpointer-arith -Wshadow -Wmissing-declarations -Wmissing-prototypes -Wstrict-prototypes -Wdeclaration-after-statement -g -O2 -Dunix -no-pie -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -I. -I../src -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/lib-src -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/src -DTERMINFO -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -DHAVE_CONFIG_H -I/usr/pkg/include -I/usr/X11R7/include -DEMACS_NAME='"XEmacs"' -DVERSION='"21.5-b35"' /pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/lib-src/etags.c getopt.o getopt1.o regex.o -Wl,-zrelro -L/usr/pkg/lib -Wl,-R/usr/pkg/lib -L/usr/lib -Wl,-R/usr/lib -L/usr/X11R7/lib -Wl,-R/usr/X11R7/lib -L/usr/pkg/lib -L/usr/X11R7/lib -Wl,-R/usr/pkg/lib:/usr/X11R7/lib -lgmp -lgdbm -ltermcap -lossaudio -lintl -lm -lkvm -lutil -o etags --- ootags --- gcc -Wall -Wno-switch -Wundef -Wsign-compare -Wno-char-subscripts -Wpacked -Wpointer-arith -Wshadow -Wmissing-declarations -Wmissing-prototypes -Wstrict-prototypes -Wdeclaration-after-statement -g -O2 -Dunix -no-pie -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -I. -I../src -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/lib-src -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/src -DTERMINFO -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -DHAVE_CONFIG_H -I/usr/pkg/include -I/usr/X11R7/include -DVERSION='"21.5-b35"' /pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/lib-src/ootags.c getopt.o getopt1.o regex.o -Wl,-zrelro -L/usr/pkg/lib -Wl,-R/usr/pkg/lib -L/usr/lib -Wl,-R/usr/lib -L/usr/X11R7/lib -Wl,-R/usr/X11R7/lib -L/usr/pkg/lib -L/usr/X11R7/lib -Wl,-R/usr/pkg/lib:/usr/X11R7/lib -lgmp -lgdbm -ltermcap -lossaudio -lintl -lm -lkvm -lutil -o ootags In file included from /pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/lib-src/ootags.c:51:0: ../src/config.h:868:16: warning: "__builtin_alloca" is not defined, evaluates to 0 [-Wundef] #define alloca __builtin_alloca ^ --- etags --- In file included from /pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/lib-src/etags.c:96:0: ../src/config.h:868:16: warning: "__builtin_alloca" is not defined, evaluates to 0 [-Wundef] #define alloca __builtin_alloca ^ --- movemail --- In file included from /pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/lib-src/movemail.c:61:0: ../src/config.h:868:16: warning: "__builtin_alloca" is not defined, evaluates to 0 [-Wundef] #define alloca __builtin_alloca ^ --- ctags --- gcc -DCTAGS -Wall -Wno-switch -Wundef -Wsign-compare -Wno-char-subscripts -Wpacked -Wpointer-arith -Wshadow -Wmissing-declarations -Wmissing-prototypes -Wstrict-prototypes -Wdeclaration-after-statement -g -O2 -Dunix -no-pie -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -I. -I../src -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/lib-src -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/src -DTERMINFO -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -DHAVE_CONFIG_H -I/usr/pkg/include -I/usr/X11R7/include -DEMACS_NAME='"XEmacs"' -DVERSION='"21.5-b35"' /pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/lib-src/etags.c getopt.o getopt1.o regex.o -Wl,-zrelro -L/usr/pkg/lib -Wl,-R/usr/pkg/lib -L/usr/lib -Wl,-R/usr/lib -L/usr/X11R7/lib -Wl,-R/usr/X11R7/lib -L/usr/pkg/lib -L/usr/X11R7/lib -Wl,-R/usr/pkg/lib:/usr/X11R7/lib -lgmp -lgdbm -ltermcap -lossaudio -lintl -lm -lkvm -lutil -o ctags In file included from /pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/lib-src/etags.c:96:0: ../src/config.h:868:16: warning: "__builtin_alloca" is not defined, evaluates to 0 [-Wundef] #define alloca __builtin_alloca ^ cd ./lwlib && /usr/bin/make -j 8 CC='gcc' CFLAGS='-Wall -Wno-switch -Wundef -Wsign-compare -Wno-char-subscripts -Wpacked -Wpointer-arith -Wshadow -Wmissing-declarations -Wmissing-prototypes -Wstrict-prototypes -Wdeclaration-after-statement -g -O2 -Dunix -no-pie -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include' LDFLAGS='-Wl,-zrelro -L/usr/pkg/lib -Wl,-R/usr/pkg/lib -L/usr/lib -Wl,-R/usr/lib -L/usr/X11R7/lib -Wl,-R/usr/X11R7/lib' CPPFLAGS='-DTERMINFO -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include' MAKEINFO='/pbulk/work/editors/xemacs-current/work/.tools/bin/makeinfo' all --- lwlib.o --- --- lwlib-utils.o --- --- lwlib-colors.o --- --- lwlib-fonts.o --- --- lwlib-Xaw.o --- --- xlwmenu.o --- --- xlwscrollbar.o --- --- xlwtabs.o --- --- lwlib.o --- gcc -c -Wall -Wno-switch -Wundef -Wsign-compare -Wno-char-subscripts -Wpacked -Wpointer-arith -Wshadow -Wmissing-declarations -Wmissing-prototypes -Wstrict-prototypes -Wdeclaration-after-statement -g -O2 -Dunix -no-pie -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -DTERMINFO -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -I. -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/lwlib/../src -DHAVE_CONFIG_H -I/usr/pkg/include -I/usr/X11R7/include -I/usr/X11R7/include lwlib.c --- lwlib-utils.o --- gcc -c -Wall -Wno-switch -Wundef -Wsign-compare -Wno-char-subscripts -Wpacked -Wpointer-arith -Wshadow -Wmissing-declarations -Wmissing-prototypes -Wstrict-prototypes -Wdeclaration-after-statement -g -O2 -Dunix -no-pie -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -DTERMINFO -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -I. -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/lwlib/../src -DHAVE_CONFIG_H -I/usr/pkg/include -I/usr/X11R7/include -I/usr/X11R7/include lwlib-utils.c --- lwlib-colors.o --- gcc -c -Wall -Wno-switch -Wundef -Wsign-compare -Wno-char-subscripts -Wpacked -Wpointer-arith -Wshadow -Wmissing-declarations -Wmissing-prototypes -Wstrict-prototypes -Wdeclaration-after-statement -g -O2 -Dunix -no-pie -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -DTERMINFO -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -I. -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/lwlib/../src -DHAVE_CONFIG_H -I/usr/pkg/include -I/usr/X11R7/include -I/usr/X11R7/include lwlib-colors.c --- lwlib-fonts.o --- gcc -c -Wall -Wno-switch -Wundef -Wsign-compare -Wno-char-subscripts -Wpacked -Wpointer-arith -Wshadow -Wmissing-declarations -Wmissing-prototypes -Wstrict-prototypes -Wdeclaration-after-statement -g -O2 -Dunix -no-pie -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -DTERMINFO -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -I. -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/lwlib/../src -DHAVE_CONFIG_H -I/usr/pkg/include -I/usr/X11R7/include -I/usr/X11R7/include lwlib-fonts.c --- lwlib-Xaw.o --- gcc -c -Wall -Wno-switch -Wundef -Wsign-compare -Wno-char-subscripts -Wpacked -Wpointer-arith -Wshadow -Wmissing-declarations -Wmissing-prototypes -Wstrict-prototypes -Wdeclaration-after-statement -g -O2 -Dunix -no-pie -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -DTERMINFO -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -I. -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/lwlib/../src -DHAVE_CONFIG_H -I/usr/pkg/include -I/usr/X11R7/include -I/usr/X11R7/include lwlib-Xaw.c --- xlwmenu.o --- gcc -c -Wall -Wno-switch -Wundef -Wsign-compare -Wno-char-subscripts -Wpacked -Wpointer-arith -Wshadow -Wmissing-declarations -Wmissing-prototypes -Wstrict-prototypes -Wdeclaration-after-statement -g -O2 -Dunix -no-pie -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -DTERMINFO -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -I. -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/lwlib/../src -DHAVE_CONFIG_H -I/usr/pkg/include -I/usr/X11R7/include -I/usr/X11R7/include xlwmenu.c --- xlwscrollbar.o --- gcc -c -Wall -Wno-switch -Wundef -Wsign-compare -Wno-char-subscripts -Wpacked -Wpointer-arith -Wshadow -Wmissing-declarations -Wmissing-prototypes -Wstrict-prototypes -Wdeclaration-after-statement -g -O2 -Dunix -no-pie -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -DTERMINFO -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -I. -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/lwlib/../src -DHAVE_CONFIG_H -I/usr/pkg/include -I/usr/X11R7/include -I/usr/X11R7/include xlwscrollbar.c --- xlwtabs.o --- gcc -c -Wall -Wno-switch -Wundef -Wsign-compare -Wno-char-subscripts -Wpacked -Wpointer-arith -Wshadow -Wmissing-declarations -Wmissing-prototypes -Wstrict-prototypes -Wdeclaration-after-statement -g -O2 -Dunix -no-pie -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -DTERMINFO -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -I. -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/lwlib/../src -DHAVE_CONFIG_H -I/usr/pkg/include -I/usr/X11R7/include -I/usr/X11R7/include xlwtabs.c --- lwlib-utils.o --- In file included from ./config.h:25:0, from lwlib-utils.c:20: ./../src/config.h:868:16: warning: "__builtin_alloca" is not defined, evaluates to 0 [-Wundef] #define alloca __builtin_alloca ^ --- xlwmenu.o --- In file included from ./config.h:25:0, from xlwmenu.c:23: ./../src/config.h:868:16: warning: "__builtin_alloca" is not defined, evaluates to 0 [-Wundef] #define alloca __builtin_alloca ^ --- xlwscrollbar.o --- In file included from ./config.h:25:0, from xlwscrollbar.c:71: ./../src/config.h:868:16: warning: "__builtin_alloca" is not defined, evaluates to 0 [-Wundef] #define alloca __builtin_alloca ^ --- lwlib-fonts.o --- In file included from ./config.h:25:0, from lwlib-fonts.c:26: ./../src/config.h:868:16: warning: "__builtin_alloca" is not defined, evaluates to 0 [-Wundef] #define alloca __builtin_alloca ^ --- lwlib-Xaw.o --- In file included from ./config.h:25:0, from lwlib-Xaw.c:21: ./../src/config.h:868:16: warning: "__builtin_alloca" is not defined, evaluates to 0 [-Wundef] #define alloca __builtin_alloca ^ --- lwlib-colors.o --- In file included from ./config.h:25:0, from lwlib-colors.c:25: ./../src/config.h:868:16: warning: "__builtin_alloca" is not defined, evaluates to 0 [-Wundef] #define alloca __builtin_alloca ^ --- lwlib.o --- In file included from ./config.h:25:0, from lwlib.c:21: ./../src/config.h:868:16: warning: "__builtin_alloca" is not defined, evaluates to 0 [-Wundef] #define alloca __builtin_alloca ^ --- xlwgcs.o --- gcc -c -Wall -Wno-switch -Wundef -Wsign-compare -Wno-char-subscripts -Wpacked -Wpointer-arith -Wshadow -Wmissing-declarations -Wmissing-prototypes -Wstrict-prototypes -Wdeclaration-after-statement -g -O2 -Dunix -no-pie -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -DTERMINFO -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -I. -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/lwlib/../src -DHAVE_CONFIG_H -I/usr/pkg/include -I/usr/X11R7/include -I/usr/X11R7/include xlwgcs.c --- xlwradio.o --- gcc -c -Wall -Wno-switch -Wundef -Wsign-compare -Wno-char-subscripts -Wpacked -Wpointer-arith -Wshadow -Wmissing-declarations -Wmissing-prototypes -Wstrict-prototypes -Wdeclaration-after-statement -g -O2 -Dunix -no-pie -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -DTERMINFO -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -I. -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/lwlib/../src -DHAVE_CONFIG_H -I/usr/pkg/include -I/usr/X11R7/include -I/usr/X11R7/include xlwradio.c --- xlwcheckbox.o --- gcc -c -Wall -Wno-switch -Wundef -Wsign-compare -Wno-char-subscripts -Wpacked -Wpointer-arith -Wshadow -Wmissing-declarations -Wmissing-prototypes -Wstrict-prototypes -Wdeclaration-after-statement -g -O2 -Dunix -no-pie -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -DTERMINFO -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -I. -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/lwlib/../src -DHAVE_CONFIG_H -I/usr/pkg/include -I/usr/X11R7/include -I/usr/X11R7/include xlwcheckbox.c --- xlwgauge.o --- gcc -c -Wall -Wno-switch -Wundef -Wsign-compare -Wno-char-subscripts -Wpacked -Wpointer-arith -Wshadow -Wmissing-declarations -Wmissing-prototypes -Wstrict-prototypes -Wdeclaration-after-statement -g -O2 -Dunix -no-pie -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -DTERMINFO -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -I. -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/lwlib/../src -DHAVE_CONFIG_H -I/usr/pkg/include -I/usr/X11R7/include -I/usr/X11R7/include xlwgauge.c --- lwlib-Xlw.o --- gcc -c -Wall -Wno-switch -Wundef -Wsign-compare -Wno-char-subscripts -Wpacked -Wpointer-arith -Wshadow -Wmissing-declarations -Wmissing-prototypes -Wstrict-prototypes -Wdeclaration-after-statement -g -O2 -Dunix -no-pie -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -DTERMINFO -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -I. -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/lwlib/../src -DHAVE_CONFIG_H -I/usr/pkg/include -I/usr/X11R7/include -I/usr/X11R7/include lwlib-Xlw.c --- xlwgauge.o --- In file included from ./config.h:25:0, from xlwgauge.c:46: ./../src/config.h:868:16: warning: "__builtin_alloca" is not defined, evaluates to 0 [-Wundef] #define alloca __builtin_alloca ^ --- lwlib-Xlw.o --- In file included from ./config.h:25:0, from lwlib-Xlw.c:20: ./../src/config.h:868:16: warning: "__builtin_alloca" is not defined, evaluates to 0 [-Wundef] #define alloca __builtin_alloca ^ --- liblw.a --- rm -f liblw.a ar cq liblw.a lwlib.o lwlib-utils.o lwlib-colors.o lwlib-fonts.o lwlib-Xaw.o xlwmenu.o xlwscrollbar.o xlwtabs.o xlwgcs.o xlwradio.o xlwcheckbox.o xlwgauge.o lwlib-Xlw.o cd ./src && /usr/bin/make -j 8 CC='gcc' CFLAGS='-Wall -Wno-switch -Wundef -Wsign-compare -Wno-char-subscripts -Wpacked -Wpointer-arith -Wshadow -Wmissing-declarations -Wmissing-prototypes -Wstrict-prototypes -Wdeclaration-after-statement -g -O2 -Dunix -no-pie -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include' LDFLAGS='-Wl,-zrelro -L/usr/pkg/lib -Wl,-R/usr/pkg/lib -L/usr/lib -Wl,-R/usr/lib -L/usr/X11R7/lib -Wl,-R/usr/X11R7/lib' CPPFLAGS='-DTERMINFO -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include' MAKEINFO='/pbulk/work/editors/xemacs-current/work/.tools/bin/makeinfo' all --- TopLevelEmacsShell.o --- --- toolbar-xlike.o --- --- EmacsFrame.o --- --- EmacsManager.o --- --- EmacsShell.o --- --- balloon-x.o --- --- balloon_help.o --- --- console-x.o --- --- toolbar-xlike.o --- gcc -c -Wall -Wno-switch -Wundef -Wsign-compare -Wno-char-subscripts -Wpacked -Wpointer-arith -Wshadow -Wmissing-declarations -Wmissing-prototypes -Wstrict-prototypes -Wdeclaration-after-statement -g -O2 -Dunix -no-pie -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -DTERMINFO -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -Demacs -I. -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/src -DHAVE_CONFIG_H -I/usr/pkg/include -I/usr/X11R7/include -I/usr/X11R7/include toolbar-xlike.c --- TopLevelEmacsShell.o --- gcc -c -Wall -Wno-switch -Wundef -Wsign-compare -Wno-char-subscripts -Wpacked -Wpointer-arith -Wshadow -Wmissing-declarations -Wmissing-prototypes -Wstrict-prototypes -Wdeclaration-after-statement -g -O2 -Dunix -no-pie -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -DTERMINFO -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -Demacs -I. -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/src -DHAVE_CONFIG_H -I/usr/pkg/include -I/usr/X11R7/include -I/usr/X11R7/include -DDEFINE_TOP_LEVEL_EMACS_SHELL /pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/src/EmacsShell-sub.c --- EmacsManager.o --- gcc -c -Wall -Wno-switch -Wundef -Wsign-compare -Wno-char-subscripts -Wpacked -Wpointer-arith -Wshadow -Wmissing-declarations -Wmissing-prototypes -Wstrict-prototypes -Wdeclaration-after-statement -g -O2 -Dunix -no-pie -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -DTERMINFO -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -Demacs -I. -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/src -DHAVE_CONFIG_H -I/usr/pkg/include -I/usr/X11R7/include -I/usr/X11R7/include EmacsManager.c --- EmacsFrame.o --- gcc -c -Wall -Wno-switch -Wundef -Wsign-compare -Wno-char-subscripts -Wpacked -Wpointer-arith -Wshadow -Wmissing-declarations -Wmissing-prototypes -Wstrict-prototypes -Wdeclaration-after-statement -g -O2 -Dunix -no-pie -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -DTERMINFO -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -Demacs -I. -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/src -DHAVE_CONFIG_H -I/usr/pkg/include -I/usr/X11R7/include -I/usr/X11R7/include EmacsFrame.c --- EmacsShell.o --- gcc -c -Wall -Wno-switch -Wundef -Wsign-compare -Wno-char-subscripts -Wpacked -Wpointer-arith -Wshadow -Wmissing-declarations -Wmissing-prototypes -Wstrict-prototypes -Wdeclaration-after-statement -g -O2 -Dunix -no-pie -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -DTERMINFO -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -Demacs -I. -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/src -DHAVE_CONFIG_H -I/usr/pkg/include -I/usr/X11R7/include -I/usr/X11R7/include EmacsShell.c --- balloon-x.o --- gcc -c -Wall -Wno-switch -Wundef -Wsign-compare -Wno-char-subscripts -Wpacked -Wpointer-arith -Wshadow -Wmissing-declarations -Wmissing-prototypes -Wstrict-prototypes -Wdeclaration-after-statement -g -O2 -Dunix -no-pie -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -DTERMINFO -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -Demacs -I. -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/src -DHAVE_CONFIG_H -I/usr/pkg/include -I/usr/X11R7/include -I/usr/X11R7/include balloon-x.c --- console-x.o --- gcc -c -Wall -Wno-switch -Wundef -Wsign-compare -Wno-char-subscripts -Wpacked -Wpointer-arith -Wshadow -Wmissing-declarations -Wmissing-prototypes -Wstrict-prototypes -Wdeclaration-after-statement -g -O2 -Dunix -no-pie -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -DTERMINFO -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -Demacs -I. -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/src -DHAVE_CONFIG_H -I/usr/pkg/include -I/usr/X11R7/include -I/usr/X11R7/include console-x.c --- balloon_help.o --- gcc -c -Wall -Wno-switch -Wundef -Wsign-compare -Wno-char-subscripts -Wpacked -Wpointer-arith -Wshadow -Wmissing-declarations -Wmissing-prototypes -Wstrict-prototypes -Wdeclaration-after-statement -g -O2 -Dunix -no-pie -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -DTERMINFO -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -Demacs -I. -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/src -DHAVE_CONFIG_H -I/usr/pkg/include -I/usr/X11R7/include -I/usr/X11R7/include balloon_help.c --- EmacsFrame.o --- In file included from EmacsFrame.c:27:0: ./config.h:868:16: warning: "__builtin_alloca" is not defined, evaluates to 0 [-Wundef] #define alloca __builtin_alloca ^ --- console-x.o --- In file included from console-x.c:28:0: ./config.h:868:16: warning: "__builtin_alloca" is not defined, evaluates to 0 [-Wundef] #define alloca __builtin_alloca ^ --- balloon_help.o --- In file included from balloon_help.c:33:0: ./config.h:868:16: warning: "__builtin_alloca" is not defined, evaluates to 0 [-Wundef] #define alloca __builtin_alloca ^ --- toolbar-xlike.o --- In file included from toolbar-xlike.c:24:0: ./config.h:868:16: warning: "__builtin_alloca" is not defined, evaluates to 0 [-Wundef] #define alloca __builtin_alloca ^ --- balloon-x.o --- In file included from balloon-x.c:22:0: ./config.h:868:16: warning: "__builtin_alloca" is not defined, evaluates to 0 [-Wundef] #define alloca __builtin_alloca ^ --- TopLevelEmacsShell.o --- In file included from /pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/src/EmacsShell-sub.c:79:0: ./config.h:868:16: warning: "__builtin_alloca" is not defined, evaluates to 0 [-Wundef] #define alloca __builtin_alloca ^ --- EmacsShell.o --- In file included from EmacsShell.c:23:0: ./config.h:868:16: warning: "__builtin_alloca" is not defined, evaluates to 0 [-Wundef] #define alloca __builtin_alloca ^ --- device-x.o --- gcc -c -Wall -Wno-switch -Wundef -Wsign-compare -Wno-char-subscripts -Wpacked -Wpointer-arith -Wshadow -Wmissing-declarations -Wmissing-prototypes -Wstrict-prototypes -Wdeclaration-after-statement -g -O2 -Dunix -no-pie -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -DTERMINFO -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -Demacs -I. -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/src -DHAVE_CONFIG_H -I/usr/pkg/include -I/usr/X11R7/include -I/usr/X11R7/include device-x.c In file included from device-x.c:28:0: ./config.h:868:16: warning: "__builtin_alloca" is not defined, evaluates to 0 [-Wundef] #define alloca __builtin_alloca ^ --- TopLevelEmacsShell.o --- mv EmacsShell-sub.o TopLevelEmacsShell.o --- dialog-x.o --- gcc -c -Wall -Wno-switch -Wundef -Wsign-compare -Wno-char-subscripts -Wpacked -Wpointer-arith -Wshadow -Wmissing-declarations -Wmissing-prototypes -Wstrict-prototypes -Wdeclaration-after-statement -g -O2 -Dunix -no-pie -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -DTERMINFO -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -Demacs -I. -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/src -DHAVE_CONFIG_H -I/usr/pkg/include -I/usr/X11R7/include -I/usr/X11R7/include dialog-x.c --- fontcolor-x.o --- gcc -c -Wall -Wno-switch -Wundef -Wsign-compare -Wno-char-subscripts -Wpacked -Wpointer-arith -Wshadow -Wmissing-declarations -Wmissing-prototypes -Wstrict-prototypes -Wdeclaration-after-statement -g -O2 -Dunix -no-pie -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -DTERMINFO -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -Demacs -I. -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/src -DHAVE_CONFIG_H -I/usr/pkg/include -I/usr/X11R7/include -I/usr/X11R7/include fontcolor-x.c --- dialog-x.o --- In file included from dialog-x.c:25:0: ./config.h:868:16: warning: "__builtin_alloca" is not defined, evaluates to 0 [-Wundef] #define alloca __builtin_alloca ^ --- fontcolor-x.o --- In file included from fontcolor-x.c:31:0: ./config.h:868:16: warning: "__builtin_alloca" is not defined, evaluates to 0 [-Wundef] #define alloca __builtin_alloca ^ --- frame-x.o --- gcc -c -Wall -Wno-switch -Wundef -Wsign-compare -Wno-char-subscripts -Wpacked -Wpointer-arith -Wshadow -Wmissing-declarations -Wmissing-prototypes -Wstrict-prototypes -Wdeclaration-after-statement -g -O2 -Dunix -no-pie -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -DTERMINFO -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -Demacs -I. -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/src -DHAVE_CONFIG_H -I/usr/pkg/include -I/usr/X11R7/include -I/usr/X11R7/include frame-x.c In file included from frame-x.c:27:0: ./config.h:868:16: warning: "__builtin_alloca" is not defined, evaluates to 0 [-Wundef] #define alloca __builtin_alloca ^ --- gccache-x.o --- gcc -c -Wall -Wno-switch -Wundef -Wsign-compare -Wno-char-subscripts -Wpacked -Wpointer-arith -Wshadow -Wmissing-declarations -Wmissing-prototypes -Wstrict-prototypes -Wdeclaration-after-statement -g -O2 -Dunix -no-pie -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -DTERMINFO -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -Demacs -I. -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/src -DHAVE_CONFIG_H -I/usr/pkg/include -I/usr/X11R7/include -I/usr/X11R7/include gccache-x.c --- fontcolor-x.o --- fontcolor-x.c: In function 'x_print_font_instance': fontcolor-x.c:411:52: warning: format '%zx' expects argument of type 'size_t', but argument 3 has type 'Font {aka long unsigned int}' [-Wformat=] write_fmt_string (printcharfun, " font id: 0x%zx,", ~~^ %lx --- device-x.o --- device-x.c: In function 'x_IO_error_handler': device-x.c:1275:30: warning: format '%zu' expects argument of type 'size_t', but argument 2 has type 'long unsigned int' [-Wformat=] stderr_out (" after %zu requests (%zu known processed) with %u " ~~^ %lu device-x.c:1277:5: (EMACS_UINT) (NextRequest (disp) - 1), ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ device-x.c:1275:44: warning: format '%zu' expects argument of type 'size_t', but argument 3 has type 'long unsigned int' [-Wformat=] stderr_out (" after %zu requests (%zu known processed) with %u " ~~^ %lu device-x.c:1291:3: warning: format '%zu' expects argument of type 'size_t', but argument 7 has type 'long int' [-Wformat=] "I/O Error %d (%s) on display connection\n" ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ device-x.c:1296:10: (EMACS_INT) (NextRequest (disp) - 1), ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ device-x.c:1292:21: note: format string is defined here " \"%s\" after %zu requests (%zu known processed)\n" ~~^ %lu device-x.c:1291:3: warning: format '%zu' expects argument of type 'size_t', but argument 8 has type 'long unsigned int' [-Wformat=] "I/O Error %d (%s) on display connection\n" ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ device-x.c:1292:35: note: format string is defined here " \"%s\" after %zu requests (%zu known processed)\n" ~~^ %lu --- gccache-x.o --- In file included from gccache-x.c:53:0: ./config.h:868:16: warning: "__builtin_alloca" is not defined, evaluates to 0 [-Wundef] #define alloca __builtin_alloca ^ --- glyphs-x.o --- gcc -c -Wall -Wno-switch -Wundef -Wsign-compare -Wno-char-subscripts -Wpacked -Wpointer-arith -Wshadow -Wmissing-declarations -Wmissing-prototypes -Wstrict-prototypes -Wdeclaration-after-statement -g -O2 -Dunix -no-pie -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -DTERMINFO -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -Demacs -I. -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/src -DHAVE_CONFIG_H -I/usr/pkg/include -I/usr/X11R7/include -I/usr/X11R7/include glyphs-x.c --- frame-x.o --- frame-x.c: In function 'Fx_window_id': frame-x.c:2230:35: warning: format '%zu' expects argument of type 'size_t', but argument 2 has type 'long unsigned int' [-Wformat=] return emacs_sprintf_string ("%zu", ~~^ %lu --- glyphs-x.o --- In file included from glyphs-x.c:53:0: ./config.h:868:16: warning: "__builtin_alloca" is not defined, evaluates to 0 [-Wundef] #define alloca __builtin_alloca ^ --- gui-x.o --- gcc -c -Wall -Wno-switch -Wundef -Wsign-compare -Wno-char-subscripts -Wpacked -Wpointer-arith -Wshadow -Wmissing-declarations -Wmissing-prototypes -Wstrict-prototypes -Wdeclaration-after-statement -g -O2 -Dunix -no-pie -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -DTERMINFO -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -Demacs -I. -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/src -DHAVE_CONFIG_H -I/usr/pkg/include -I/usr/X11R7/include -I/usr/X11R7/include gui-x.c --- intl-x.o --- gcc -c -Wall -Wno-switch -Wundef -Wsign-compare -Wno-char-subscripts -Wpacked -Wpointer-arith -Wshadow -Wmissing-declarations -Wmissing-prototypes -Wstrict-prototypes -Wdeclaration-after-statement -g -O2 -Dunix -no-pie -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -DTERMINFO -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -Demacs -I. -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/src -DHAVE_CONFIG_H -I/usr/pkg/include -I/usr/X11R7/include -I/usr/X11R7/include intl-x.c --- gui-x.o --- In file included from gui-x.c:26:0: ./config.h:868:16: warning: "__builtin_alloca" is not defined, evaluates to 0 [-Wundef] #define alloca __builtin_alloca ^ --- intl-x.o --- In file included from intl-x.c:20:0: ./config.h:868:16: warning: "__builtin_alloca" is not defined, evaluates to 0 [-Wundef] #define alloca __builtin_alloca ^ --- menubar-x.o --- gcc -c -Wall -Wno-switch -Wundef -Wsign-compare -Wno-char-subscripts -Wpacked -Wpointer-arith -Wshadow -Wmissing-declarations -Wmissing-prototypes -Wstrict-prototypes -Wdeclaration-after-statement -g -O2 -Dunix -no-pie -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -DTERMINFO -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -Demacs -I. -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/src -DHAVE_CONFIG_H -I/usr/pkg/include -I/usr/X11R7/include -I/usr/X11R7/include menubar-x.c --- glyphs-x.o --- glyphs-x.c: In function 'x_finalize_image_instance': glyphs-x.c:417:6: warning: dereferencing type-punned pointer will break strict-aliasing rules [-Wstrict-aliasing] XDestroyWindow (dpy, IMAGE_INSTANCE_X_SUBWINDOW_ID (p)); ^~~~~~~~~~~~~~ --- menubar-x.o --- In file included from menubar-x.c:34:0: ./config.h:868:16: warning: "__builtin_alloca" is not defined, evaluates to 0 [-Wundef] #define alloca __builtin_alloca ^ --- glyphs-x.o --- glyphs-x.c: In function 'x_map_subwindow': glyphs-x.c:2118:7: warning: dereferencing type-punned pointer will break strict-aliasing rules [-Wstrict-aliasing] Window subwindow = IMAGE_INSTANCE_X_SUBWINDOW_ID (p); ^~~~~~ glyphs-x.c: In function 'x_redisplay_subwindow': glyphs-x.c:2163:8: warning: dereferencing type-punned pointer will break strict-aliasing rules [-Wstrict-aliasing] IMAGE_INSTANCE_X_SUBWINDOW_ID (p), ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~ --- redisplay-x.o --- gcc -c -Wall -Wno-switch -Wundef -Wsign-compare -Wno-char-subscripts -Wpacked -Wpointer-arith -Wshadow -Wmissing-declarations -Wmissing-prototypes -Wstrict-prototypes -Wdeclaration-after-statement -g -O2 -Dunix -no-pie -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -DTERMINFO -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -Demacs -I. -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/src -DHAVE_CONFIG_H -I/usr/pkg/include -I/usr/X11R7/include -I/usr/X11R7/include redisplay-x.c --- scrollbar-x.o --- gcc -c -Wall -Wno-switch -Wundef -Wsign-compare -Wno-char-subscripts -Wpacked -Wpointer-arith -Wshadow -Wmissing-declarations -Wmissing-prototypes -Wstrict-prototypes -Wdeclaration-after-statement -g -O2 -Dunix -no-pie -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -DTERMINFO -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -Demacs -I. -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/src -DHAVE_CONFIG_H -I/usr/pkg/include -I/usr/X11R7/include -I/usr/X11R7/include scrollbar-x.c --- redisplay-x.o --- In file included from redisplay-xlike-inc.c:34:0, from redisplay-x.c:33: ./config.h:868:16: warning: "__builtin_alloca" is not defined, evaluates to 0 [-Wundef] #define alloca __builtin_alloca ^ --- scrollbar-x.o --- In file included from scrollbar-x.c:27:0: ./config.h:868:16: warning: "__builtin_alloca" is not defined, evaluates to 0 [-Wundef] #define alloca __builtin_alloca ^ scrollbar-x.c: In function 'x_create_scrollbar_instance': scrollbar-x.c:115:32: warning: format '%zd' expects argument of type 'signed size_t', but argument 3 has type 'long int' [-Wformat=] "scrollbar_%zd", (EMACS_INT) (SCROLLBAR_X_ID (instance))); ~~^ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ %ld --- select-x.o --- gcc -c -Wall -Wno-switch -Wundef -Wsign-compare -Wno-char-subscripts -Wpacked -Wpointer-arith -Wshadow -Wmissing-declarations -Wmissing-prototypes -Wstrict-prototypes -Wdeclaration-after-statement -g -O2 -Dunix -no-pie -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -DTERMINFO -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -Demacs -I. -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/src -DHAVE_CONFIG_H -I/usr/pkg/include -I/usr/X11R7/include -I/usr/X11R7/include select-x.c In file included from select-x.c:24:0: ./config.h:868:16: warning: "__builtin_alloca" is not defined, evaluates to 0 [-Wundef] #define alloca __builtin_alloca ^ --- toolbar-x.o --- gcc -c -Wall -Wno-switch -Wundef -Wsign-compare -Wno-char-subscripts -Wpacked -Wpointer-arith -Wshadow -Wmissing-declarations -Wmissing-prototypes -Wstrict-prototypes -Wdeclaration-after-statement -g -O2 -Dunix -no-pie -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -DTERMINFO -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -Demacs -I. -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/src -DHAVE_CONFIG_H -I/usr/pkg/include -I/usr/X11R7/include -I/usr/X11R7/include toolbar-x.c In file included from toolbar-x.c:26:0: ./config.h:868:16: warning: "__builtin_alloca" is not defined, evaluates to 0 [-Wundef] #define alloca __builtin_alloca ^ --- console-tty.o --- gcc -c -Wall -Wno-switch -Wundef -Wsign-compare -Wno-char-subscripts -Wpacked -Wpointer-arith -Wshadow -Wmissing-declarations -Wmissing-prototypes -Wstrict-prototypes -Wdeclaration-after-statement -g -O2 -Dunix -no-pie -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -DTERMINFO -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -Demacs -I. -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/src -DHAVE_CONFIG_H -I/usr/pkg/include -I/usr/X11R7/include -I/usr/X11R7/include console-tty.c In file included from console-tty.c:25:0: ./config.h:868:16: warning: "__builtin_alloca" is not defined, evaluates to 0 [-Wundef] #define alloca __builtin_alloca ^ --- device-tty.o --- gcc -c -Wall -Wno-switch -Wundef -Wsign-compare -Wno-char-subscripts -Wpacked -Wpointer-arith -Wshadow -Wmissing-declarations -Wmissing-prototypes -Wstrict-prototypes -Wdeclaration-after-statement -g -O2 -Dunix -no-pie -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -DTERMINFO -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -Demacs -I. -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/src -DHAVE_CONFIG_H -I/usr/pkg/include -I/usr/X11R7/include -I/usr/X11R7/include device-tty.c In file included from device-tty.c:25:0: ./config.h:868:16: warning: "__builtin_alloca" is not defined, evaluates to 0 [-Wundef] #define alloca __builtin_alloca ^ --- event-tty.o --- --- fontcolor-tty.o --- --- event-tty.o --- gcc -c -Wall -Wno-switch -Wundef -Wsign-compare -Wno-char-subscripts -Wpacked -Wpointer-arith -Wshadow -Wmissing-declarations -Wmissing-prototypes -Wstrict-prototypes -Wdeclaration-after-statement -g -O2 -Dunix -no-pie -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -DTERMINFO -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -Demacs -I. -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/src -DHAVE_CONFIG_H -I/usr/pkg/include -I/usr/X11R7/include -I/usr/X11R7/include event-tty.c --- fontcolor-tty.o --- gcc -c -Wall -Wno-switch -Wundef -Wsign-compare -Wno-char-subscripts -Wpacked -Wpointer-arith -Wshadow -Wmissing-declarations -Wmissing-prototypes -Wstrict-prototypes -Wdeclaration-after-statement -g -O2 -Dunix -no-pie -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -DTERMINFO -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -Demacs -I. -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/src -DHAVE_CONFIG_H -I/usr/pkg/include -I/usr/X11R7/include -I/usr/X11R7/include fontcolor-tty.c --- event-tty.o --- In file included from event-tty.c:23:0: ./config.h:868:16: warning: "__builtin_alloca" is not defined, evaluates to 0 [-Wundef] #define alloca __builtin_alloca ^ --- frame-tty.o --- gcc -c -Wall -Wno-switch -Wundef -Wsign-compare -Wno-char-subscripts -Wpacked -Wpointer-arith -Wshadow -Wmissing-declarations -Wmissing-prototypes -Wstrict-prototypes -Wdeclaration-after-statement -g -O2 -Dunix -no-pie -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -DTERMINFO -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -Demacs -I. -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/src -DHAVE_CONFIG_H -I/usr/pkg/include -I/usr/X11R7/include -I/usr/X11R7/include frame-tty.c --- fontcolor-tty.o --- In file included from fontcolor-tty.c:22:0: ./config.h:868:16: warning: "__builtin_alloca" is not defined, evaluates to 0 [-Wundef] #define alloca __builtin_alloca ^ --- frame-tty.o --- In file included from frame-tty.c:25:0: ./config.h:868:16: warning: "__builtin_alloca" is not defined, evaluates to 0 [-Wundef] #define alloca __builtin_alloca ^ --- redisplay-tty.o --- gcc -c -Wall -Wno-switch -Wundef -Wsign-compare -Wno-char-subscripts -Wpacked -Wpointer-arith -Wshadow -Wmissing-declarations -Wmissing-prototypes -Wstrict-prototypes -Wdeclaration-after-statement -g -O2 -Dunix -no-pie -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -DTERMINFO -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -Demacs -I. -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/src -DHAVE_CONFIG_H -I/usr/pkg/include -I/usr/X11R7/include -I/usr/X11R7/include redisplay-tty.c In file included from redisplay-tty.c:30:0: ./config.h:868:16: warning: "__builtin_alloca" is not defined, evaluates to 0 [-Wundef] #define alloca __builtin_alloca ^ --- database.o --- gcc -c -Wall -Wno-switch -Wundef -Wsign-compare -Wno-char-subscripts -Wpacked -Wpointer-arith -Wshadow -Wmissing-declarations -Wmissing-prototypes -Wstrict-prototypes -Wdeclaration-after-statement -g -O2 -Dunix -no-pie -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -DTERMINFO -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -Demacs -I. -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/src -DHAVE_CONFIG_H -I/usr/pkg/include -I/usr/X11R7/include -I/usr/X11R7/include database.c --- mule-ccl.o --- --- mule-charset.o --- gcc -c -Wall -Wno-switch -Wundef -Wsign-compare -Wno-char-subscripts -Wpacked -Wpointer-arith -Wshadow -Wmissing-declarations -Wmissing-prototypes -Wstrict-prototypes -Wdeclaration-after-statement -g -O2 -Dunix -no-pie -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -DTERMINFO -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -Demacs -I. -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/src -DHAVE_CONFIG_H -I/usr/pkg/include -I/usr/X11R7/include -I/usr/X11R7/include mule-charset.c --- mule-ccl.o --- gcc -c -Wall -Wno-switch -Wundef -Wsign-compare -Wno-char-subscripts -Wpacked -Wpointer-arith -Wshadow -Wmissing-declarations -Wmissing-prototypes -Wstrict-prototypes -Wdeclaration-after-statement -g -O2 -Dunix -no-pie -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -DTERMINFO -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -Demacs -I. -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/src -DHAVE_CONFIG_H -I/usr/pkg/include -I/usr/X11R7/include -I/usr/X11R7/include mule-ccl.c In file included from mule-ccl.c:23:0: ./config.h:868:16: warning: "__builtin_alloca" is not defined, evaluates to 0 [-Wundef] #define alloca __builtin_alloca ^ --- mule-charset.o --- In file included from mule-charset.c:25:0: ./config.h:868:16: warning: "__builtin_alloca" is not defined, evaluates to 0 [-Wundef] #define alloca __builtin_alloca ^ --- database.o --- In file included from database.c:27:0: ./config.h:868:16: warning: "__builtin_alloca" is not defined, evaluates to 0 [-Wundef] #define alloca __builtin_alloca ^ --- mule-coding.o --- gcc -c -Wall -Wno-switch -Wundef -Wsign-compare -Wno-char-subscripts -Wpacked -Wpointer-arith -Wshadow -Wmissing-declarations -Wmissing-prototypes -Wstrict-prototypes -Wdeclaration-after-statement -g -O2 -Dunix -no-pie -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -DTERMINFO -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -Demacs -I. -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/src -DHAVE_CONFIG_H -I/usr/pkg/include -I/usr/X11R7/include -I/usr/X11R7/include mule-coding.c --- abbrev.o --- gcc -c -Wall -Wno-switch -Wundef -Wsign-compare -Wno-char-subscripts -Wpacked -Wpointer-arith -Wshadow -Wmissing-declarations -Wmissing-prototypes -Wstrict-prototypes -Wdeclaration-after-statement -g -O2 -Dunix -no-pie -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -DTERMINFO -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -Demacs -I. -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/src -DHAVE_CONFIG_H -I/usr/pkg/include -I/usr/X11R7/include -I/usr/X11R7/include abbrev.c --- mule-ccl.o --- mule-ccl.c: In function 'ccl_driver': mule-ccl.c:2001:48: warning: format '%zx' expects argument of type 'size_t', but argument 5 has type 'long int' [-Wformat=] "\nCCL: Invalid command %lx (ccl_code = %zx) at %zd.", ~~^ %lx mule-ccl.c:2001:56: warning: format '%zd' expects argument of type 'signed size_t', but argument 6 has type 'long int' [-Wformat=] "\nCCL: Invalid command %lx (ccl_code = %zx) at %zd.", ~~^ %ld mule-ccl.c:2007:56: warning: format '%zx' expects argument of type 'size_t', but argument 5 has type 'long int' [-Wformat=] "\nCCL: Invalid charset (command %x, ccl_code = %zx)" ~~^ %lx mule-ccl.c:2007:6: warning: format '%zd' expects argument of type 'signed size_t', but argument 6 has type 'long int' [-Wformat=] "\nCCL: Invalid charset (command %x, ccl_code = %zx)" ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ mule-ccl.c:2008:34: note: format string is defined here " at %zd.", (int) (code & 0x1F), code, this_ic); ~~^ %ld mule-ccl.c:2013:6: warning: format '%zx' expects argument of type 'size_t', but argument 5 has type 'long int' [-Wformat=] "\nCCL: Conversion error (command %x, " ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ mule-ccl.c:2014:41: note: format string is defined here "ccl_code = %zx) at %zd.", (int) (code & 0x1F), ~~^ %lx mule-ccl.c:2013:6: warning: format '%zd' expects argument of type 'signed size_t', but argument 6 has type 'long int' [-Wformat=] "\nCCL: Conversion error (command %x, " ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ mule-ccl.c:2014:49: note: format string is defined here "ccl_code = %zx) at %zd.", (int) (code & 0x1F), ~~^ %ld mule-ccl.c:2033:12: warning: format '%zd' expects argument of type 'signed size_t', but argument 4 has type 'long int' [-Wformat=] " %zd", ~~^ %ld ccl_backtrace_table[i])); ~~~~~~~~~~~~~~~~~~~~~~ --- mule-coding.o --- In file included from mule-coding.c:31:0: ./config.h:868:16: warning: "__builtin_alloca" is not defined, evaluates to 0 [-Wundef] #define alloca __builtin_alloca ^ --- abbrev.o --- In file included from abbrev.c:33:0: ./config.h:868:16: warning: "__builtin_alloca" is not defined, evaluates to 0 [-Wundef] #define alloca __builtin_alloca ^ --- alloc.o --- gcc -c -Wall -Wno-switch -Wundef -Wsign-compare -Wno-char-subscripts -Wpacked -Wpointer-arith -Wshadow -Wmissing-declarations -Wmissing-prototypes -Wstrict-prototypes -Wdeclaration-after-statement -g -O2 -Dunix -no-pie -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -DTERMINFO -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -Demacs -I. -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/src -DHAVE_CONFIG_H -I/usr/pkg/include -I/usr/X11R7/include -I/usr/X11R7/include alloc.c In file included from alloc.c:40:0: ./config.h:868:16: warning: "__builtin_alloca" is not defined, evaluates to 0 [-Wundef] #define alloca __builtin_alloca ^ --- alloca.o --- gcc -c -Wall -Wno-switch -Wundef -Wsign-compare -Wno-char-subscripts -Wpacked -Wpointer-arith -Wshadow -Wmissing-declarations -Wmissing-prototypes -Wstrict-prototypes -Wdeclaration-after-statement -g -O2 -Dunix -no-pie -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -DTERMINFO -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -Demacs -I. -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/src -DHAVE_CONFIG_H -I/usr/pkg/include -I/usr/X11R7/include -I/usr/X11R7/include alloca.c In file included from alloca.c:33:0: ./config.h:868:16: warning: "__builtin_alloca" is not defined, evaluates to 0 [-Wundef] #define alloca __builtin_alloca ^ --- alloc.o --- alloc.c: In function 'old_alloc_sized_lcrecord': alloc.c:450:14: warning: format '%zd' expects argument of type 'signed size_t', but argument 3 has type 'Bytecount {aka long int}' [-Wformat=] stderr_out ("allocating %s (size %zd)\n", type, \ ^ alloc.c:540:3: note: in expansion of macro 'INCREMENT_CONS_COUNTER' INCREMENT_CONS_COUNTER (size, implementation->name); ^~~~~~~~~~~~~~~~~~~~~~ --- array.o --- gcc -c -Wall -Wno-switch -Wundef -Wsign-compare -Wno-char-subscripts -Wpacked -Wpointer-arith -Wshadow -Wmissing-declarations -Wmissing-prototypes -Wstrict-prototypes -Wdeclaration-after-statement -g -O2 -Dunix -no-pie -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -DTERMINFO -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -Demacs -I. -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/src -DHAVE_CONFIG_H -I/usr/pkg/include -I/usr/X11R7/include -I/usr/X11R7/include array.c --- alloc.o --- alloc.c: In function 'allocate_big_string_chars': alloc.c:450:14: warning: format '%zd' expects argument of type 'signed size_t', but argument 3 has type 'Bytecount {aka long int}' [-Wformat=] stderr_out ("allocating %s (size %zd)\n", type, \ ^ alloc.c:2599:3: note: in expansion of macro 'INCREMENT_CONS_COUNTER' INCREMENT_CONS_COUNTER (length, "string chars"); ^~~~~~~~~~~~~~~~~~~~~~ alloc.c: In function 'allocate_string_chars_struct': alloc.c:450:14: warning: format '%zd' expects argument of type 'signed size_t', but argument 3 has type 'Bytecount {aka long int}' [-Wformat=] stderr_out ("allocating %s (size %zd)\n", type, \ ^ alloc.c:2635:3: note: in expansion of macro 'INCREMENT_CONS_COUNTER' INCREMENT_CONS_COUNTER (fullsize, "string chars"); ^~~~~~~~~~~~~~~~~~~~~~ alloc.c: In function 'resize_string': alloc.c:450:14: warning: format '%zd' expects argument of type 'signed size_t', but argument 3 has type 'Bytecount {aka long int}' [-Wformat=] stderr_out ("allocating %s (size %zd)\n", type, \ ^ alloc.c:2762:4: note: in expansion of macro 'INCREMENT_CONS_COUNTER' INCREMENT_CONS_COUNTER (newfullsize, "string chars"); ^~~~~~~~~~~~~~~~~~~~~~ --- array.o --- In file included from array.c:25:0: ./config.h:868:16: warning: "__builtin_alloca" is not defined, evaluates to 0 [-Wundef] #define alloca __builtin_alloca ^ --- blocktype.o --- gcc -c -Wall -Wno-switch -Wundef -Wsign-compare -Wno-char-subscripts -Wpacked -Wpointer-arith -Wshadow -Wmissing-declarations -Wmissing-prototypes -Wstrict-prototypes -Wdeclaration-after-statement -g -O2 -Dunix -no-pie -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -DTERMINFO -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -Demacs -I. -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/src -DHAVE_CONFIG_H -I/usr/pkg/include -I/usr/X11R7/include -I/usr/X11R7/include blocktype.c In file included from blocktype.c:61:0: ./config.h:868:16: warning: "__builtin_alloca" is not defined, evaluates to 0 [-Wundef] #define alloca __builtin_alloca ^ --- buffer.o --- gcc -c -Wall -Wno-switch -Wundef -Wsign-compare -Wno-char-subscripts -Wpacked -Wpointer-arith -Wshadow -Wmissing-declarations -Wmissing-prototypes -Wstrict-prototypes -Wdeclaration-after-statement -g -O2 -Dunix -no-pie -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -DTERMINFO -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -Demacs -I. -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/src -DHAVE_CONFIG_H -I/usr/pkg/include -I/usr/X11R7/include -I/usr/X11R7/include buffer.c --- bytecode.o --- gcc -c -Wall -Wno-switch -Wundef -Wsign-compare -Wno-char-subscripts -Wpacked -Wpointer-arith -Wshadow -Wmissing-declarations -Wmissing-prototypes -Wstrict-prototypes -Wdeclaration-after-statement -g -O2 -Dunix -no-pie -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -DTERMINFO -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -Demacs -I. -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/src -DHAVE_CONFIG_H -I/usr/pkg/include -I/usr/X11R7/include -I/usr/X11R7/include bytecode.c --- callint.o --- gcc -c -Wall -Wno-switch -Wundef -Wsign-compare -Wno-char-subscripts -Wpacked -Wpointer-arith -Wshadow -Wmissing-declarations -Wmissing-prototypes -Wstrict-prototypes -Wdeclaration-after-statement -g -O2 -Dunix -no-pie -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -DTERMINFO -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -Demacs -I. -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/src -DHAVE_CONFIG_H -I/usr/pkg/include -I/usr/X11R7/include -I/usr/X11R7/include callint.c --- buffer.o --- In file included from buffer.c:74:0: ./config.h:868:16: warning: "__builtin_alloca" is not defined, evaluates to 0 [-Wundef] #define alloca __builtin_alloca ^ --- bytecode.o --- In file included from bytecode.c:50:0: ./config.h:868:16: warning: "__builtin_alloca" is not defined, evaluates to 0 [-Wundef] #define alloca __builtin_alloca ^ --- callint.o --- In file included from callint.c:28:0: ./config.h:868:16: warning: "__builtin_alloca" is not defined, evaluates to 0 [-Wundef] #define alloca __builtin_alloca ^ --- casefiddle.o --- gcc -c -Wall -Wno-switch -Wundef -Wsign-compare -Wno-char-subscripts -Wpacked -Wpointer-arith -Wshadow -Wmissing-declarations -Wmissing-prototypes -Wstrict-prototypes -Wdeclaration-after-statement -g -O2 -Dunix -no-pie -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -DTERMINFO -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -Demacs -I. -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/src -DHAVE_CONFIG_H -I/usr/pkg/include -I/usr/X11R7/include -I/usr/X11R7/include casefiddle.c In file included from casefiddle.c:22:0: ./config.h:868:16: warning: "__builtin_alloca" is not defined, evaluates to 0 [-Wundef] #define alloca __builtin_alloca ^ --- buffer.o --- buffer.c: In function 'Fgenerate_new_buffer_name': buffer.c:792:54: warning: format '%zd' expects argument of type 'signed size_t', but argument 5 has type 'long int' [-Wformat=] clen = emacs_snprintf (candidate, csize, "%s<%zd>", XSTRING_DATA (name), ~~^ %ld ++count); ~~~~~~~ --- bytecode.o --- bytecode.c: In function 'check_constants_index': bytecode.c:1996:59: warning: format '%zd' expects argument of type 'signed size_t', but argument 4 has type 'long int' [-Wformat=] "reference %d to constants array out of range 0, %zd", ~~^ %ld bytecode.c: In function 'print_compiled_function': bytecode.c:2462:43: warning: format '%zd' expects argument of type 'signed size_t', but argument 3 has type 'Charcount {aka long int}' [-Wformat=] write_fmt_string (printcharfun, "\"...(%zd)\"", ~~^ %ld --- casetab.o --- gcc -c -Wall -Wno-switch -Wundef -Wsign-compare -Wno-char-subscripts -Wpacked -Wpointer-arith -Wshadow -Wmissing-declarations -Wmissing-prototypes -Wstrict-prototypes -Wdeclaration-after-statement -g -O2 -Dunix -no-pie -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -DTERMINFO -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -Demacs -I. -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/src -DHAVE_CONFIG_H -I/usr/pkg/include -I/usr/X11R7/include -I/usr/X11R7/include casetab.c In file included from casetab.c:74:0: ./config.h:868:16: warning: "__builtin_alloca" is not defined, evaluates to 0 [-Wundef] #define alloca __builtin_alloca ^ --- chartab.o --- gcc -c -Wall -Wno-switch -Wundef -Wsign-compare -Wno-char-subscripts -Wpacked -Wpointer-arith -Wshadow -Wmissing-declarations -Wmissing-prototypes -Wstrict-prototypes -Wdeclaration-after-statement -g -O2 -Dunix -no-pie -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -DTERMINFO -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -Demacs -I. -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/src -DHAVE_CONFIG_H -I/usr/pkg/include -I/usr/X11R7/include -I/usr/X11R7/include chartab.c In file included from chartab.c:52:0: ./config.h:868:16: warning: "__builtin_alloca" is not defined, evaluates to 0 [-Wundef] #define alloca __builtin_alloca ^ --- cm.o --- gcc -c -Wall -Wno-switch -Wundef -Wsign-compare -Wno-char-subscripts -Wpacked -Wpointer-arith -Wshadow -Wmissing-declarations -Wmissing-prototypes -Wstrict-prototypes -Wdeclaration-after-statement -g -O2 -Dunix -no-pie -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -DTERMINFO -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -Demacs -I. -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/src -DHAVE_CONFIG_H -I/usr/pkg/include -I/usr/X11R7/include -I/usr/X11R7/include cm.c In file included from cm.c:25:0: ./config.h:868:16: warning: "__builtin_alloca" is not defined, evaluates to 0 [-Wundef] #define alloca __builtin_alloca ^ --- cmdloop.o --- gcc -c -Wall -Wno-switch -Wundef -Wsign-compare -Wno-char-subscripts -Wpacked -Wpointer-arith -Wshadow -Wmissing-declarations -Wmissing-prototypes -Wstrict-prototypes -Wdeclaration-after-statement -g -O2 -Dunix -no-pie -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -DTERMINFO -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -Demacs -I. -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/src -DHAVE_CONFIG_H -I/usr/pkg/include -I/usr/X11R7/include -I/usr/X11R7/include cmdloop.c --- cmds.o --- gcc -c -Wall -Wno-switch -Wundef -Wsign-compare -Wno-char-subscripts -Wpacked -Wpointer-arith -Wshadow -Wmissing-declarations -Wmissing-prototypes -Wstrict-prototypes -Wdeclaration-after-statement -g -O2 -Dunix -no-pie -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -DTERMINFO -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -Demacs -I. -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/src -DHAVE_CONFIG_H -I/usr/pkg/include -I/usr/X11R7/include -I/usr/X11R7/include cmds.c --- cmdloop.o --- In file included from cmdloop.c:29:0: ./config.h:868:16: warning: "__builtin_alloca" is not defined, evaluates to 0 [-Wundef] #define alloca __builtin_alloca ^ --- cmds.o --- In file included from cmds.c:22:0: ./config.h:868:16: warning: "__builtin_alloca" is not defined, evaluates to 0 [-Wundef] #define alloca __builtin_alloca ^ --- console-stream.o --- gcc -c -Wall -Wno-switch -Wundef -Wsign-compare -Wno-char-subscripts -Wpacked -Wpointer-arith -Wshadow -Wmissing-declarations -Wmissing-prototypes -Wstrict-prototypes -Wdeclaration-after-statement -g -O2 -Dunix -no-pie -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -DTERMINFO -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -Demacs -I. -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/src -DHAVE_CONFIG_H -I/usr/pkg/include -I/usr/X11R7/include -I/usr/X11R7/include console-stream.c In file included from console-stream.c:26:0: ./config.h:868:16: warning: "__builtin_alloca" is not defined, evaluates to 0 [-Wundef] #define alloca __builtin_alloca ^ --- console.o --- gcc -c -Wall -Wno-switch -Wundef -Wsign-compare -Wno-char-subscripts -Wpacked -Wpointer-arith -Wshadow -Wmissing-declarations -Wmissing-prototypes -Wstrict-prototypes -Wdeclaration-after-statement -g -O2 -Dunix -no-pie -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -DTERMINFO -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -Demacs -I. -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/src -DHAVE_CONFIG_H -I/usr/pkg/include -I/usr/X11R7/include -I/usr/X11R7/include console.c In file included from console.c:27:0: ./config.h:868:16: warning: "__builtin_alloca" is not defined, evaluates to 0 [-Wundef] #define alloca __builtin_alloca ^ --- data.o --- gcc -c -Wall -Wno-switch -Wundef -Wsign-compare -Wno-char-subscripts -Wpacked -Wpointer-arith -Wshadow -Wmissing-declarations -Wmissing-prototypes -Wstrict-prototypes -Wdeclaration-after-statement -g -O2 -Dunix -no-pie -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -DTERMINFO -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -Demacs -I. -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/src -DHAVE_CONFIG_H -I/usr/pkg/include -I/usr/X11R7/include -I/usr/X11R7/include data.c In file included from data.c:26:0: ./config.h:868:16: warning: "__builtin_alloca" is not defined, evaluates to 0 [-Wundef] #define alloca __builtin_alloca ^ --- debug.o --- gcc -c -Wall -Wno-switch -Wundef -Wsign-compare -Wno-char-subscripts -Wpacked -Wpointer-arith -Wshadow -Wmissing-declarations -Wmissing-prototypes -Wstrict-prototypes -Wdeclaration-after-statement -g -O2 -Dunix -no-pie -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -DTERMINFO -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -Demacs -I. -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/src -DHAVE_CONFIG_H -I/usr/pkg/include -I/usr/X11R7/include -I/usr/X11R7/include debug.c In file included from debug.c:25:0: ./config.h:868:16: warning: "__builtin_alloca" is not defined, evaluates to 0 [-Wundef] #define alloca __builtin_alloca ^ --- device.o --- gcc -c -Wall -Wno-switch -Wundef -Wsign-compare -Wno-char-subscripts -Wpacked -Wpointer-arith -Wshadow -Wmissing-declarations -Wmissing-prototypes -Wstrict-prototypes -Wdeclaration-after-statement -g -O2 -Dunix -no-pie -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -DTERMINFO -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -Demacs -I. -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/src -DHAVE_CONFIG_H -I/usr/pkg/include -I/usr/X11R7/include -I/usr/X11R7/include device.c In file included from device.c:28:0: ./config.h:868:16: warning: "__builtin_alloca" is not defined, evaluates to 0 [-Wundef] #define alloca __builtin_alloca ^ --- dialog.o --- gcc -c -Wall -Wno-switch -Wundef -Wsign-compare -Wno-char-subscripts -Wpacked -Wpointer-arith -Wshadow -Wmissing-declarations -Wmissing-prototypes -Wstrict-prototypes -Wdeclaration-after-statement -g -O2 -Dunix -no-pie -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -DTERMINFO -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -Demacs -I. -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/src -DHAVE_CONFIG_H -I/usr/pkg/include -I/usr/X11R7/include -I/usr/X11R7/include dialog.c --- dired.o --- gcc -c -Wall -Wno-switch -Wundef -Wsign-compare -Wno-char-subscripts -Wpacked -Wpointer-arith -Wshadow -Wmissing-declarations -Wmissing-prototypes -Wstrict-prototypes -Wdeclaration-after-statement -g -O2 -Dunix -no-pie -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -DTERMINFO -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -Demacs -I. -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/src -DHAVE_CONFIG_H -I/usr/pkg/include -I/usr/X11R7/include -I/usr/X11R7/include dired.c --- dialog.o --- In file included from dialog.c:23:0: ./config.h:868:16: warning: "__builtin_alloca" is not defined, evaluates to 0 [-Wundef] #define alloca __builtin_alloca ^ --- dired.o --- In file included from dired.c:22:0: ./config.h:868:16: warning: "__builtin_alloca" is not defined, evaluates to 0 [-Wundef] #define alloca __builtin_alloca ^ --- doc.o --- gcc -c -Wall -Wno-switch -Wundef -Wsign-compare -Wno-char-subscripts -Wpacked -Wpointer-arith -Wshadow -Wmissing-declarations -Wmissing-prototypes -Wstrict-prototypes -Wdeclaration-after-statement -g -O2 -Dunix -no-pie -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -DTERMINFO -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -Demacs -I. -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/src -DHAVE_CONFIG_H -I/usr/pkg/include -I/usr/X11R7/include -I/usr/X11R7/include doc.c In file included from doc.c:25:0: ./config.h:868:16: warning: "__builtin_alloca" is not defined, evaluates to 0 [-Wundef] #define alloca __builtin_alloca ^ --- doprnt.o --- gcc -c -Wall -Wno-switch -Wundef -Wsign-compare -Wno-char-subscripts -Wpacked -Wpointer-arith -Wshadow -Wmissing-declarations -Wmissing-prototypes -Wstrict-prototypes -Wdeclaration-after-statement -g -O2 -Dunix -no-pie -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -DTERMINFO -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -Demacs -I. -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/src -DHAVE_CONFIG_H -I/usr/pkg/include -I/usr/X11R7/include -I/usr/X11R7/include doprnt.c In file included from doprnt.c:42:0: ./config.h:868:16: warning: "__builtin_alloca" is not defined, evaluates to 0 [-Wundef] #define alloca __builtin_alloca ^ --- dumper.o --- gcc -c -Wall -Wno-switch -Wundef -Wsign-compare -Wno-char-subscripts -Wpacked -Wpointer-arith -Wshadow -Wmissing-declarations -Wmissing-prototypes -Wstrict-prototypes -Wdeclaration-after-statement -g -O2 -Dunix -no-pie -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -DTERMINFO -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -Demacs -I. -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/src -DHAVE_CONFIG_H -I/usr/pkg/include -I/usr/X11R7/include -I/usr/X11R7/include dumper.c In file included from dumper.c:35:0: ./config.h:868:16: warning: "__builtin_alloca" is not defined, evaluates to 0 [-Wundef] #define alloca __builtin_alloca ^ --- doprnt.o --- In file included from doprnt.c:47:0: doprnt.c: In function 'emacs_vsnprintf': lstream.h:562:30: warning: dereferencing type-punned pointer will break strict-aliasing rules [-Wstrict-aliasing] LSTREAM_TYPE_DATA (stream, fixed_buffer) ^ lstream.h:315:12: note: in definition of macro 'LSTREAM_TYPE_DATA' ((struct type##_stream *) \ ^~~~ lstream.h:585:7: note: in expansion of macro 'FIXED_BUFFER_STREAM_DATA' FIXED_BUFFER_STREAM_DATA (&(lname##u.l))->outbuf = buf; \ ^~~~~~~~~~~~~~~~~~~~~~~~ doprnt.c:3709:7: note: in expansion of macro 'INIT_STACK_FIXED_BUFFER_OUTPUT_STREAM' INIT_STACK_FIXED_BUFFER_OUTPUT_STREAM (stream, output, size); ^ lstream.h:562:30: warning: dereferencing type-punned pointer will break strict-aliasing rules [-Wstrict-aliasing] LSTREAM_TYPE_DATA (stream, fixed_buffer) ^ lstream.h:315:12: note: in definition of macro 'LSTREAM_TYPE_DATA' ((struct type##_stream *) \ ^~~~ lstream.h:586:7: note: in expansion of macro 'FIXED_BUFFER_STREAM_DATA' FIXED_BUFFER_STREAM_DATA (&(lname##u.l))->size = bsize; \ ^~~~~~~~~~~~~~~~~~~~~~~~ doprnt.c:3709:7: note: in expansion of macro 'INIT_STACK_FIXED_BUFFER_OUTPUT_STREAM' INIT_STACK_FIXED_BUFFER_OUTPUT_STREAM (stream, output, size); ^ doprnt.c: In function 'emacs_vsnprintf_ascbyte': lstream.h:562:30: warning: dereferencing type-punned pointer will break strict-aliasing rules [-Wstrict-aliasing] LSTREAM_TYPE_DATA (stream, fixed_buffer) ^ lstream.h:315:12: note: in definition of macro 'LSTREAM_TYPE_DATA' ((struct type##_stream *) \ ^~~~ lstream.h:585:7: note: in expansion of macro 'FIXED_BUFFER_STREAM_DATA' FIXED_BUFFER_STREAM_DATA (&(lname##u.l))->outbuf = buf; \ ^~~~~~~~~~~~~~~~~~~~~~~~ doprnt.c:3793:3: note: in expansion of macro 'INIT_STACK_FIXED_BUFFER_OUTPUT_STREAM' INIT_STACK_FIXED_BUFFER_OUTPUT_STREAM (stream, (Ibyte *) output, size); ^ lstream.h:562:30: warning: dereferencing type-punned pointer will break strict-aliasing rules [-Wstrict-aliasing] LSTREAM_TYPE_DATA (stream, fixed_buffer) ^ lstream.h:315:12: note: in definition of macro 'LSTREAM_TYPE_DATA' ((struct type##_stream *) \ ^~~~ lstream.h:586:7: note: in expansion of macro 'FIXED_BUFFER_STREAM_DATA' FIXED_BUFFER_STREAM_DATA (&(lname##u.l))->size = bsize; \ ^~~~~~~~~~~~~~~~~~~~~~~~ doprnt.c:3793:3: note: in expansion of macro 'INIT_STACK_FIXED_BUFFER_OUTPUT_STREAM' INIT_STACK_FIXED_BUFFER_OUTPUT_STREAM (stream, (Ibyte *) output, size); ^ --- editfns.o --- gcc -c -Wall -Wno-switch -Wundef -Wsign-compare -Wno-char-subscripts -Wpacked -Wpointer-arith -Wshadow -Wmissing-declarations -Wmissing-prototypes -Wstrict-prototypes -Wdeclaration-after-statement -g -O2 -Dunix -no-pie -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -DTERMINFO -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -Demacs -I. -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/src -DHAVE_CONFIG_H -I/usr/pkg/include -I/usr/X11R7/include -I/usr/X11R7/include editfns.c In file included from editfns.c:27:0: ./config.h:868:16: warning: "__builtin_alloca" is not defined, evaluates to 0 [-Wundef] #define alloca __builtin_alloca ^ --- elhash.o --- gcc -c -Wall -Wno-switch -Wundef -Wsign-compare -Wno-char-subscripts -Wpacked -Wpointer-arith -Wshadow -Wmissing-declarations -Wmissing-prototypes -Wstrict-prototypes -Wdeclaration-after-statement -g -O2 -Dunix -no-pie -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -DTERMINFO -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -Demacs -I. -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/src -DHAVE_CONFIG_H -I/usr/pkg/include -I/usr/X11R7/include -I/usr/X11R7/include elhash.c In file included from elhash.c:78:0: ./config.h:868:16: warning: "__builtin_alloca" is not defined, evaluates to 0 [-Wundef] #define alloca __builtin_alloca ^ --- emacs.o --- gcc -c -Wall -Wno-switch -Wundef -Wsign-compare -Wno-char-subscripts -Wpacked -Wpointer-arith -Wshadow -Wmissing-declarations -Wmissing-prototypes -Wstrict-prototypes -Wdeclaration-after-statement -g -O2 -Dunix -no-pie -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -DTERMINFO -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -Demacs -I. -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/src -DHAVE_CONFIG_H -I/usr/pkg/include -I/usr/X11R7/include -I/usr/X11R7/include emacs.c In file included from emacs.c:424:0: ./config.h:868:16: warning: "__builtin_alloca" is not defined, evaluates to 0 [-Wundef] #define alloca __builtin_alloca ^ --- emodules.o --- gcc -c -Wall -Wno-switch -Wundef -Wsign-compare -Wno-char-subscripts -Wpacked -Wpointer-arith -Wshadow -Wmissing-declarations -Wmissing-prototypes -Wstrict-prototypes -Wdeclaration-after-statement -g -O2 -Dunix -no-pie -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -DTERMINFO -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -Demacs -I. -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/src -DHAVE_CONFIG_H -I/usr/pkg/include -I/usr/X11R7/include -I/usr/X11R7/include emodules.c --- elhash.o --- elhash.c: In function 'print_hash_table': elhash.c:543:44: warning: format '%zd' expects argument of type 'signed size_t', but argument 3 has type 'Elemcount {aka long int}' [-Wformat=] write_fmt_string (printcharfun, " :size %zd", ht->count); ~~^ ~~~~~~~~~ %ld elhash.c:545:44: warning: format '%zd' expects argument of type 'signed size_t', but argument 3 has type 'Elemcount {aka long int}' [-Wformat=] write_fmt_string (printcharfun, " :size %zd/%zd", ht->count, ~~^ ~~~~~~~~~ %ld elhash.c:545:48: warning: format '%zd' expects argument of type 'signed size_t', but argument 4 has type 'Elemcount {aka long int}' [-Wformat=] write_fmt_string (printcharfun, " :size %zd/%zd", ht->count, ~~^ %ld ht->size); ~~~~~~~~ --- emodules.o --- In file included from emodules.h:34:0, from emodules.c:22: ./config.h:868:16: warning: "__builtin_alloca" is not defined, evaluates to 0 [-Wundef] #define alloca __builtin_alloca ^ --- emacs.o --- In file included from /usr/include/stdio.h:597:0, from lisp.h:114, from emacs.c:425: emacs.c: In function 'assert_equal_failed': emacs.c:3937:20: warning: format '%zd' expects argument of type 'signed size_t', but argument 6 has type 'long int' [-Wformat=] sprintf (bigstr, "%s (%zd) should == %s (%zd) but doesn't", ^ emacs.c:3937:20: warning: format '%zd' expects argument of type 'signed size_t', but argument 8 has type 'long int' [-Wformat=] --- emodules.o --- emodules.c: In function 'emodules_load': emodules.c:385:80: warning: format '%zd' expects argument of type 'signed size_t', but argument 3 has type 'long int' [-Wformat=] signal_ferror (Qdll_error, "Invalid dynamic module: Unsupported version `%zd(%zd)'", *ellcc_rev, EMODULES_REVISION); ~~^ ~~~~~~~~~~ %ld emodules.c:385:84: warning: format '%zd' expects argument of type 'signed size_t', but argument 4 has type 'long int' [-Wformat=] signal_ferror (Qdll_error, "Invalid dynamic module: Unsupported version `%zd(%zd)'", *ellcc_rev, EMODULES_REVISION); ~~^ %ld --- eval.o --- gcc -c -Wall -Wno-switch -Wundef -Wsign-compare -Wno-char-subscripts -Wpacked -Wpointer-arith -Wshadow -Wmissing-declarations -Wmissing-prototypes -Wstrict-prototypes -Wdeclaration-after-statement -g -O2 -Dunix -no-pie -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -DTERMINFO -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -Demacs -I. -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/src -DHAVE_CONFIG_H -I/usr/pkg/include -I/usr/X11R7/include -I/usr/X11R7/include eval.c In file included from eval.c:136:0: ./config.h:868:16: warning: "__builtin_alloca" is not defined, evaluates to 0 [-Wundef] #define alloca __builtin_alloca ^ --- event-stream.o --- gcc -c -Wall -Wno-switch -Wundef -Wsign-compare -Wno-char-subscripts -Wpacked -Wpointer-arith -Wshadow -Wmissing-declarations -Wmissing-prototypes -Wstrict-prototypes -Wdeclaration-after-statement -g -O2 -Dunix -no-pie -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -DTERMINFO -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -Demacs -I. -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/src -DHAVE_CONFIG_H -I/usr/pkg/include -I/usr/X11R7/include -I/usr/X11R7/include event-stream.c --- eval.o --- eval.c: In function 'print_multiple_value': eval.c:4742:56: warning: format '%zd' expects argument of type 'signed size_t', but argument 3 has type 'Elemcount {aka long int}' [-Wformat=] "#", ~~^ %ld --- event-unixoid.o --- gcc -c -Wall -Wno-switch -Wundef -Wsign-compare -Wno-char-subscripts -Wpacked -Wpointer-arith -Wshadow -Wmissing-declarations -Wmissing-prototypes -Wstrict-prototypes -Wdeclaration-after-statement -g -O2 -Dunix -no-pie -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -DTERMINFO -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -Demacs -I. -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/src -DHAVE_CONFIG_H -I/usr/pkg/include -I/usr/X11R7/include -I/usr/X11R7/include event-unixoid.c --- event-stream.o --- In file included from event-stream.c:79:0: event-stream.c: In function 'echo_key_event': lstream.h:562:30: warning: dereferencing type-punned pointer will break strict-aliasing rules [-Wstrict-aliasing] LSTREAM_TYPE_DATA (stream, fixed_buffer) ^ lstream.h:315:12: note: in definition of macro 'LSTREAM_TYPE_DATA' ((struct type##_stream *) \ ^~~~ lstream.h:585:7: note: in expansion of macro 'FIXED_BUFFER_STREAM_DATA' FIXED_BUFFER_STREAM_DATA (&(lname##u.l))->outbuf = buf; \ ^~~~~~~~~~~~~~~~~~~~~~~~ event-stream.c:649:3: note: in expansion of macro 'INIT_STACK_FIXED_BUFFER_OUTPUT_STREAM' INIT_STACK_FIXED_BUFFER_OUTPUT_STREAM ^ lstream.h:562:30: warning: dereferencing type-punned pointer will break strict-aliasing rules [-Wstrict-aliasing] LSTREAM_TYPE_DATA (stream, fixed_buffer) ^ lstream.h:315:12: note: in definition of macro 'LSTREAM_TYPE_DATA' ((struct type##_stream *) \ ^~~~ lstream.h:586:7: note: in expansion of macro 'FIXED_BUFFER_STREAM_DATA' FIXED_BUFFER_STREAM_DATA (&(lname##u.l))->size = bsize; \ ^~~~~~~~~~~~~~~~~~~~~~~~ event-stream.c:649:3: note: in expansion of macro 'INIT_STACK_FIXED_BUFFER_OUTPUT_STREAM' INIT_STACK_FIXED_BUFFER_OUTPUT_STREAM ^ --- event-unixoid.o --- In file included from event-unixoid.c:27:0: ./config.h:868:16: warning: "__builtin_alloca" is not defined, evaluates to 0 [-Wundef] #define alloca __builtin_alloca ^ --- events.o --- gcc -c -Wall -Wno-switch -Wundef -Wsign-compare -Wno-char-subscripts -Wpacked -Wpointer-arith -Wshadow -Wmissing-declarations -Wmissing-prototypes -Wstrict-prototypes -Wdeclaration-after-statement -g -O2 -Dunix -no-pie -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -DTERMINFO -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -Demacs -I. -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/src -DHAVE_CONFIG_H -I/usr/pkg/include -I/usr/X11R7/include -I/usr/X11R7/include events.c In file included from events.c:25:0: ./config.h:868:16: warning: "__builtin_alloca" is not defined, evaluates to 0 [-Wundef] #define alloca __builtin_alloca ^ events.c: In function 'print_event': events.c:284:52: warning: format '%zd' expects argument of type 'signed size_t', but argument 3 has type 'long int' [-Wformat=] write_fmt_string (printcharfun, "#pos < 0 ? soe->pos : ~~~~~~~~~~~~~~~~~~~~~~~~~ object_memxpos_to_bytexpos (obj, soe->pos), ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ extents.c:1038:42: warning: format '%zd' expects argument of type 'signed size_t', but argument 3 has type 'Memxpos {aka long int}' [-Wformat=] stderr_out ("SOE pos is %zd (memxpos %zd)\n", ~~^ %ld extents.c:1041:8: soe->pos); ~~~~~~~~ extents.c: In function 'soe_move': extents.c:1144:36: warning: format '%zd' expects argument of type 'signed size_t', but argument 2 has type 'long int' [-Wformat=] stderr_out ("Moving SOE from %zd (memxpos %zd) to %zd (memxpos %zd)\n", ~~^ %ld soe->pos < 0 ? soe->pos : ~~~~~~~~~~~~~~~~~~~~~~~~~ object_memxpos_to_bytexpos (obj, soe->pos), soe->pos, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ extents.c:1144:49: warning: format '%zd' expects argument of type 'signed size_t', but argument 3 has type 'Memxpos {aka long int}' [-Wformat=] stderr_out ("Moving SOE from %zd (memxpos %zd) to %zd (memxpos %zd)\n", ~~^ %ld extents.c:1146:47: object_memxpos_to_bytexpos (obj, soe->pos), soe->pos, ~~~~~~~~ extents.c:1144:57: warning: format '%zd' expects argument of type 'signed size_t', but argument 4 has type 'Bytexpos {aka long int}' [-Wformat=] stderr_out ("Moving SOE from %zd (memxpos %zd) to %zd (memxpos %zd)\n", ~~^ %ld extents.c:1147:3: object_memxpos_to_bytexpos (obj, pos), pos); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ extents.c:1144:70: warning: format '%zd' expects argument of type 'signed size_t', but argument 5 has type 'Memxpos {aka long int}' [-Wformat=] stderr_out ("Moving SOE from %zd (memxpos %zd) to %zd (memxpos %zd)\n", ~~^ %ld extents.c: In function 'print_extent_1': extents.c:2798:40: warning: format '%zd' expects argument of type 'signed size_t', but argument 4 has type 'long int' [-Wformat=] "%zd, %zd", ~~^ %ld extents.c:2798:45: warning: format '%zd' expects argument of type 'signed size_t', but argument 5 has type 'long int' [-Wformat=] "%zd, %zd", ~~^ %ld --- faces.o --- gcc -c -Wall -Wno-switch -Wundef -Wsign-compare -Wno-char-subscripts -Wpacked -Wpointer-arith -Wshadow -Wmissing-declarations -Wmissing-prototypes -Wstrict-prototypes -Wdeclaration-after-statement -g -O2 -Dunix -no-pie -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -DTERMINFO -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -Demacs -I. -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/src -DHAVE_CONFIG_H -I/usr/pkg/include -I/usr/X11R7/include -I/usr/X11R7/include faces.c In file included from faces.c:28:0: ./config.h:868:16: warning: "__builtin_alloca" is not defined, evaluates to 0 [-Wundef] #define alloca __builtin_alloca ^ --- file-coding.o --- gcc -c -Wall -Wno-switch -Wundef -Wsign-compare -Wno-char-subscripts -Wpacked -Wpointer-arith -Wshadow -Wmissing-declarations -Wmissing-prototypes -Wstrict-prototypes -Wdeclaration-after-statement -g -O2 -Dunix -no-pie -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -DTERMINFO -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -Demacs -I. -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/src -DHAVE_CONFIG_H -I/usr/pkg/include -I/usr/X11R7/include -I/usr/X11R7/include file-coding.c In file included from file-coding.c:70:0: ./config.h:868:16: warning: "__builtin_alloca" is not defined, evaluates to 0 [-Wundef] #define alloca __builtin_alloca ^ In file included from file-coding.c:76:0: file-coding.c: In function 'coding_stream_coding_system': file-coding.h:1146:63: warning: dereferencing type-punned pointer will break strict-aliasing rules [-Wstrict-aliasing] #define CODING_STREAM_DATA(stream) LSTREAM_TYPE_DATA (stream, coding) ^ lstream.h:315:12: note: in definition of macro 'LSTREAM_TYPE_DATA' ((struct type##_stream *) \ ^~~~ file-coding.c:2558:10: note: in expansion of macro 'CODING_STREAM_DATA' return CODING_STREAM_DATA (stream)->codesys; ^~~~~~~~~~~~~~~~~~ file-coding.c: In function 'detect_coding_type': file-coding.c:4464:52: warning: format '%zd' expects argument of type 'signed size_t', but argument 2 has type 'Bytecount {aka long int}' [-Wformat=] debug_out ("detect_coding_type: processing %zd bytes\n", n); ~~^ %ld --- fileio.o --- gcc -c -Wall -Wno-switch -Wundef -Wsign-compare -Wno-char-subscripts -Wpacked -Wpointer-arith -Wshadow -Wmissing-declarations -Wmissing-prototypes -Wstrict-prototypes -Wdeclaration-after-statement -g -O2 -Dunix -no-pie -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -DTERMINFO -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -Demacs -I. -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/src -DHAVE_CONFIG_H -I/usr/pkg/include -I/usr/X11R7/include -I/usr/X11R7/include fileio.c In file included from fileio.c:30:0: ./config.h:868:16: warning: "__builtin_alloca" is not defined, evaluates to 0 [-Wundef] #define alloca __builtin_alloca ^ --- filelock.o --- gcc -c -Wall -Wno-switch -Wundef -Wsign-compare -Wno-char-subscripts -Wpacked -Wpointer-arith -Wshadow -Wmissing-declarations -Wmissing-prototypes -Wstrict-prototypes -Wdeclaration-after-statement -g -O2 -Dunix -no-pie -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -DTERMINFO -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -Demacs -I. -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/src -DHAVE_CONFIG_H -I/usr/pkg/include -I/usr/X11R7/include -I/usr/X11R7/include filelock.c In file included from filelock.c:21:0: ./config.h:868:16: warning: "__builtin_alloca" is not defined, evaluates to 0 [-Wundef] #define alloca __builtin_alloca ^ --- filemode.o --- gcc -c -Wall -Wno-switch -Wundef -Wsign-compare -Wno-char-subscripts -Wpacked -Wpointer-arith -Wshadow -Wmissing-declarations -Wmissing-prototypes -Wstrict-prototypes -Wdeclaration-after-statement -g -O2 -Dunix -no-pie -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -DTERMINFO -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -Demacs -I. -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/src -DHAVE_CONFIG_H -I/usr/pkg/include -I/usr/X11R7/include -I/usr/X11R7/include filemode.c In file included from filemode.c:21:0: ./config.h:868:16: warning: "__builtin_alloca" is not defined, evaluates to 0 [-Wundef] #define alloca __builtin_alloca ^ --- floatfns.o --- gcc -c -Wall -Wno-switch -Wundef -Wsign-compare -Wno-char-subscripts -Wpacked -Wpointer-arith -Wshadow -Wmissing-declarations -Wmissing-prototypes -Wstrict-prototypes -Wdeclaration-after-statement -g -O2 -Dunix -no-pie -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -DTERMINFO -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -Demacs -I. -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/src -DHAVE_CONFIG_H -I/usr/pkg/include -I/usr/X11R7/include -I/usr/X11R7/include floatfns.c In file included from floatfns.c:45:0: ./config.h:868:16: warning: "__builtin_alloca" is not defined, evaluates to 0 [-Wundef] #define alloca __builtin_alloca ^ --- fns.o --- gcc -c -Wall -Wno-switch -Wundef -Wsign-compare -Wno-char-subscripts -Wpacked -Wpointer-arith -Wshadow -Wmissing-declarations -Wmissing-prototypes -Wstrict-prototypes -Wdeclaration-after-statement -g -O2 -Dunix -no-pie -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -DTERMINFO -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -Demacs -I. -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/src -DHAVE_CONFIG_H -I/usr/pkg/include -I/usr/X11R7/include -I/usr/X11R7/include fns.c In file included from fns.c:28:0: ./config.h:868:16: warning: "__builtin_alloca" is not defined, evaluates to 0 [-Wundef] #define alloca __builtin_alloca ^ --- font-lock.o --- gcc -c -Wall -Wno-switch -Wundef -Wsign-compare -Wno-char-subscripts -Wpacked -Wpointer-arith -Wshadow -Wmissing-declarations -Wmissing-prototypes -Wstrict-prototypes -Wdeclaration-after-statement -g -O2 -Dunix -no-pie -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -DTERMINFO -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -Demacs -I. -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/src -DHAVE_CONFIG_H -I/usr/pkg/include -I/usr/X11R7/include -I/usr/X11R7/include font-lock.c In file included from font-lock.c:45:0: ./config.h:868:16: warning: "__builtin_alloca" is not defined, evaluates to 0 [-Wundef] #define alloca __builtin_alloca ^ --- fontcolor.o --- gcc -c -Wall -Wno-switch -Wundef -Wsign-compare -Wno-char-subscripts -Wpacked -Wpointer-arith -Wshadow -Wmissing-declarations -Wmissing-prototypes -Wstrict-prototypes -Wdeclaration-after-statement -g -O2 -Dunix -no-pie -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -DTERMINFO -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -Demacs -I. -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/src -DHAVE_CONFIG_H -I/usr/pkg/include -I/usr/X11R7/include -I/usr/X11R7/include fontcolor.c In file included from fontcolor.c:24:0: ./config.h:868:16: warning: "__builtin_alloca" is not defined, evaluates to 0 [-Wundef] #define alloca __builtin_alloca ^ --- frame.o --- gcc -c -Wall -Wno-switch -Wundef -Wsign-compare -Wno-char-subscripts -Wpacked -Wpointer-arith -Wshadow -Wmissing-declarations -Wmissing-prototypes -Wstrict-prototypes -Wdeclaration-after-statement -g -O2 -Dunix -no-pie -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -DTERMINFO -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -Demacs -I. -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/src -DHAVE_CONFIG_H -I/usr/pkg/include -I/usr/X11R7/include -I/usr/X11R7/include frame.c In file included from frame.c:365:0: ./config.h:868:16: warning: "__builtin_alloca" is not defined, evaluates to 0 [-Wundef] #define alloca __builtin_alloca ^ --- gc.o --- gcc -c -Wall -Wno-switch -Wundef -Wsign-compare -Wno-char-subscripts -Wpacked -Wpointer-arith -Wshadow -Wmissing-declarations -Wmissing-prototypes -Wstrict-prototypes -Wdeclaration-after-statement -g -O2 -Dunix -no-pie -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -DTERMINFO -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -Demacs -I. -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/src -DHAVE_CONFIG_H -I/usr/pkg/include -I/usr/X11R7/include -I/usr/X11R7/include gc.c In file included from gc.c:46:0: ./config.h:868:16: warning: "__builtin_alloca" is not defined, evaluates to 0 [-Wundef] #define alloca __builtin_alloca ^ gc.c: In function 'lispdesc_indirect_count_1': gc.c:212:69: warning: format '%zd' expects argument of type 'signed size_t', but argument 4 has type 'long int' [-Wformat=] stderr_out ("Unsupported count type : %d (line = %d, code = %zd)\n", ~~^ %ld --- general.o --- gcc -c -Wall -Wno-switch -Wundef -Wsign-compare -Wno-char-subscripts -Wpacked -Wpointer-arith -Wshadow -Wmissing-declarations -Wmissing-prototypes -Wstrict-prototypes -Wdeclaration-after-statement -g -O2 -Dunix -no-pie -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -DTERMINFO -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -Demacs -I. -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/src -DHAVE_CONFIG_H -I/usr/pkg/include -I/usr/X11R7/include -I/usr/X11R7/include general.c --- glyphs-eimage.o --- gcc -c -Wall -Wno-switch -Wundef -Wsign-compare -Wno-char-subscripts -Wpacked -Wpointer-arith -Wshadow -Wmissing-declarations -Wmissing-prototypes -Wstrict-prototypes -Wdeclaration-after-statement -g -O2 -Dunix -no-pie -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -DTERMINFO -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -Demacs -I. -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/src -DHAVE_CONFIG_H -I/usr/pkg/include -I/usr/X11R7/include -I/usr/X11R7/include glyphs-eimage.c --- general.o --- In file included from general.c:25:0: ./config.h:868:16: warning: "__builtin_alloca" is not defined, evaluates to 0 [-Wundef] #define alloca __builtin_alloca ^ --- glyphs-eimage.o --- In file included from glyphs-eimage.c:41:0: ./config.h:868:16: warning: "__builtin_alloca" is not defined, evaluates to 0 [-Wundef] #define alloca __builtin_alloca ^ --- glyphs-shared.o --- gcc -c -Wall -Wno-switch -Wundef -Wsign-compare -Wno-char-subscripts -Wpacked -Wpointer-arith -Wshadow -Wmissing-declarations -Wmissing-prototypes -Wstrict-prototypes -Wdeclaration-after-statement -g -O2 -Dunix -no-pie -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -DTERMINFO -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -Demacs -I. -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/src -DHAVE_CONFIG_H -I/usr/pkg/include -I/usr/X11R7/include -I/usr/X11R7/include glyphs-shared.c In file included from glyphs-shared.c:30:0: ./config.h:868:16: warning: "__builtin_alloca" is not defined, evaluates to 0 [-Wundef] #define alloca __builtin_alloca ^ --- glyphs-widget.o --- gcc -c -Wall -Wno-switch -Wundef -Wsign-compare -Wno-char-subscripts -Wpacked -Wpointer-arith -Wshadow -Wmissing-declarations -Wmissing-prototypes -Wstrict-prototypes -Wdeclaration-after-statement -g -O2 -Dunix -no-pie -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -DTERMINFO -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -Demacs -I. -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/src -DHAVE_CONFIG_H -I/usr/pkg/include -I/usr/X11R7/include -I/usr/X11R7/include glyphs-widget.c In file included from glyphs-widget.c:24:0: ./config.h:868:16: warning: "__builtin_alloca" is not defined, evaluates to 0 [-Wundef] #define alloca __builtin_alloca ^ --- glyphs.o --- gcc -c -Wall -Wno-switch -Wundef -Wsign-compare -Wno-char-subscripts -Wpacked -Wpointer-arith -Wshadow -Wmissing-declarations -Wmissing-prototypes -Wstrict-prototypes -Wdeclaration-after-statement -g -O2 -Dunix -no-pie -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -DTERMINFO -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -Demacs -I. -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/src -DHAVE_CONFIG_H -I/usr/pkg/include -I/usr/X11R7/include -I/usr/X11R7/include glyphs.c In file included from glyphs.c:45:0: ./config.h:868:16: warning: "__builtin_alloca" is not defined, evaluates to 0 [-Wundef] #define alloca __builtin_alloca ^ --- gui.o --- gcc -c -Wall -Wno-switch -Wundef -Wsign-compare -Wno-char-subscripts -Wpacked -Wpointer-arith -Wshadow -Wmissing-declarations -Wmissing-prototypes -Wstrict-prototypes -Wdeclaration-after-statement -g -O2 -Dunix -no-pie -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -DTERMINFO -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -Demacs -I. -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/src -DHAVE_CONFIG_H -I/usr/pkg/include -I/usr/X11R7/include -I/usr/X11R7/include gui.c --- gutter.o --- gcc -c -Wall -Wno-switch -Wundef -Wsign-compare -Wno-char-subscripts -Wpacked -Wpointer-arith -Wshadow -Wmissing-declarations -Wmissing-prototypes -Wstrict-prototypes -Wdeclaration-after-statement -g -O2 -Dunix -no-pie -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -DTERMINFO -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -Demacs -I. -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/src -DHAVE_CONFIG_H -I/usr/pkg/include -I/usr/X11R7/include -I/usr/X11R7/include gutter.c --- gui.o --- In file included from gui.c:26:0: ./config.h:868:16: warning: "__builtin_alloca" is not defined, evaluates to 0 [-Wundef] #define alloca __builtin_alloca ^ --- gutter.o --- In file included from gutter.c:25:0: ./config.h:868:16: warning: "__builtin_alloca" is not defined, evaluates to 0 [-Wundef] #define alloca __builtin_alloca ^ --- glyphs.o --- glyphs.c: In function 'print_image_instance': glyphs.c:970:41: warning: format '%zd' expects argument of type 'signed size_t', but argument 3 has type 'long int' [-Wformat=] write_fmt_string (printcharfun, "%zd", ~~^ %ld glyphs.c:976:41: warning: format '%zd' expects argument of type 'signed size_t', but argument 3 has type 'long int' [-Wformat=] write_fmt_string (printcharfun, "%zd", ~~^ %ld --- imgproc.o --- gcc -c -Wall -Wno-switch -Wundef -Wsign-compare -Wno-char-subscripts -Wpacked -Wpointer-arith -Wshadow -Wmissing-declarations -Wmissing-prototypes -Wstrict-prototypes -Wdeclaration-after-statement -g -O2 -Dunix -no-pie -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -DTERMINFO -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -Demacs -I. -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/src -DHAVE_CONFIG_H -I/usr/pkg/include -I/usr/X11R7/include -I/usr/X11R7/include imgproc.c In file included from imgproc.c:42:0: ./config.h:868:16: warning: "__builtin_alloca" is not defined, evaluates to 0 [-Wundef] #define alloca __builtin_alloca ^ --- indent.o --- gcc -c -Wall -Wno-switch -Wundef -Wsign-compare -Wno-char-subscripts -Wpacked -Wpointer-arith -Wshadow -Wmissing-declarations -Wmissing-prototypes -Wstrict-prototypes -Wdeclaration-after-statement -g -O2 -Dunix -no-pie -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -DTERMINFO -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -Demacs -I. -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/src -DHAVE_CONFIG_H -I/usr/pkg/include -I/usr/X11R7/include -I/usr/X11R7/include indent.c In file included from indent.c:27:0: ./config.h:868:16: warning: "__builtin_alloca" is not defined, evaluates to 0 [-Wundef] #define alloca __builtin_alloca ^ --- inline.o --- gcc -c -Wall -Wno-switch -Wundef -Wsign-compare -Wno-char-subscripts -Wpacked -Wpointer-arith -Wshadow -Wmissing-declarations -Wmissing-prototypes -Wstrict-prototypes -Wdeclaration-after-statement -g -O2 -Dunix -no-pie -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -DTERMINFO -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -Demacs -I. -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/src -DHAVE_CONFIG_H -I/usr/pkg/include -I/usr/X11R7/include -I/usr/X11R7/include inline.c --- input-method-xlib.o --- gcc -c -Wall -Wno-switch -Wundef -Wsign-compare -Wno-char-subscripts -Wpacked -Wpointer-arith -Wshadow -Wmissing-declarations -Wmissing-prototypes -Wstrict-prototypes -Wdeclaration-after-statement -g -O2 -Dunix -no-pie -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -DTERMINFO -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -Demacs -I. -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/src -DHAVE_CONFIG_H -I/usr/pkg/include -I/usr/X11R7/include -I/usr/X11R7/include input-method-xlib.c --- inline.o --- In file included from inline.c:42:0: ./config.h:868:16: warning: "__builtin_alloca" is not defined, evaluates to 0 [-Wundef] #define alloca __builtin_alloca ^ --- input-method-xlib.o --- In file included from input-method-xlib.c:70:0: ./config.h:868:16: warning: "__builtin_alloca" is not defined, evaluates to 0 [-Wundef] #define alloca __builtin_alloca ^ --- insdel.o --- gcc -c -Wall -Wno-switch -Wundef -Wsign-compare -Wno-char-subscripts -Wpacked -Wpointer-arith -Wshadow -Wmissing-declarations -Wmissing-prototypes -Wstrict-prototypes -Wdeclaration-after-statement -g -O2 -Dunix -no-pie -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -DTERMINFO -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -Demacs -I. -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/src -DHAVE_CONFIG_H -I/usr/pkg/include -I/usr/X11R7/include -I/usr/X11R7/include insdel.c In file included from insdel.c:34:0: ./config.h:868:16: warning: "__builtin_alloca" is not defined, evaluates to 0 [-Wundef] #define alloca __builtin_alloca ^ --- intl.o --- gcc -c -Wall -Wno-switch -Wundef -Wsign-compare -Wno-char-subscripts -Wpacked -Wpointer-arith -Wshadow -Wmissing-declarations -Wmissing-prototypes -Wstrict-prototypes -Wdeclaration-after-statement -g -O2 -Dunix -no-pie -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -DTERMINFO -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -Demacs -I. -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/src -DHAVE_CONFIG_H -I/usr/pkg/include -I/usr/X11R7/include -I/usr/X11R7/include intl.c In file included from intl.c:22:0: ./config.h:868:16: warning: "__builtin_alloca" is not defined, evaluates to 0 [-Wundef] #define alloca __builtin_alloca ^ --- keymap.o --- gcc -c -Wall -Wno-switch -Wundef -Wsign-compare -Wno-char-subscripts -Wpacked -Wpointer-arith -Wshadow -Wmissing-declarations -Wmissing-prototypes -Wstrict-prototypes -Wdeclaration-after-statement -g -O2 -Dunix -no-pie -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -DTERMINFO -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -Demacs -I. -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/src -DHAVE_CONFIG_H -I/usr/pkg/include -I/usr/X11R7/include -I/usr/X11R7/include keymap.c --- lastfile.o --- gcc -c -Wall -Wno-switch -Wundef -Wsign-compare -Wno-char-subscripts -Wpacked -Wpointer-arith -Wshadow -Wmissing-declarations -Wmissing-prototypes -Wstrict-prototypes -Wdeclaration-after-statement -g -O2 -Dunix -no-pie -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -DTERMINFO -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -Demacs -I. -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/src -DHAVE_CONFIG_H -I/usr/pkg/include -I/usr/X11R7/include -I/usr/X11R7/include lastfile.c --- keymap.o --- In file included from keymap.c:27:0: ./config.h:868:16: warning: "__builtin_alloca" is not defined, evaluates to 0 [-Wundef] #define alloca __builtin_alloca ^ --- line-number.o --- gcc -c -Wall -Wno-switch -Wundef -Wsign-compare -Wno-char-subscripts -Wpacked -Wpointer-arith -Wshadow -Wmissing-declarations -Wmissing-prototypes -Wstrict-prototypes -Wdeclaration-after-statement -g -O2 -Dunix -no-pie -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -DTERMINFO -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -Demacs -I. -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/src -DHAVE_CONFIG_H -I/usr/pkg/include -I/usr/X11R7/include -I/usr/X11R7/include line-number.c --- linuxplay.o --- gcc -c -Wall -Wno-switch -Wundef -Wsign-compare -Wno-char-subscripts -Wpacked -Wpointer-arith -Wshadow -Wmissing-declarations -Wmissing-prototypes -Wstrict-prototypes -Wdeclaration-after-statement -g -O2 -Dunix -no-pie -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -DTERMINFO -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -Demacs -I. -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/src -DHAVE_CONFIG_H -I/usr/pkg/include -I/usr/X11R7/include -I/usr/X11R7/include linuxplay.c --- line-number.o --- In file included from line-number.c:51:0: ./config.h:868:16: warning: "__builtin_alloca" is not defined, evaluates to 0 [-Wundef] #define alloca __builtin_alloca ^ --- linuxplay.o --- In file included from linuxplay.c:58:0: ./config.h:868:16: warning: "__builtin_alloca" is not defined, evaluates to 0 [-Wundef] #define alloca __builtin_alloca ^ --- keymap.o --- keymap.c: In function 'print_keymap': keymap.c:302:43: warning: format '%zd' expects argument of type 'signed size_t', but argument 3 has type 'long int' [-Wformat=] write_fmt_string (printcharfun, "size %zd 0x%x>", ~~^ %ld In file included from keymap.c:31:0: keymap.c: In function 'where_is_to_Ibyte': lstream.h:562:30: warning: dereferencing type-punned pointer will break strict-aliasing rules [-Wstrict-aliasing] LSTREAM_TYPE_DATA (stream, fixed_buffer) ^ lstream.h:315:12: note: in definition of macro 'LSTREAM_TYPE_DATA' ((struct type##_stream *) \ ^~~~ lstream.h:585:7: note: in expansion of macro 'FIXED_BUFFER_STREAM_DATA' FIXED_BUFFER_STREAM_DATA (&(lname##u.l))->outbuf = buf; \ ^~~~~~~~~~~~~~~~~~~~~~~~ keymap.c:3895:3: note: in expansion of macro 'INIT_STACK_FIXED_BUFFER_OUTPUT_STREAM' INIT_STACK_FIXED_BUFFER_OUTPUT_STREAM (stream, buffer, bufsize); ^ lstream.h:562:30: warning: dereferencing type-punned pointer will break strict-aliasing rules [-Wstrict-aliasing] LSTREAM_TYPE_DATA (stream, fixed_buffer) ^ lstream.h:315:12: note: in definition of macro 'LSTREAM_TYPE_DATA' ((struct type##_stream *) \ ^~~~ lstream.h:586:7: note: in expansion of macro 'FIXED_BUFFER_STREAM_DATA' FIXED_BUFFER_STREAM_DATA (&(lname##u.l))->size = bsize; \ ^~~~~~~~~~~~~~~~~~~~~~~~ keymap.c:3895:3: note: in expansion of macro 'INIT_STACK_FIXED_BUFFER_OUTPUT_STREAM' INIT_STACK_FIXED_BUFFER_OUTPUT_STREAM (stream, buffer, bufsize); ^ --- lread.o --- gcc -c -Wall -Wno-switch -Wundef -Wsign-compare -Wno-char-subscripts -Wpacked -Wpointer-arith -Wshadow -Wmissing-declarations -Wmissing-prototypes -Wstrict-prototypes -Wdeclaration-after-statement -g -O2 -Dunix -no-pie -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -DTERMINFO -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -Demacs -I. -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/src -DHAVE_CONFIG_H -I/usr/pkg/include -I/usr/X11R7/include -I/usr/X11R7/include lread.c In file included from lread.c:25:0: ./config.h:868:16: warning: "__builtin_alloca" is not defined, evaluates to 0 [-Wundef] #define alloca __builtin_alloca ^ --- lstream.o --- gcc -c -Wall -Wno-switch -Wundef -Wsign-compare -Wno-char-subscripts -Wpacked -Wpointer-arith -Wshadow -Wmissing-declarations -Wmissing-prototypes -Wstrict-prototypes -Wdeclaration-after-statement -g -O2 -Dunix -no-pie -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -DTERMINFO -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -Demacs -I. -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/src -DHAVE_CONFIG_H -I/usr/pkg/include -I/usr/X11R7/include -I/usr/X11R7/include lstream.c --- macros.o --- gcc -c -Wall -Wno-switch -Wundef -Wsign-compare -Wno-char-subscripts -Wpacked -Wpointer-arith -Wshadow -Wmissing-declarations -Wmissing-prototypes -Wstrict-prototypes -Wdeclaration-after-statement -g -O2 -Dunix -no-pie -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -DTERMINFO -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -Demacs -I. -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/src -DHAVE_CONFIG_H -I/usr/pkg/include -I/usr/X11R7/include -I/usr/X11R7/include macros.c In file included from macros.c:30:0: ./config.h:868:16: warning: "__builtin_alloca" is not defined, evaluates to 0 [-Wundef] #define alloca __builtin_alloca ^ --- lstream.o --- In file included from lstream.c:25:0: ./config.h:868:16: warning: "__builtin_alloca" is not defined, evaluates to 0 [-Wundef] #define alloca __builtin_alloca ^ In file included from lstream.c:31:0: lstream.c: In function 'stdio_rewinder': lstream.c:1076:62: warning: dereferencing type-punned pointer will break strict-aliasing rules [-Wstrict-aliasing] #define STDIO_STREAM_DATA(stream) LSTREAM_TYPE_DATA (stream, stdio) ^ lstream.h:315:12: note: in definition of macro 'LSTREAM_TYPE_DATA' ((struct type##_stream *) \ ^~~~ lstream.c:1153:11: note: in expansion of macro 'STDIO_STREAM_DATA' rewind (STDIO_STREAM_DATA (stream)->file); ^~~~~~~~~~~~~~~~~ lstream.c: In function 'filedesc_rewinder': lstream.c:1206:65: warning: dereferencing type-punned pointer will break strict-aliasing rules [-Wstrict-aliasing] #define FILEDESC_STREAM_DATA(stream) LSTREAM_TYPE_DATA (stream, filedesc) ^ lstream.h:315:12: note: in definition of macro 'LSTREAM_TYPE_DATA' ((struct type##_stream *) \ ^~~~ lstream.c:1464:14: note: in expansion of macro 'FILEDESC_STREAM_DATA' lseek (FILEDESC_STREAM_DATA (stream)->fd, str->starting_pos, ^~~~~~~~~~~~~~~~~~~~ lstream.c: In function 'fixed_buffer_rewinder': lstream.h:562:30: warning: dereferencing type-punned pointer will break strict-aliasing rules [-Wstrict-aliasing] LSTREAM_TYPE_DATA (stream, fixed_buffer) ^ lstream.h:315:12: note: in definition of macro 'LSTREAM_TYPE_DATA' ((struct type##_stream *) \ ^~~~ lstream.c:1694:3: note: in expansion of macro 'FIXED_BUFFER_STREAM_DATA' FIXED_BUFFER_STREAM_DATA (stream)->offset = 0; ^~~~~~~~~~~~~~~~~~~~~~~~ lstream.c: In function 'fixed_buffer_input_stream_ptr': lstream.h:562:30: warning: dereferencing type-punned pointer will break strict-aliasing rules [-Wstrict-aliasing] LSTREAM_TYPE_DATA (stream, fixed_buffer) ^ lstream.h:315:12: note: in definition of macro 'LSTREAM_TYPE_DATA' ((struct type##_stream *) \ ^~~~ lstream.c:1702:10: note: in expansion of macro 'FIXED_BUFFER_STREAM_DATA' return FIXED_BUFFER_STREAM_DATA (stream)->inbuf; ^~~~~~~~~~~~~~~~~~~~~~~~ lstream.c: In function 'fixed_buffer_output_stream_ptr': lstream.h:562:30: warning: dereferencing type-punned pointer will break strict-aliasing rules [-Wstrict-aliasing] LSTREAM_TYPE_DATA (stream, fixed_buffer) ^ lstream.h:315:12: note: in definition of macro 'LSTREAM_TYPE_DATA' ((struct type##_stream *) \ ^~~~ lstream.c:1709:10: note: in expansion of macro 'FIXED_BUFFER_STREAM_DATA' return FIXED_BUFFER_STREAM_DATA (stream)->outbuf; ^~~~~~~~~~~~~~~~~~~~~~~~ lstream.c: In function 'make_resizing_buffer_output_stream': lstream.c:1715:30: warning: dereferencing type-punned pointer will break strict-aliasing rules [-Wstrict-aliasing] LSTREAM_TYPE_DATA (stream, resizing_buffer) ^ lstream.h:315:12: note: in definition of macro 'LSTREAM_TYPE_DATA' ((struct type##_stream *) \ ^~~~ lstream.c:1738:3: note: in expansion of macro 'RESIZING_BUFFER_STREAM_DATA' RESIZING_BUFFER_STREAM_DATA (stream)->extent_info ^~~~~~~~~~~~~~~~~~~~~~~~~~~ lstream.c: In function 'resizing_buffer_rewinder': lstream.c:1715:30: warning: dereferencing type-punned pointer will break strict-aliasing rules [-Wstrict-aliasing] LSTREAM_TYPE_DATA (stream, resizing_buffer) ^ lstream.h:315:12: note: in definition of macro 'LSTREAM_TYPE_DATA' ((struct type##_stream *) \ ^~~~ lstream.c:1775:3: note: in expansion of macro 'RESIZING_BUFFER_STREAM_DATA' RESIZING_BUFFER_STREAM_DATA (stream)->stored = 0; ^~~~~~~~~~~~~~~~~~~~~~~~~~~ In file included from lisp.h:1841:0, from lstream.c:26: lstream.c: In function 'resizing_buffer_extent_info': lstream.c:1715:30: warning: dereferencing type-punned pointer will break strict-aliasing rules [-Wstrict-aliasing] LSTREAM_TYPE_DATA (stream, resizing_buffer) ^ lrecord.h:1616:25: note: in definition of macro 'XRECORD' error_check_##c_name (x, __FILE__, __LINE__) ^ lstream.c:1795:10: note: in expansion of macro 'XEXTENT_INFO' return XEXTENT_INFO (RESIZING_BUFFER_STREAM_DATA (stream)->extent_info); ^~~~~~~~~~~~ lstream.c:1715:3: note: in expansion of macro 'LSTREAM_TYPE_DATA' LSTREAM_TYPE_DATA (stream, resizing_buffer) ^~~~~~~~~~~~~~~~~ lstream.c:1795:24: note: in expansion of macro 'RESIZING_BUFFER_STREAM_DATA' return XEXTENT_INFO (RESIZING_BUFFER_STREAM_DATA (stream)->extent_info); ^~~~~~~~~~~~~~~~~~~~~~~~~~~ In file included from lstream.c:31:0: lstream.c: In function 'resizing_buffer_stream_ptr': lstream.c:1715:30: warning: dereferencing type-punned pointer will break strict-aliasing rules [-Wstrict-aliasing] LSTREAM_TYPE_DATA (stream, resizing_buffer) ^ lstream.h:315:12: note: in definition of macro 'LSTREAM_TYPE_DATA' ((struct type##_stream *) \ ^~~~ lstream.c:1801:10: note: in expansion of macro 'RESIZING_BUFFER_STREAM_DATA' return RESIZING_BUFFER_STREAM_DATA (stream)->buf; ^~~~~~~~~~~~~~~~~~~~~~~~~~~ lstream.c: In function 'resizing_buffer_to_lisp_string': lstream.c:1715:30: warning: dereferencing type-punned pointer will break strict-aliasing rules [-Wstrict-aliasing] LSTREAM_TYPE_DATA (stream, resizing_buffer) ^ lstream.h:315:12: note: in definition of macro 'LSTREAM_TYPE_DATA' ((struct type##_stream *) \ ^~~~ lstream.c:1810:25: note: in expansion of macro 'RESIZING_BUFFER_STREAM_DATA' result = make_string (RESIZING_BUFFER_STREAM_DATA (stream)->buf, ^~~~~~~~~~~~~~~~~~~~~~~~~~~ lstream.c: In function 'make_dynarr_output_stream': lstream.c:1874:30: warning: dereferencing type-punned pointer will break strict-aliasing rules [-Wstrict-aliasing] LSTREAM_TYPE_DATA (stream, dynarr) ^ lstream.h:315:12: note: in definition of macro 'LSTREAM_TYPE_DATA' ((struct type##_stream *) \ ^~~~ lstream.c:1888:3: note: in expansion of macro 'DYNARR_STREAM_DATA' DYNARR_STREAM_DATA (XLSTREAM (obj))->dyn = dyn; ^~~~~~~~~~~~~~~~~~ In file included from lisp.h:1885:0, from lstream.c:26: lstream.c: In function 'dynarr_rewinder': lstream.c:1874:30: warning: dereferencing type-punned pointer will break strict-aliasing rules [-Wstrict-aliasing] LSTREAM_TYPE_DATA (stream, dynarr) ^ array.h:458:45: note: in definition of macro 'Dynarr_reset' #define Dynarr_reset(d) Dynarr_set_lengthr (d, 0) ^ lstream.c:1874:3: note: in expansion of macro 'LSTREAM_TYPE_DATA' LSTREAM_TYPE_DATA (stream, dynarr) ^~~~~~~~~~~~~~~~~ lstream.c:1904:17: note: in expansion of macro 'DYNARR_STREAM_DATA' Dynarr_reset (DYNARR_STREAM_DATA (stream)->dyn); ^~~~~~~~~~~~~~~~~~ In file included from lstream.c:31:0: lstream.c: In function 'lisp_buffer_stream_startpos': lstream.c:1921:30: warning: dereferencing type-punned pointer will break strict-aliasing rules [-Wstrict-aliasing] LSTREAM_TYPE_DATA (stream, lisp_buffer) ^ lstream.h:315:12: note: in definition of macro 'LSTREAM_TYPE_DATA' ((struct type##_stream *) \ ^~~~ lstream.c:2122:27: note: in expansion of macro 'LISP_BUFFER_STREAM_DATA' return marker_position (LISP_BUFFER_STREAM_DATA (stream)->start); ^~~~~~~~~~~~~~~~~~~~~~~ --- marker.o --- gcc -c -Wall -Wno-switch -Wundef -Wsign-compare -Wno-char-subscripts -Wpacked -Wpointer-arith -Wshadow -Wmissing-declarations -Wmissing-prototypes -Wstrict-prototypes -Wdeclaration-after-statement -g -O2 -Dunix -no-pie -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -DTERMINFO -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -Demacs -I. -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/src -DHAVE_CONFIG_H -I/usr/pkg/include -I/usr/X11R7/include -I/usr/X11R7/include marker.c In file included from marker.c:32:0: ./config.h:868:16: warning: "__builtin_alloca" is not defined, evaluates to 0 [-Wundef] #define alloca __builtin_alloca ^ marker.c: In function 'print_marker': marker.c:51:45: warning: format '%zd' expects argument of type 'signed size_t', but argument 3 has type 'Charbpos {aka long int}' [-Wformat=] write_fmt_string (printcharfun, "at %zd in ", ~~^ %ld marker_position (obj)); ~~~~~~~~~~~~~~~~~~~~~ --- md5.o --- gcc -c -Wall -Wno-switch -Wundef -Wsign-compare -Wno-char-subscripts -Wpacked -Wpointer-arith -Wshadow -Wmissing-declarations -Wmissing-prototypes -Wstrict-prototypes -Wdeclaration-after-statement -g -O2 -Dunix -no-pie -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -DTERMINFO -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -Demacs -I. -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/src -DHAVE_CONFIG_H -I/usr/pkg/include -I/usr/X11R7/include -I/usr/X11R7/include md5.c In file included from md5.c:26:0: ./config.h:868:16: warning: "__builtin_alloca" is not defined, evaluates to 0 [-Wundef] #define alloca __builtin_alloca ^ --- menubar.o --- gcc -c -Wall -Wno-switch -Wundef -Wsign-compare -Wno-char-subscripts -Wpacked -Wpointer-arith -Wshadow -Wmissing-declarations -Wmissing-prototypes -Wstrict-prototypes -Wdeclaration-after-statement -g -O2 -Dunix -no-pie -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -DTERMINFO -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -Demacs -I. -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/src -DHAVE_CONFIG_H -I/usr/pkg/include -I/usr/X11R7/include -I/usr/X11R7/include menubar.c In file included from menubar.c:31:0: ./config.h:868:16: warning: "__builtin_alloca" is not defined, evaluates to 0 [-Wundef] #define alloca __builtin_alloca ^ --- minibuf.o --- gcc -c -Wall -Wno-switch -Wundef -Wsign-compare -Wno-char-subscripts -Wpacked -Wpointer-arith -Wshadow -Wmissing-declarations -Wmissing-prototypes -Wstrict-prototypes -Wdeclaration-after-statement -g -O2 -Dunix -no-pie -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -DTERMINFO -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -Demacs -I. -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/src -DHAVE_CONFIG_H -I/usr/pkg/include -I/usr/X11R7/include -I/usr/X11R7/include minibuf.c In file included from minibuf.c:27:0: ./config.h:868:16: warning: "__builtin_alloca" is not defined, evaluates to 0 [-Wundef] #define alloca __builtin_alloca ^ --- miscplay.o --- gcc -c -Wall -Wno-switch -Wundef -Wsign-compare -Wno-char-subscripts -Wpacked -Wpointer-arith -Wshadow -Wmissing-declarations -Wmissing-prototypes -Wstrict-prototypes -Wdeclaration-after-statement -g -O2 -Dunix -no-pie -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -DTERMINFO -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -Demacs -I. -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/src -DHAVE_CONFIG_H -I/usr/pkg/include -I/usr/X11R7/include -I/usr/X11R7/include miscplay.c --- number-gmp.o --- gcc -c -Wall -Wno-switch -Wundef -Wsign-compare -Wno-char-subscripts -Wpacked -Wpointer-arith -Wshadow -Wmissing-declarations -Wmissing-prototypes -Wstrict-prototypes -Wdeclaration-after-statement -g -O2 -Dunix -no-pie -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -DTERMINFO -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -Demacs -I. -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/src -DHAVE_CONFIG_H -I/usr/pkg/include -I/usr/X11R7/include -I/usr/X11R7/include number-gmp.c --- miscplay.o --- In file included from miscplay.c:23:0: ./config.h:868:16: warning: "__builtin_alloca" is not defined, evaluates to 0 [-Wundef] #define alloca __builtin_alloca ^ --- number-gmp.o --- In file included from number-gmp.c:21:0: ./config.h:868:16: warning: "__builtin_alloca" is not defined, evaluates to 0 [-Wundef] #define alloca __builtin_alloca ^ --- number.o --- gcc -c -Wall -Wno-switch -Wundef -Wsign-compare -Wno-char-subscripts -Wpacked -Wpointer-arith -Wshadow -Wmissing-declarations -Wmissing-prototypes -Wstrict-prototypes -Wdeclaration-after-statement -g -O2 -Dunix -no-pie -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -DTERMINFO -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -Demacs -I. -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/src -DHAVE_CONFIG_H -I/usr/pkg/include -I/usr/X11R7/include -I/usr/X11R7/include number.c In file included from number.c:22:0: ./config.h:868:16: warning: "__builtin_alloca" is not defined, evaluates to 0 [-Wundef] #define alloca __builtin_alloca ^ --- number-gmp.o --- number-gmp.c: In function 'bigfloat_to_string': number-gmp.c:182:37: warning: format '%zd' expects argument of type 'signed size_t', but argument 4 has type 'long int' [-Wformat=] "E%zd", expt); ~~^ %ld --- opaque.o --- gcc -c -Wall -Wno-switch -Wundef -Wsign-compare -Wno-char-subscripts -Wpacked -Wpointer-arith -Wshadow -Wmissing-declarations -Wmissing-prototypes -Wstrict-prototypes -Wdeclaration-after-statement -g -O2 -Dunix -no-pie -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -DTERMINFO -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -Demacs -I. -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/src -DHAVE_CONFIG_H -I/usr/pkg/include -I/usr/X11R7/include -I/usr/X11R7/include opaque.c In file included from opaque.c:35:0: ./config.h:868:16: warning: "__builtin_alloca" is not defined, evaluates to 0 [-Wundef] #define alloca __builtin_alloca ^ --- print.o --- gcc -c -Wall -Wno-switch -Wundef -Wsign-compare -Wno-char-subscripts -Wpacked -Wpointer-arith -Wshadow -Wmissing-declarations -Wmissing-prototypes -Wstrict-prototypes -Wdeclaration-after-statement -g -O2 -Dunix -no-pie -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -DTERMINFO -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -Demacs -I. -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/src -DHAVE_CONFIG_H -I/usr/pkg/include -I/usr/X11R7/include -I/usr/X11R7/include print.c In file included from print.c:33:0: ./config.h:868:16: warning: "__builtin_alloca" is not defined, evaluates to 0 [-Wundef] #define alloca __builtin_alloca ^ --- opaque.o --- opaque.c: In function 'print_opaque': opaque.c:50:55: warning: format '%zu' expects argument of type 'size_t', but argument 3 has type 'long unsigned int' [-Wformat=] "#", ~~^ %lu (EMACS_UINT)(p->size), LISP_OBJECT_UID (obj)); ~~~~~~~~~~~~~~~~~~~~~ opaque.c: In function 'hash_opaque': opaque.c:105:5: warning: dereferencing type-punned pointer will break strict-aliasing rules [-Wstrict-aliasing] return *((Hashcode *) XOPAQUE_DATA (obj)); ^~~~~~ --- process-unix.o --- gcc -c -Wall -Wno-switch -Wundef -Wsign-compare -Wno-char-subscripts -Wpacked -Wpointer-arith -Wshadow -Wmissing-declarations -Wmissing-prototypes -Wstrict-prototypes -Wdeclaration-after-statement -g -O2 -Dunix -no-pie -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -DTERMINFO -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -Demacs -I. -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/src -DHAVE_CONFIG_H -I/usr/pkg/include -I/usr/X11R7/include -I/usr/X11R7/include process-unix.c --- print.o --- print.c: In function 'printing_major_badness': print.c:1644:63: warning: format '%zd' expects argument of type 'signed size_t', but argument 6 has type 'long int' [-Wformat=] emacs_snprintf (buf, sizeof (buf), "%s type %d object %zd", ~~^ %ld badness_string, type, (EMACS_INT) val); ~~~~~~~~~~~~~~~ print.c: In function 'print_gensym_or_circle': print.c:1894:47: warning: format '%zd' expects argument of type 'signed size_t', but argument 3 has type 'long int' [-Wformat=] write_fmt_string (printcharfun, "#%zd#", ~~^ %ld (XFIXNUM (seen) & PRINT_NUMBER_ORDINAL_MASK) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ >> PRINT_NUMBER_ORDINAL_SHIFT); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ print.c:1903:47: warning: format '%zd' expects argument of type 'signed size_t', but argument 3 has type 'long int' [-Wformat=] write_fmt_string (printcharfun, "#%zd=", ~~^ %ld (XFIXNUM (seen) & PRINT_NUMBER_ORDINAL_MASK) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ >> PRINT_NUMBER_ORDINAL_SHIFT); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ print.c: In function 'debug_p4': print.c:2813:21: warning: format '%zd' expects argument of type 'signed size_t', but argument 2 has type 'long int' [-Wformat=] debug_out ("%zd", XFIXNUM (obj)); ~~^ %ld print.c:2827:28: warning: format '%zx' expects argument of type 'size_t', but argument 3 has type 'long int' [-Wformat=] debug_out ("#<%s addr=0x%zx uid=0x%zx>", ~~^ %lx print.c:2829:6: (EMACS_INT) header, ~~~~~~~~~~~~~~~~~~ print.c:2827:38: warning: format '%zx' expects argument of type 'size_t', but argument 4 has type 'long int' [-Wformat=] debug_out ("#<%s addr=0x%zx uid=0x%zx>", ~~^ %lx print.c:2830:6: (EMACS_INT) ((struct lrecord_header *) header)->uid); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ --- process-unix.o --- In file included from process-unix.c:31:0: ./config.h:868:16: warning: "__builtin_alloca" is not defined, evaluates to 0 [-Wundef] #define alloca __builtin_alloca ^ --- process.o --- gcc -c -Wall -Wno-switch -Wundef -Wsign-compare -Wno-char-subscripts -Wpacked -Wpointer-arith -Wshadow -Wmissing-declarations -Wmissing-prototypes -Wstrict-prototypes -Wdeclaration-after-statement -g -O2 -Dunix -no-pie -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -DTERMINFO -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -Demacs -I. -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/src -DHAVE_CONFIG_H -I/usr/pkg/include -I/usr/X11R7/include -I/usr/X11R7/include process.c --- profile.o --- gcc -c -Wall -Wno-switch -Wundef -Wsign-compare -Wno-char-subscripts -Wpacked -Wpointer-arith -Wshadow -Wmissing-declarations -Wmissing-prototypes -Wstrict-prototypes -Wdeclaration-after-statement -g -O2 -Dunix -no-pie -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -DTERMINFO -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -Demacs -I. -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/src -DHAVE_CONFIG_H -I/usr/pkg/include -I/usr/X11R7/include -I/usr/X11R7/include profile.c --- process.o --- In file included from process.c:34:0: ./config.h:868:16: warning: "__builtin_alloca" is not defined, evaluates to 0 [-Wundef] #define alloca __builtin_alloca ^ --- profile.o --- In file included from profile.c:20:0: ./config.h:868:16: warning: "__builtin_alloca" is not defined, evaluates to 0 [-Wundef] #define alloca __builtin_alloca ^ --- process-unix.o --- process-unix.c: In function 'unix_init_process_io_handles': process-unix.c:842:52: warning: format '%zd' expects argument of type 'signed size_t', but argument 4 has type 'long int' [-Wformat=] "failed setting pipe (fd %zd) to nonblocking mode", ~~^ %ld (EMACS_INT) in); ~~~~~~~~~~~~~~ process-unix.c: In function 'unix_open_network_stream': process-unix.c:1901:36: warning: format '%zd' expects argument of type 'signed size_t', but argument 4 has type 'long int' [-Wformat=] "%zd", XFIXNUM (service)); ~~^ %ld --- ralloc.o --- gcc -c -Wall -Wno-switch -Wundef -Wsign-compare -Wno-char-subscripts -Wpacked -Wpointer-arith -Wshadow -Wmissing-declarations -Wmissing-prototypes -Wstrict-prototypes -Wdeclaration-after-statement -g -O2 -Dunix -no-pie -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -DTERMINFO -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -Demacs -I. -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/src -DHAVE_CONFIG_H -I/usr/pkg/include -I/usr/X11R7/include -I/usr/X11R7/include ralloc.c --- rangetab.o --- gcc -c -Wall -Wno-switch -Wundef -Wsign-compare -Wno-char-subscripts -Wpacked -Wpointer-arith -Wshadow -Wmissing-declarations -Wmissing-prototypes -Wstrict-prototypes -Wdeclaration-after-statement -g -O2 -Dunix -no-pie -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -DTERMINFO -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -Demacs -I. -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/src -DHAVE_CONFIG_H -I/usr/pkg/include -I/usr/X11R7/include -I/usr/X11R7/include rangetab.c --- realpath.o --- gcc -c -Wall -Wno-switch -Wundef -Wsign-compare -Wno-char-subscripts -Wpacked -Wpointer-arith -Wshadow -Wmissing-declarations -Wmissing-prototypes -Wstrict-prototypes -Wdeclaration-after-statement -g -O2 -Dunix -no-pie -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -DTERMINFO -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -Demacs -I. -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/src -DHAVE_CONFIG_H -I/usr/pkg/include -I/usr/X11R7/include -I/usr/X11R7/include realpath.c --- ralloc.o --- In file included from ralloc.c:29:0: ./config.h:868:16: warning: "__builtin_alloca" is not defined, evaluates to 0 [-Wundef] #define alloca __builtin_alloca ^ --- rangetab.o --- In file included from rangetab.c:24:0: ./config.h:868:16: warning: "__builtin_alloca" is not defined, evaluates to 0 [-Wundef] #define alloca __builtin_alloca ^ --- realpath.o --- In file included from realpath.c:29:0: ./config.h:868:16: warning: "__builtin_alloca" is not defined, evaluates to 0 [-Wundef] #define alloca __builtin_alloca ^ --- rangetab.o --- rangetab.c: In function 'print_range_table': rangetab.c:125:44: warning: format '%zd' expects argument of type 'signed size_t', but argument 4 has type 'long int' [-Wformat=] write_fmt_string (printcharfun, "%c%zd %zd%c ", ~~^ %ld rangetab.c:127:4: (Bytecount) (rte.first - so), ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ rangetab.c:125:48: warning: format '%zd' expects argument of type 'signed size_t', but argument 5 has type 'long int' [-Wformat=] write_fmt_string (printcharfun, "%c%zd %zd%c ", ~~^ %ld rangetab.c:128:4: (Bytecount) (rte.last - ec), ~~~~~~~~~~~~~~~~~~~~~~~~~~~ --- redisplay-output.o --- gcc -c -Wall -Wno-switch -Wundef -Wsign-compare -Wno-char-subscripts -Wpacked -Wpointer-arith -Wshadow -Wmissing-declarations -Wmissing-prototypes -Wstrict-prototypes -Wdeclaration-after-statement -g -O2 -Dunix -no-pie -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -DTERMINFO -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -Demacs -I. -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/src -DHAVE_CONFIG_H -I/usr/pkg/include -I/usr/X11R7/include -I/usr/X11R7/include redisplay-output.c --- redisplay.o --- gcc -c -Wall -Wno-switch -Wundef -Wsign-compare -Wno-char-subscripts -Wpacked -Wpointer-arith -Wshadow -Wmissing-declarations -Wmissing-prototypes -Wstrict-prototypes -Wdeclaration-after-statement -g -O2 -Dunix -no-pie -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -DTERMINFO -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -Demacs -I. -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/src -DHAVE_CONFIG_H -I/usr/pkg/include -I/usr/X11R7/include -I/usr/X11R7/include redisplay.c --- redisplay-output.o --- In file included from redisplay-output.c:32:0: ./config.h:868:16: warning: "__builtin_alloca" is not defined, evaluates to 0 [-Wundef] #define alloca __builtin_alloca ^ --- redisplay.o --- In file included from redisplay.c:46:0: ./config.h:868:16: warning: "__builtin_alloca" is not defined, evaluates to 0 [-Wundef] #define alloca __builtin_alloca ^ --- regex.o --- gcc -c -Wall -Wno-switch -Wundef -Wsign-compare -Wno-char-subscripts -Wpacked -Wpointer-arith -Wshadow -Wmissing-declarations -Wmissing-prototypes -Wstrict-prototypes -Wdeclaration-after-statement -g -O2 -Dunix -no-pie -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -DTERMINFO -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -Demacs -I. -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/src -DHAVE_CONFIG_H -I/usr/pkg/include -I/usr/X11R7/include -I/usr/X11R7/include regex.c In file included from regex.c:27:0: ./config.h:868:16: warning: "__builtin_alloca" is not defined, evaluates to 0 [-Wundef] #define alloca __builtin_alloca ^ --- scrollbar.o --- gcc -c -Wall -Wno-switch -Wundef -Wsign-compare -Wno-char-subscripts -Wpacked -Wpointer-arith -Wshadow -Wmissing-declarations -Wmissing-prototypes -Wstrict-prototypes -Wdeclaration-after-statement -g -O2 -Dunix -no-pie -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -DTERMINFO -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -Demacs -I. -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/src -DHAVE_CONFIG_H -I/usr/pkg/include -I/usr/X11R7/include -I/usr/X11R7/include scrollbar.c In file included from scrollbar.c:27:0: ./config.h:868:16: warning: "__builtin_alloca" is not defined, evaluates to 0 [-Wundef] #define alloca __builtin_alloca ^ --- redisplay.o --- In file included from redisplay.c:50:0: redisplay.c: In function 'add_disp_table_entry_runes_1': lstream.h:562:30: warning: dereferencing type-punned pointer will break strict-aliasing rules [-Wstrict-aliasing] LSTREAM_TYPE_DATA (stream, fixed_buffer) ^ lstream.h:315:12: note: in definition of macro 'LSTREAM_TYPE_DATA' ((struct type##_stream *) \ ^~~~ lstream.h:585:7: note: in expansion of macro 'FIXED_BUFFER_STREAM_DATA' FIXED_BUFFER_STREAM_DATA (&(lname##u.l))->outbuf = buf; \ ^~~~~~~~~~~~~~~~~~~~~~~~ redisplay.c:1552:11: note: in expansion of macro 'INIT_STACK_FIXED_BUFFER_OUTPUT_STREAM' INIT_STACK_FIXED_BUFFER_OUTPUT_STREAM (format_buf_lispobj, ^ lstream.h:562:30: warning: dereferencing type-punned pointer will break strict-aliasing rules [-Wstrict-aliasing] LSTREAM_TYPE_DATA (stream, fixed_buffer) ^ lstream.h:315:12: note: in definition of macro 'LSTREAM_TYPE_DATA' ((struct type##_stream *) \ ^~~~ lstream.h:586:7: note: in expansion of macro 'FIXED_BUFFER_STREAM_DATA' FIXED_BUFFER_STREAM_DATA (&(lname##u.l))->size = bsize; \ ^~~~~~~~~~~~~~~~~~~~~~~~ redisplay.c:1552:11: note: in expansion of macro 'INIT_STACK_FIXED_BUFFER_OUTPUT_STREAM' INIT_STACK_FIXED_BUFFER_OUTPUT_STREAM (format_buf_lispobj, ^ lstream.h:562:30: warning: dereferencing type-punned pointer will break strict-aliasing rules [-Wstrict-aliasing] LSTREAM_TYPE_DATA (stream, fixed_buffer) ^ lstream.h:315:12: note: in definition of macro 'LSTREAM_TYPE_DATA' ((struct type##_stream *) \ ^~~~ lstream.h:585:7: note: in expansion of macro 'FIXED_BUFFER_STREAM_DATA' FIXED_BUFFER_STREAM_DATA (&(lname##u.l))->outbuf = buf; \ ^~~~~~~~~~~~~~~~~~~~~~~~ redisplay.c:1566:15: note: in expansion of macro 'INIT_STACK_FIXED_BUFFER_OUTPUT_STREAM' INIT_STACK_FIXED_BUFFER_OUTPUT_STREAM (format_buf_lispobj, ^ lstream.h:562:30: warning: dereferencing type-punned pointer will break strict-aliasing rules [-Wstrict-aliasing] LSTREAM_TYPE_DATA (stream, fixed_buffer) ^ lstream.h:315:12: note: in definition of macro 'LSTREAM_TYPE_DATA' ((struct type##_stream *) \ ^~~~ lstream.h:586:7: note: in expansion of macro 'FIXED_BUFFER_STREAM_DATA' FIXED_BUFFER_STREAM_DATA (&(lname##u.l))->size = bsize; \ ^~~~~~~~~~~~~~~~~~~~~~~~ redisplay.c:1566:15: note: in expansion of macro 'INIT_STACK_FIXED_BUFFER_OUTPUT_STREAM' INIT_STACK_FIXED_BUFFER_OUTPUT_STREAM (format_buf_lispobj, ^ --- regex.o --- regex.c: In function 'print_partial_compiled_pattern': regex.c:851:18: warning: format '%zd' expects argument of type 'signed size_t', but argument 2 has type 'long int' [-Wformat=] printf ("%zd:\t", (Bytecount)(p - start)); ~~^ ~~~~~~~~~~~~~~~~~~~~~~ %ld regex.c:953:19: warning: format '%zx' expects argument of type 'size_t', but argument 2 has type 'long int' [-Wformat=] printf ("(0x%zx)", (Bytecount)first); ~~^ %lx regex.c:960:23: warning: format '%zx' expects argument of type 'size_t', but argument 2 has type 'long int' [-Wformat=] printf ("(0x%zx)", (Bytecount)last); ~~^ %lx regex.c:979:37: warning: format '%zd' expects argument of type 'signed size_t', but argument 2 has type 'long int' [-Wformat=] printf ("/on_failure_jump to %zd", (Bytecount)(p + mcnt - start)); ~~^ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ %ld regex.c:984:49: warning: format '%zd' expects argument of type 'signed size_t', but argument 2 has type 'long int' [-Wformat=] printf ("/on_failure_keep_string_jump to %zd", ~~^ %ld (Bytecount)(p + mcnt - start)); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regex.c:990:40: warning: format '%zd' expects argument of type 'signed size_t', but argument 2 has type 'long int' [-Wformat=] printf ("/dummy_failure_jump to %zd", ~~^ %ld (Bytecount)(p + mcnt - start)); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regex.c:1000:36: warning: format '%zd' expects argument of type 'signed size_t', but argument 2 has type 'long int' [-Wformat=] printf ("/maybe_pop_jump to %zd", (Bytecount)(p + mcnt - start)); ~~^ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ %ld regex.c:1005:38: warning: format '%zd' expects argument of type 'signed size_t', but argument 2 has type 'long int' [-Wformat=] printf ("/pop_failure_jump to %zd", (Bytecount)(p + mcnt - start)); ~~^ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ %ld regex.c:1010:35: warning: format '%zd' expects argument of type 'signed size_t', but argument 2 has type 'long int' [-Wformat=] printf ("/jump_past_alt to %zd", (Bytecount)(p + mcnt - start)); ~~^ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ %ld regex.c:1015:26: warning: format '%zd' expects argument of type 'signed size_t', but argument 2 has type 'long int' [-Wformat=] printf ("/jump to %zd", (Bytecount)(p + mcnt - start)); ~~^ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ %ld regex.c:1021:29: warning: format '%zd' expects argument of type 'signed size_t', but argument 2 has type 'long int' [-Wformat=] printf ("/succeed_n to %zd, %d times", ~~^ %ld (Bytecount)(p + mcnt - start), mcnt2); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regex.c:1028:26: warning: format '%zd' expects argument of type 'signed size_t', but argument 2 has type 'long int' [-Wformat=] printf ("/jump_n to %zd, %d times", ~~^ %ld (Bytecount)(p + mcnt - start), mcnt2); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regex.c:1035:39: warning: format '%zd' expects argument of type 'signed size_t', but argument 2 has type 'long int' [-Wformat=] printf ("/set_number_at location %zd to %d", ~~^ %ld (Bytecount)(p + mcnt - start), mcnt2); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regex.c:1119:14: warning: format '%zd' expects argument of type 'signed size_t', but argument 2 has type 'long int' [-Wformat=] printf ("%zd:\tend of pattern.\n", (Bytecount)(p - start)); ~~^ ~~~~~~~~~~~~~~~~~~~~~~ %ld regex.c: In function 'print_compiled_pattern': regex.c:1129:14: warning: format '%zd' expects argument of type 'signed size_t', but argument 2 has type 'Bytecount {aka long int}' [-Wformat=] printf ("%zd bytes used/%zd bytes allocated.\n", bufp->used, ~~^ ~~~~~~~~~~ %ld regex.c:1129:29: warning: format '%zd' expects argument of type 'signed size_t', but argument 3 has type 'Bytecount {aka long int}' [-Wformat=] printf ("%zd bytes used/%zd bytes allocated.\n", bufp->used, ~~^ %ld bufp->allocated); ~~~~~~~~~~~~~~~ regex.c:1138:23: warning: format '%zd' expects argument of type 'signed size_t', but argument 2 has type 'long int' [-Wformat=] printf ("re_nsub: %zd\t", (Bytecount)bufp->re_nsub); ~~^ ~~~~~~~~~~~~~~~~~~~~~~~~ %ld regex.c:1139:26: warning: format '%zd' expects argument of type 'signed size_t', but argument 2 has type 'long int' [-Wformat=] printf ("re_ngroups: %zd\t", (Bytecount)bufp->re_ngroups); ~~^ ~~~~~~~~~~~~~~~~~~~~~~~~~~~ %ld regex.c: In function 're_match_2_internal': regex.c:5796:27: warning: format '%zx' expects argument of type 'size_t', but argument 2 has type 'long int' [-Wformat=] DEBUG_MATCH_PRINT2 ("\n0x%zx: ", (Bytecount) p); ^ ~~~~~~~~~~~~~ regex.c:789:50: note: in definition of macro 'DEBUG_MATCH_PRINT2' if (debug_regexps & RE_DEBUG_MATCHING) printf (x1, x2) ^~ regex.c:6202:26: warning: format '%zd' expects argument of type 'signed size_t', but argument 2 has type 'long int' [-Wformat=] DEBUG_MATCH_PRINT2 (" old_regstart: %zd\n", ^ regex.c:789:50: note: in definition of macro 'DEBUG_MATCH_PRINT2' if (debug_regexps & RE_DEBUG_MATCHING) printf (x1, x2) ^~ regex.c:6206:26: warning: format '%zd' expects argument of type 'signed size_t', but argument 2 has type 'long int' [-Wformat=] DEBUG_MATCH_PRINT2 (" regstart: %zd\n", ^ regex.c:789:50: note: in definition of macro 'DEBUG_MATCH_PRINT2' if (debug_regexps & RE_DEBUG_MATCHING) printf (x1, x2) ^~ regex.c:6249:26: warning: format '%zd' expects argument of type 'signed size_t', but argument 2 has type 'long int' [-Wformat=] DEBUG_MATCH_PRINT2 (" old_regend: %zd\n", ^ regex.c:789:50: note: in definition of macro 'DEBUG_MATCH_PRINT2' if (debug_regexps & RE_DEBUG_MATCHING) printf (x1, x2) ^~ regex.c:6252:26: warning: format '%zd' expects argument of type 'signed size_t', but argument 2 has type 'long int' [-Wformat=] DEBUG_MATCH_PRINT2 (" regend: %zd\n", ^ regex.c:789:50: note: in definition of macro 'DEBUG_MATCH_PRINT2' if (debug_regexps & RE_DEBUG_MATCHING) printf (x1, x2) ^~ regex.c:1731:26: warning: format '%zd' expects argument of type 'signed size_t', but argument 2 has type 'long int' [-Wformat=] DEBUG_FAIL_PRINT2 (" Before push, next avail: %zd\n", \ ^ (Bytecount) (fail_stack).avail); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~ regex.c:774:55: note: in definition of macro 'DEBUG_FAIL_PRINT2' if (debug_regexps & RE_DEBUG_FAILURE_POINT) printf (x1, x2) ^~ regex.c:6370:7: note: in expansion of macro 'PUSH_FAILURE_POINT' PUSH_FAILURE_POINT (p1 + mcnt, d, -2); ^~~~~~~~~~~~~~~~~~ regex.c:1733:26: warning: format '%zd' expects argument of type 'signed size_t', but argument 2 has type 'long int' [-Wformat=] DEBUG_FAIL_PRINT2 (" size: %zd\n", \ ^ (Bytecount) (fail_stack).size); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~ regex.c:774:55: note: in definition of macro 'DEBUG_FAIL_PRINT2' if (debug_regexps & RE_DEBUG_FAILURE_POINT) printf (x1, x2) ^~ regex.c:6370:7: note: in expansion of macro 'PUSH_FAILURE_POINT' PUSH_FAILURE_POINT (p1 + mcnt, d, -2); ^~~~~~~~~~~~~~~~~~ regex.c:1737:26: warning: format '%zd' expects argument of type 'signed size_t', but argument 2 has type 'long int' [-Wformat=] DEBUG_FAIL_PRINT2 (" available: %zd\n", \ ^ regex.c:774:55: note: in definition of macro 'DEBUG_FAIL_PRINT2' if (debug_regexps & RE_DEBUG_FAILURE_POINT) printf (x1, x2) ^~ regex.c:6370:7: note: in expansion of macro 'PUSH_FAILURE_POINT' PUSH_FAILURE_POINT (p1 + mcnt, d, -2); ^~~~~~~~~~~~~~~~~~ regex.c:1756:23: warning: format '%zd' expects argument of type 'signed size_t', but argument 2 has type 'long int' [-Wformat=] DEBUG_FAIL_PRINT2 ("\n Doubled stack; size now: %zd\n", \ ^ (Bytecount) (fail_stack).size); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~ regex.c:774:55: note: in definition of macro 'DEBUG_FAIL_PRINT2' if (debug_regexps & RE_DEBUG_FAILURE_POINT) printf (x1, x2) ^~ regex.c:6370:7: note: in expansion of macro 'PUSH_FAILURE_POINT' PUSH_FAILURE_POINT (p1 + mcnt, d, -2); ^~~~~~~~~~~~~~~~~~ regex.c:1758:23: warning: format '%zd' expects argument of type 'signed size_t', but argument 2 has type 'long int' [-Wformat=] DEBUG_FAIL_PRINT2 (" slots available: %zd\n", \ ^ regex.c:774:55: note: in definition of macro 'DEBUG_FAIL_PRINT2' if (debug_regexps & RE_DEBUG_FAILURE_POINT) printf (x1, x2) ^~ regex.c:6370:7: note: in expansion of macro 'PUSH_FAILURE_POINT' PUSH_FAILURE_POINT (p1 + mcnt, d, -2); ^~~~~~~~~~~~~~~~~~ regex.c:1777:23: warning: format '%zx' expects argument of type 'size_t', but argument 2 has type 'long int' [-Wformat=] DEBUG_FAIL_PRINT2 (" start: 0x%zx\n", \ ^ (Bytecount) regstart[this_reg]); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regex.c:774:55: note: in definition of macro 'DEBUG_FAIL_PRINT2' if (debug_regexps & RE_DEBUG_FAILURE_POINT) printf (x1, x2) ^~ regex.c:6370:7: note: in expansion of macro 'PUSH_FAILURE_POINT' PUSH_FAILURE_POINT (p1 + mcnt, d, -2); ^~~~~~~~~~~~~~~~~~ regex.c:1779:23: warning: format '%zx' expects argument of type 'size_t', but argument 2 has type 'long int' [-Wformat=] DEBUG_FAIL_PRINT2 (" end: 0x%zx\n", \ ^ (Bytecount) regend[this_reg]); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regex.c:774:55: note: in definition of macro 'DEBUG_FAIL_PRINT2' if (debug_regexps & RE_DEBUG_FAILURE_POINT) printf (x1, x2) ^~ regex.c:6370:7: note: in expansion of macro 'PUSH_FAILURE_POINT' PUSH_FAILURE_POINT (p1 + mcnt, d, -2); ^~~~~~~~~~~~~~~~~~ regex.c:1781:23: warning: format '%zx' expects argument of type 'size_t', but argument 2 has type 'long int' [-Wformat=] DEBUG_FAIL_PRINT2 (" info: 0x%zx\n ", \ ^ * (long *) (®_info[this_reg])); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regex.c:774:55: note: in definition of macro 'DEBUG_FAIL_PRINT2' if (debug_regexps & RE_DEBUG_FAILURE_POINT) printf (x1, x2) ^~ regex.c:6370:7: note: in expansion of macro 'PUSH_FAILURE_POINT' PUSH_FAILURE_POINT (p1 + mcnt, d, -2); ^~~~~~~~~~~~~~~~~~ regex.c:1814:26: warning: format '%zx' expects argument of type 'size_t', but argument 2 has type 'long int' [-Wformat=] DEBUG_FAIL_PRINT2 (" Pushing pattern 0x%zx: \n", \ ^ (Bytecount) pattern_place); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_FAIL_PRINT_COMPILED_PATTERN (bufp, pattern_place, pend); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_FAIL_PRINT2 (" Pushing string 0x%zx: `", \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (Bytecount) string_place); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_FAIL_PRINT_DOUBLE_STRING (string_place, string1, size1, \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ string2, size2); \ ~~~~~~~~~~~~~~~~~~~~ DEBUG_FAIL_PRINT1 ("'\n"); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_FAIL_PRINT2 (" Pushing failure id: %u\n", failure_id); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } \ ~~~~~~~~~~~ } while (0) ~~~~~~~~~~~ /* This is the number of items that are pushed and popped on the stack ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for each register. */ ~~~~~~~~~~~~~~~~~~~~~~ #define NUM_REG_ITEMS 3 ~~~~~~~~~~~~~~~~~~~~~~~~ /* Individual items aside from the registers. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef DEBUG ~~~~~~~~~~~~ #define NUM_NONREG_ITEMS 5 /* Includes failure point id. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #else ~~~~~ #define NUM_NONREG_ITEMS 4 ~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ /* We push at most this many items on the stack. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* We used to use (num_regs - 1), which is the number of registers ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ this regexp will save; but that was changed to 5 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ to avoid stack overflow for a regexp with lots of parens. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define MAX_FAILURE_ITEMS (5 * NUM_REG_ITEMS + NUM_NONREG_ITEMS) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* We actually push this many items. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define NUM_FAILURE_ITEMS \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ((highest_active_reg - lowest_active_reg + 1) * NUM_REG_ITEMS \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + NUM_NONREG_ITEMS) ~~~~~~~~~~~~~~~~~~~ /* How many items can still be added to the stack without overflowing it. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define REMAINING_AVAIL_SLOTS ((fail_stack).size - (fail_stack).avail) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Pops what PUSH_FAIL_STACK pushes. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ We restore into the following parameters, all of which should be lvalues: ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ STR -- the saved data position. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PAT -- the saved pattern position. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ LOW_REG, HIGH_REG -- the highest and lowest active registers. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ REGSTART, REGEND -- arrays of string positions. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ REG_INFO -- array of information about each subexpression. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Also assumes the variables `fail_stack' and (if debugging), `bufp', ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `pend', `string1', `size1', `string2', and `size2'. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define POP_FAILURE_POINT(str, pat, low_reg, high_reg, \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regstart, regend, reg_info) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ do { \ ~~~~~~~~~~~~~~ DEBUG_STATEMENT (int ffailure_id;) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int this_reg; \ ~~~~~~~~~~~~~~~~~~~~~~ const unsigned char *string_temp; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ \ ~ /* Remove failure points and point to how many regs pushed. */ \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ assert (fail_stack.avail >= NUM_NONREG_ITEMS); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ \ ~ if (DEBUG_RUNTIME_FLAGS & RE_DEBUG_FAILURE_POINT) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { \ ~~~~~~~~~~~ DEBUG_FAIL_PRINT1 ("POP_FAILURE_POINT:\n"); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_FAIL_PRINT2 (" Before pop, next avail: %zd\n", \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (Bytecount) fail_stack.avail); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_FAIL_PRINT2 (" size: %zd\n", \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (Bytecount) fail_stack.size); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } \ ~~~~~~~~~~~ \ ~ DEBUG_STATEMENT (ffailure_id = POP_FAILURE_INT()); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ \ ~ /* If the saved string location is NULL, it came from an \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ on_failure_keep_string_jump opcode, and we want to throw away the \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ saved NULL, thus retaining our current position in the string. */ \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ string_temp = POP_FAILURE_RELOCATABLE (); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (string_temp != NULL) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ str = string_temp; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~ \ ~ pat = (unsigned char *) POP_FAILURE_POINTER (); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ \ ~ /* Restore register info. */ \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ high_reg = POP_FAILURE_INT (); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ low_reg = POP_FAILURE_INT (); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ \ ~ if (DEBUG_RUNTIME_FLAGS & RE_DEBUG_FAILURE_POINT) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { \ ~~~~~~~~~~~ DEBUG_FAIL_PRINT2 (" Popping failure id: %d\n", ffailure_id); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_FAIL_PRINT2 (" Popping string 0x%zx: `", (Bytecount) str); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_FAIL_PRINT_DOUBLE_STRING (str, string1, size1, \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ string2, size2); \ ~~~~~~~~~~~~~~~~~~~~ DEBUG_FAIL_PRINT1 ("'\n"); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_FAIL_PRINT2 (" Popping pattern 0x%zx: ", (Bytecount) pat); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_FAIL_PRINT_COMPILED_PATTERN (bufp, pat, pend); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_FAIL_PRINT2 (" Popping high active reg: %d\n", high_reg); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_FAIL_PRINT2 (" Popping low active reg: %d\n", low_reg); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } \ ~~~~~~~~~~~ \ ~ for (this_reg = high_reg; this_reg >= low_reg; this_reg--) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { \ ~~~~~~~~~~~ reg_info[this_reg].word = POP_FAILURE_ELT (); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regend[this_reg] = POP_FAILURE_RELOCATABLE (); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regstart[this_reg] = POP_FAILURE_RELOCATABLE (); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ \ ~ if (DEBUG_RUNTIME_FLAGS & RE_DEBUG_FAILURE_POINT) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { \ ~~~~~~~~~~ DEBUG_FAIL_PRINT2 (" Popping reg: %d\n", this_reg); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_FAIL_PRINT2 (" info: 0x%zx\n", \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ * (Bytecount *) ®_info[this_reg]); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_FAIL_PRINT2 (" end: 0x%zx\n", \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (Bytecount) regend[this_reg]); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_FAIL_PRINT2 (" start: 0x%zx\n", \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (Bytecount) regstart[this_reg]); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } \ ~~~~~~~~~~ } \ ~~~~~~~~~~~ \ ~ set_regs_matched_done = 0; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_STATEMENT (nfailure_points_popped++); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } while (0) /* POP_FAILURE_POINT */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ~ /* Structure for per-register (a.k.a. per-group) information. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Other register information, such as the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ starting and ending positions (which are addresses), and the list of ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ inner groups (which is a bits list) are maintained in separate ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ variables. ~~~~~~~~~~ We are making a (strictly speaking) nonportable assumption here: that ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the compiler will pack our bit fields into something that fits into ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the type of `word', i.e., is something that fits into one item on the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ failure stack. */ ~~~~~~~~~~~~~~~~~~ typedef union ~~~~~~~~~~~~~ { ~ fail_stack_elt_t word; ~~~~~~~~~~~~~~~~~~~~~~ struct ~~~~~~ { ~ /* This field is one if this group can match the empty string, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ zero if not. If not yet determined, `MATCH_NULL_UNSET_VALUE'. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define MATCH_NULL_UNSET_VALUE 3 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ unsigned int match_null_string_p : 2; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ unsigned int is_active : 1; ~~~~~~~~~~~~~~~~~~~~~~~~~~~ unsigned int matched_something : 1; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ unsigned int ever_matched_something : 1; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } bits; ~~~~~~~ } register_info_type; ~~~~~~~~~~~~~~~~~~~~~ #define REG_MATCH_NULL_STRING_P(R) ((R).bits.match_null_string_p) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define IS_ACTIVE(R) ((R).bits.is_active) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define MATCHED_SOMETHING(R) ((R).bits.matched_something) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define EVER_MATCHED_SOMETHING(R) ((R).bits.ever_matched_something) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Call this when have matched a real character; it sets `matched' flags ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for the subexpressions which we are currently inside. Also records ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ that those subexprs have matched. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define SET_REGS_MATCHED() \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ do \ ~~~~~~~~~~~~ { \ ~~~~~~~~~~~ if (!set_regs_matched_done) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { \ ~~~~~~~~~~ int r; \ ~~~~~~~~~~~~~~ set_regs_matched_done = 1; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (r = lowest_active_reg; r <= highest_active_reg; r++) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { \ ~~~~~~~~~~ MATCHED_SOMETHING (reg_info[r]) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ = EVER_MATCHED_SOMETHING (reg_info[r]) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ = 1; \ ~~~~~~~~~~~~ } \ ~~~~~~~~~~ } \ ~~~~~~~~~~ } \ ~~~~~~~~~~~ while (0) ~~~~~~~~~ ~ /* Subroutine declarations and macros for regex_compile. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Fetch the next character in the uncompiled pattern---translating it ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if necessary. */ ~~~~~~~~~~~~~~~~~ #define PATFETCH(c) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~ do { \ ~~~~~~~~~~~~~~ PATFETCH_RAW (c); \ ~~~~~~~~~~~~~~~~~~~~~~~~~ c = RE_TRANSLATE (c); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } while (0) ~~~~~~~~~~~ /* Fetch the next character in the uncompiled pattern, with no ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ translation. */ ~~~~~~~~~~~~~~~~ #define PATFETCH_RAW(c) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ do {if (p == pend) return REG_EEND; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ assert (p < pend); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~ c = itext_ichar (p); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ INC_IBYTEPTR (p); \ ~~~~~~~~~~~~~~~~~~~~~~~~~ } while (0) ~~~~~~~~~~~ /* Go backwards one character in the pattern. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define PATUNFETCH DEC_IBYTEPTR (p) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* If `translate' is non-null, return translate[D], else just D. We ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ cast the subscript to translate because some data is declared as ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `char *', to avoid warnings when a string constant is passed. But ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ when we use a character as a subscript we must make it unsigned. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define RE_TRANSLATE(d) \ ~~~~~~~~~~~~~~~~~~~~~~~~~ (TRANSLATE_P (translate) ? RE_TRANSLATE_1 (d) : (d)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Macros for outputting the compiled pattern into `buffer'. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* If the buffer isn't allocated when it comes in, use this. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define INIT_BUF_SIZE 32 ~~~~~~~~~~~~~~~~~~~~~~~~~ /* Make sure we have at least N more bytes of space in buffer. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define GET_BUFFER_SPACE(n) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ while (buf_end - bufp->buffer + (n) > (ptrdiff_t) bufp->allocated) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ EXTEND_BUFFER () ~~~~~~~~~~~~~~~~ /* Make sure we have one more byte of buffer space and then add C to it. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define BUF_PUSH(c) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~ do { \ ~~~~~~~~~~~~~~ GET_BUFFER_SPACE (1); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ *buf_end++ = (unsigned char) (c); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } while (0) ~~~~~~~~~~~ /* Ensure we have two more bytes of buffer space and then append C1 and C2. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define BUF_PUSH_2(c1, c2) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ do { \ ~~~~~~~~~~~~~~ GET_BUFFER_SPACE (2); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ *buf_end++ = (unsigned char) (c1); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ *buf_end++ = (unsigned char) (c2); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } while (0) ~~~~~~~~~~~ /* As with BUF_PUSH_2, except for three bytes. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define BUF_PUSH_3(c1, c2, c3) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ do { \ ~~~~~~~~~~~~~~ GET_BUFFER_SPACE (3); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ *buf_end++ = (unsigned char) (c1); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ *buf_end++ = (unsigned char) (c2); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ *buf_end++ = (unsigned char) (c3); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } while (0) ~~~~~~~~~~~ /* Store a jump with opcode OP at LOC to location TO. We store a ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ relative address offset by the three bytes the jump itself occupies. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define STORE_JUMP(op, loc, to) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ store_op1 (op, loc, (to) - (loc) - 3) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Likewise, for a two-argument jump. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define STORE_JUMP2(op, loc, to, arg) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ store_op2 (op, loc, (to) - (loc) - 3, arg) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Like `STORE_JUMP', but for inserting. Assume `buf_end' is the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ buffer end. */ ~~~~~~~~~~~~~~~ #define INSERT_JUMP(op, loc, to) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ insert_op1 (op, loc, (to) - (loc) - 3, buf_end) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Like `STORE_JUMP2', but for inserting. Assume `buf_end' is the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ buffer end. */ ~~~~~~~~~~~~~~~ #define INSERT_JUMP2(op, loc, to, arg) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ insert_op2 (op, loc, (to) - (loc) - 3, arg, buf_end) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Extend the buffer by twice its current size via realloc and ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ reset the pointers that pointed into the old block to point to the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ correct places in the new one. If extending the buffer results in it ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ being larger than RE_MAX_BUF_SIZE, then flag memory exhausted. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define EXTEND_BUFFER() \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ do { \ ~~~~~~~~~~~~~~~ re_char *old_buffer = bufp->buffer; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (bufp->allocated == RE_MAX_BUF_SIZE) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return REG_ESIZE; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->allocated <<= 1; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (bufp->allocated > RE_MAX_BUF_SIZE) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->allocated = RE_MAX_BUF_SIZE; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->buffer = \ ~~~~~~~~~~~~~~~~~~~~~~~ (unsigned char *) xrealloc (bufp->buffer, bufp->allocated); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (bufp->buffer == NULL) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return REG_ESPACE; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~ /* If the buffer moved, move all the pointers into it. */ \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (old_buffer != bufp->buffer) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { \ ~~~~~~~~~~~~ buf_end = (buf_end - old_buffer) + bufp->buffer; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ begalt = (begalt - old_buffer) + bufp->buffer; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (fixup_alt_jump) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~ fixup_alt_jump = (fixup_alt_jump - old_buffer) + bufp->buffer; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (laststart) \ ~~~~~~~~~~~~~~~~~~~~~~~ laststart = (laststart - old_buffer) + bufp->buffer; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (pending_exact) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~ pending_exact = (pending_exact - old_buffer) + bufp->buffer; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } \ ~~~~~~~~~~~~ } while (0) ~~~~~~~~~~~ #define INIT_REG_TRANSLATE_SIZE 5 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Macros for the compile stack. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Since offsets can go either forwards or backwards, this type needs to be ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ able to hold values from -(RE_MAX_BUF_SIZE - 1) to RE_MAX_BUF_SIZE - 1. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ typedef int pattern_offset_t; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ typedef struct ~~~~~~~~~~~~~~ { ~ pattern_offset_t begalt_offset; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ pattern_offset_t fixup_alt_jump; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ pattern_offset_t inner_group_offset; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ pattern_offset_t laststart_offset; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regnum_t regnum; ~~~~~~~~~~~~~~~~ } compile_stack_elt_t; ~~~~~~~~~~~~~~~~~~~~~~ typedef struct ~~~~~~~~~~~~~~ { ~ compile_stack_elt_t *stack; ~~~~~~~~~~~~~~~~~~~~~~~~~~~ int size; ~~~~~~~~~ int avail; /* Offset of next open position. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } compile_stack_type; ~~~~~~~~~~~~~~~~~~~~~ #define INIT_COMPILE_STACK_SIZE 32 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define COMPILE_STACK_EMPTY (compile_stack.avail == 0) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define COMPILE_STACK_FULL (compile_stack.avail == compile_stack.size) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* The next available element. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define COMPILE_STACK_TOP (compile_stack.stack[compile_stack.avail]) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Set the bit for character C in a bit vector. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define SET_LIST_BIT(c) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (buf_end[((unsigned char) (c)) / BYTEWIDTH] \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ |= 1 << (((unsigned char) c) % BYTEWIDTH)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef MULE ~~~~~~~~~~~ /* Set the "bit" for character C in a range table. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define SET_RANGETAB_BIT(c) put_range_table (rtab, c, c, Qt) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ #ifdef emacs ~~~~~~~~~~~~ /* Parse the longest number we can, but don't produce a bignum, that can't ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ correspond to anything we're interested in and would needlessly complicate ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ code. Also avoid the silent overflow issues of the non-emacs code below. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ If the string at P is not exhausted, leave P pointing at the next ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (probable-)non-digit byte encountered. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define GET_UNSIGNED_NUMBER(num) do \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Ibyte *_gus_numend = NULL; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Lisp_Object _gus_numno; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* most-positive-fixnum on 32 bit XEmacs is 10 decimal digits, \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ nine will keep us in fixnum territory no matter our \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ architecture */ \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Bytecount limit = min (pend - p, 9); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ \ ~ /* Require that any digits are ASCII. We already require that \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the user type ASCII in order to type {,(,|, etc, and there is \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the potential for security holes in the future if we allow \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ non-ASCII digits to specify groups in regexps and other \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ code that parses regexps is not aware of this. */ \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ _gus_numno = parse_integer (p, &_gus_numend, limit, 10, 1, \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Vdigit_fixnum_ascii); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (FIXNUMP (_gus_numno) && XREALFIXNUM (_gus_numno) >= 0) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ num = XREALFIXNUM (_gus_numno); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ p = _gus_numend; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } while (0) ~~~~~~~~~~~ #else ~~~~~ /* Get the next unsigned number in the uncompiled pattern. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define GET_UNSIGNED_NUMBER(num) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { if (p != pend) \ ~~~~~~~~~~~~~~~~~~~~~~~~ { \ ~~~~~~~~~~~ int _gun_do_unfetch = 1; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PATFETCH (c); \ ~~~~~~~~~~~~~~~~~~~~~~ while (ISDIGIT (c)) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~ { \ ~~~~~~~~~~~ if (num < 0) \ ~~~~~~~~~~~~~~~~~~~~ num = 0; \ ~~~~~~~~~~~~~~~~ num = num * 10 + c - '0'; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (p == pend) \ ~~~~~~~~~~~~~~~~~~~~~~ { \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ _gun_do_unfetch = 0; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; \ ~~~~~~~~~~~~~~~ } \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PATFETCH (c); \ ~~~~~~~~~~~~~~~~~~~~ } \ ~~~~~~~~~~~ if (_gun_do_unfetch) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Make sure P points to the next non-digit character. */ \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PATUNFETCH; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ #endif ~~~~~~ /* Map a string to the char class it names (if any). BEG points to the string ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ to be parsed and LIMIT is the length, in bytes, of that string. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ XEmacs; this only handles the NAME part of the [:NAME:] specification of a ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ character class name. The GNU emacs version of this function attempts to ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ handle the string from [: onwards, and is called re_wctype_parse. Our ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ approach means the function doesn't need to be called with every character ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ class encountered. ~~~~~~~~~~~~~~~~~~ LENGTH would be a Bytecount if this function didn't need to be compiled ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ also for executables that don't include lisp.h ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Return RECC_ERROR if STRP doesn't match a known character class. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_wctype_t ~~~~~~~~~~~ re_wctype (const unsigned char *beg, int limit) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ /* Sort tests in the length=five case by frequency the classes to minimize ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ number of times we fail the comparison. The frequencies of character class ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ names used in Emacs sources as of 2016-07-27: ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ $ find \( -name \*.c -o -name \*.el \) -exec grep -h '\[:[a-z]*:]' {} + | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ sed 's/]/]\n/g' |grep -o '\[:[a-z]*:]' |sort |uniq -c |sort -nr ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 213 [:alnum:] ~~~~~~~~~~~~~ 104 [:alpha:] ~~~~~~~~~~~~~ 62 [:space:] ~~~~~~~~~~~~ 39 [:digit:] ~~~~~~~~~~~~ 36 [:blank:] ~~~~~~~~~~~~ 26 [:word:] ~~~~~~~~~~~ 26 [:upper:] ~~~~~~~~~~~~ 21 [:lower:] ~~~~~~~~~~~~ 10 [:xdigit:] ~~~~~~~~~~~~~ 10 [:punct:] ~~~~~~~~~~~~ 10 [:ascii:] ~~~~~~~~~~~~ 4 [:nonascii:] ~~~~~~~~~~~~~~ 4 [:graph:] ~~~~~~~~~~~ 2 [:print:] ~~~~~~~~~~~ 2 [:cntrl:] ~~~~~~~~~~~ 1 [:ff:] ~~~~~~~~ If you update this list, consider also updating chain of or'ed conditions ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ in execute_charset function. XEmacs; our equivalent is the condition ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ checking class_bits in the charset_mule and charset_mule_not opcodes. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ ~~ switch (limit) { ~~~~~~~~~~~~~~~~ case 4: ~~~~~~~ if (!memcmp (beg, "word", 4)) return RECC_WORD; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case 5: ~~~~~~~ if (!memcmp (beg, "alnum", 5)) return RECC_ALNUM; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!memcmp (beg, "alpha", 5)) return RECC_ALPHA; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!memcmp (beg, "space", 5)) return RECC_SPACE; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!memcmp (beg, "digit", 5)) return RECC_DIGIT; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!memcmp (beg, "blank", 5)) return RECC_BLANK; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!memcmp (beg, "upper", 5)) return RECC_UPPER; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!memcmp (beg, "lower", 5)) return RECC_LOWER; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!memcmp (beg, "punct", 5)) return RECC_PUNCT; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!memcmp (beg, "ascii", 5)) return RECC_ASCII; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!memcmp (beg, "graph", 5)) return RECC_GRAPH; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!memcmp (beg, "print", 5)) return RECC_PRINT; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!memcmp (beg, "cntrl", 5)) return RECC_CNTRL; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case 6: ~~~~~~~ if (!memcmp (beg, "xdigit", 6)) return RECC_XDIGIT; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case 7: ~~~~~~~ if (!memcmp (beg, "unibyte", 7)) return RECC_UNIBYTE; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case 8: ~~~~~~~ if (!memcmp (beg, "nonascii", 8)) return RECC_NONASCII; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case 9: ~~~~~~~ if (!memcmp (beg, "multibyte", 9)) return RECC_MULTIBYTE; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ } ~ return RECC_ERROR; ~~~~~~~~~~~~~~~~~~ } ~ /* True if CH is in the char class CC. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int ~~~ re_iswctype (int ch, re_wctype_t cc ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RE_ISWCTYPE_ARG_DECL) ~~~~~~~~~~~~~~~~~~~~~ { ~ switch (cc) ~~~~~~~~~~~ { ~ case RECC_ALNUM: return ISALNUM (ch) != 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_ALPHA: return ISALPHA (ch) != 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_BLANK: return ISBLANK (ch) != 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_CNTRL: return ISCNTRL (ch) != 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_DIGIT: return ISDIGIT (ch) != 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_GRAPH: return ISGRAPH (ch) != 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_PRINT: return ISPRINT (ch) != 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_PUNCT: return ISPUNCT (ch) != 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_SPACE: return ISSPACE (ch) != 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef emacs ~~~~~~~~~~~~ case RECC_UPPER: ~~~~~~~~~~~~~~~~ return NILP (lispbuf->case_fold_search) ? ISUPPER (ch) != 0 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ : !NOCASEP (lispbuf, ch); ~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_LOWER: ~~~~~~~~~~~~~~~~ return NILP (lispbuf->case_fold_search) ? ISLOWER (ch) != 0 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ : !NOCASEP (lispbuf, ch); ~~~~~~~~~~~~~~~~~~~~~~~~~ #else ~~~~~ case RECC_UPPER: return ISUPPER (ch) != 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_LOWER: return ISLOWER (ch) != 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ case RECC_XDIGIT: return ISXDIGIT (ch) != 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_ASCII: return ISASCII (ch) != 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_NONASCII: case RECC_MULTIBYTE: return !ISASCII (ch); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_UNIBYTE: return ISUNIBYTE (ch) != 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_WORD: return ISWORD (ch) != 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_ERROR: return false; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ assert (0); ~~~~~~~~~~~ return -1; ~~~~~~~~~~ } ~ #ifdef MULE ~~~~~~~~~~~ static re_bool ~~~~~~~~~~~~~~ re_wctype_can_match_non_ascii (re_wctype_t cc) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ switch (cc) ~~~~~~~~~~~ { ~ case RECC_ASCII: ~~~~~~~~~~~~~~~~ case RECC_UNIBYTE: ~~~~~~~~~~~~~~~~~~ case RECC_CNTRL: ~~~~~~~~~~~~~~~~ case RECC_DIGIT: ~~~~~~~~~~~~~~~~ case RECC_XDIGIT: ~~~~~~~~~~~~~~~~~ case RECC_BLANK: ~~~~~~~~~~~~~~~~ return false; ~~~~~~~~~~~~~ default: ~~~~~~~~ return true; ~~~~~~~~~~~~ } ~ } ~ #endif /* MULE */ ~~~~~~~~~~~~~~~~~ #ifdef emacs ~~~~~~~~~~~~ /* Return a bit-pattern to use in the range-table bits to match multibyte ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ chars of class CC. */ ~~~~~~~~~~~~~~~~~~~~~~ static unsigned char ~~~~~~~~~~~~~~~~~~~~ re_wctype_to_bit (re_wctype_t cc) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ switch (cc) ~~~~~~~~~~~ { ~ case RECC_PRINT: case RECC_GRAPH: ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_ALPHA: return BIT_ALPHA; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_ALNUM: case RECC_WORD: return BIT_WORD; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_LOWER: return BIT_LOWER; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_UPPER: return BIT_UPPER; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_PUNCT: return BIT_PUNCT; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_SPACE: return BIT_SPACE; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_MULTIBYTE: case RECC_NONASCII: ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_ASCII: case RECC_DIGIT: case RECC_XDIGIT: case RECC_CNTRL: ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_BLANK: case RECC_UNIBYTE: case RECC_ERROR: return 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ default: ~~~~~~~~ ABORT (); ~~~~~~~~~ return 0; ~~~~~~~~~ } ~ } ~ #endif /* emacs */ ~~~~~~~~~~~~~~~~~~ ~ static void store_op1 (re_opcode_t op, unsigned char *loc, int arg); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ static void store_op2 (re_opcode_t op, unsigned char *loc, int arg1, int arg2); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ static void insert_op1 (re_opcode_t op, unsigned char *loc, int arg, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ unsigned char *end); ~~~~~~~~~~~~~~~~~~~~ static void insert_op2 (re_opcode_t op, unsigned char *loc, int arg1, int arg2, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ unsigned char *end); ~~~~~~~~~~~~~~~~~~~~ static re_bool at_begline_loc_p (re_char *pattern, re_char *p, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ reg_syntax_t syntax); ~~~~~~~~~~~~~~~~~~~~~ static re_bool at_endline_loc_p (re_char *p, re_char *pend, int syntax); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ static re_bool group_in_compile_stack (compile_stack_type compile_stack, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regnum_t regnum); ~~~~~~~~~~~~~~~~~ static reg_errcode_t compile_range (re_char **p_ptr, re_char *pend, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RE_TRANSLATE_TYPE translate, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ reg_syntax_t syntax, ~~~~~~~~~~~~~~~~~~~~ unsigned char *b); ~~~~~~~~~~~~~~~~~~ #ifdef MULE ~~~~~~~~~~~ static reg_errcode_t compile_extended_range (re_char **p_ptr, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_char *pend, ~~~~~~~~~~~~~~ RE_TRANSLATE_TYPE translate, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ reg_syntax_t syntax, ~~~~~~~~~~~~~~~~~~~~ Lisp_Object rtab); ~~~~~~~~~~~~~~~~~~ #endif /* MULE */ ~~~~~~~~~~~~~~~~~ #ifdef emacs ~~~~~~~~~~~~ reg_errcode_t compile_char_class (re_wctype_t cc, Lisp_Object rtab, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Bitbyte *flags_out); ~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ static re_bool group_match_null_string_p (re_char **p, re_char *end, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ register_info_type *reg_info); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ static re_bool alt_match_null_string_p (re_char *p, re_char *end, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ register_info_type *reg_info); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ static re_bool common_op_match_null_string_p (re_char **p, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_char *end, ~~~~~~~~~~~~~ register_info_type *reg_info); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ static int bcmp_translate (re_char *s1, re_char *s2, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ REGISTER int len, RE_TRANSLATE_TYPE translate ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef emacs ~~~~~~~~~~~~ , Internal_Format fmt, Lisp_Object lispobj ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ ); ~~ static int re_match_2_internal (struct re_pattern_buffer *bufp, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_char *string1, int size1, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_char *string2, int size2, int pos, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ struct re_registers *regs, int stop ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RE_LISP_CONTEXT_ARGS_DECL); ~~~~~~~~~~~~~~~~~~~~~~~~~~~ ~ #ifndef MATCH_MAY_ALLOCATE ~~~~~~~~~~~~~~~~~~~~~~~~~~ /* If we cannot allocate large objects within re_match_2_internal, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ we make the fail stack and register vectors global. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ The fail stack, we grow to the maximum size when a regexp ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ is compiled. ~~~~~~~~~~~~ The register vectors, we adjust in size each time we ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ compile a regexp, according to the number of registers it needs. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ static fail_stack_type fail_stack; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Size with which the following vectors are currently allocated. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ That is so we can make them bigger as needed, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ but never make them smaller. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ static int regs_allocated_size; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ static re_char ** regstart, ** regend; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ static re_char ** old_regstart, ** old_regend; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ static re_char **best_regstart, **best_regend; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ static register_info_type *reg_info; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ static re_char **reg_dummy; ~~~~~~~~~~~~~~~~~~~~~~~~~~~ static register_info_type *reg_info_dummy; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Make the register vectors big enough for NUM_REGS registers, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ but don't make them smaller. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ static ~~~~~~ regex_grow_registers (int num_regs) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ if (num_regs > regs_allocated_size) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ RETALLOC (regstart, num_regs, re_char *); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RETALLOC (regend, num_regs, re_char *); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RETALLOC (old_regstart, num_regs, re_char *); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RETALLOC (old_regend, num_regs, re_char *); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RETALLOC (best_regstart, num_regs, re_char *); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RETALLOC (best_regend, num_regs, re_char *); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RETALLOC (reg_info, num_regs, register_info_type); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RETALLOC (reg_dummy, num_regs, re_char *); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RETALLOC (reg_info_dummy, num_regs, register_info_type); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regs_allocated_size = num_regs; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ } ~ #endif /* not MATCH_MAY_ALLOCATE */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ~ /* `regex_compile' compiles PATTERN (of length SIZE) according to SYNTAX. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Returns one of error codes defined in `regex.h', or zero for success. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Assumes the `allocated' (and perhaps `buffer') and `translate' ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fields are set in BUFP on entry. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ If it succeeds, results are put in BUFP (if it returns an error, the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ contents of BUFP are undefined): ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `buffer' is the compiled pattern; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `syntax' is set to SYNTAX; ~~~~~~~~~~~~~~~~~~~~~~~~~~ `used' is set to the length of the compiled pattern; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `fastmap_accurate' is zero; ~~~~~~~~~~~~~~~~~~~~~~~~~~~ `re_ngroups' is the number of groups/subexpressions (including shy ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ groups) in PATTERN; ~~~~~~~~~~~~~~~~~~~ `re_nsub' is the number of non-shy groups in PATTERN; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `not_bol' and `not_eol' are zero; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ The `fastmap' and `newline_anchor' fields are neither ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ examined nor set. */ ~~~~~~~~~~~~~~~~~~~~~ /* Return, freeing storage we allocated. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define FREE_STACK_RETURN(value) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ do \ ~~~~~~~~~~ { \ ~~~~~~~~~ xfree (compile_stack.stack); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return value; \ ~~~~~~~~~~~~~~~~~~~~ } while (0) ~~~~~~~~~~~ static reg_errcode_t ~~~~~~~~~~~~~~~~~~~~ regex_compile (re_char *pattern, int size, reg_syntax_t syntax, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ struct re_pattern_buffer *bufp) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ /* We fetch characters from PATTERN here. We declare these as int ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (or possibly long) so that chars above 127 can be used as ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ array indices. The macros that fetch a character from the pattern ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ make sure to coerce to unsigned char before assigning, so we won't ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ get bitten by negative numbers here. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* XEmacs change: used to be unsigned char. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ REGISTER EMACS_INT c, c1; ~~~~~~~~~~~~~~~~~~~~~~~~~ /* A random temporary spot in PATTERN. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_char *p1; ~~~~~~~~~~~~ /* Points to the end of the buffer, where we should append. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ REGISTER unsigned char *buf_end; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Keeps track of unclosed groups. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ compile_stack_type compile_stack; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Points to the current (ending) position in the pattern. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_char *p = pattern; ~~~~~~~~~~~~~~~~~~~~~ re_char *pend = pattern + size; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* How to translate the characters in the pattern. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RE_TRANSLATE_TYPE translate = bufp->translate; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Address of the count-byte of the most recently inserted `exactn' ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ command. This makes it possible to tell if a new exact-match ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ character can be added to that command or if the character requires ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ a new `exactn' command. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~ unsigned char *pending_exact = 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Address of start of the most recently finished expression. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ This tells, e.g., postfix * where to find the start of its ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ operand. Reset at the beginning of groups and alternatives. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ unsigned char *laststart = 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Address of beginning of regexp, or inside of last group. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ unsigned char *begalt; ~~~~~~~~~~~~~~~~~~~~~~ /* Place in the uncompiled pattern (i.e., the {) to ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ which to go back if the interval is invalid. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_char *beg_interval; ~~~~~~~~~~~~~~~~~~~~~~ /* Address of the place where a forward jump should go to the end of ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the containing expression. Each alternative of an `or' -- except the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ last -- ends with a forward jump of this sort. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ unsigned char *fixup_alt_jump = 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Counts open-groups as they are encountered. Remembered for the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ matching close-group on the compile stack, so the same register ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ number is put in the stop_memory as the start_memory. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regnum_t regnum = 0; ~~~~~~~~~~~~~~~~~~~~ #ifdef DEBUG ~~~~~~~~~~~~ if (debug_regexps & RE_DEBUG_COMPILATION) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ int debug_count; ~~~~~~~~~~~~~~~~ DEBUG_PRINT1 ("\nCompiling pattern: "); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (debug_count = 0; debug_count < size; debug_count++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ putchar (pattern[debug_count]); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ putchar ('\n'); ~~~~~~~~~~~~~~~ } ~ #endif /* DEBUG */ ~~~~~~~~~~~~~~~~~~ /* Initialize the compile stack. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ compile_stack.stack = TALLOC (INIT_COMPILE_STACK_SIZE, compile_stack_elt_t); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (compile_stack.stack == NULL) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return REG_ESPACE; ~~~~~~~~~~~~~~~~~~ compile_stack.size = INIT_COMPILE_STACK_SIZE; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ compile_stack.avail = 0; ~~~~~~~~~~~~~~~~~~~~~~~~ /* Initialize the pattern buffer. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->syntax = syntax; ~~~~~~~~~~~~~~~~~~~~~~ bufp->fastmap_accurate = 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->not_bol = bufp->not_eol = 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Set `used' to zero, so that if we return an error, the pattern ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ printer (for debugging) will think there's no pattern. We reset it ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ at the end. */ ~~~~~~~~~~~~~~~ bufp->used = 0; ~~~~~~~~~~~~~~~ /* Always count groups, whether or not bufp->no_sub is set. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->re_nsub = 0; ~~~~~~~~~~~~~~~~~~ bufp->re_ngroups = 0; ~~~~~~~~~~~~~~~~~~~~~ bufp->warned_about_incompatible_back_references = 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (bufp->external_to_internal_register == 0) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ bufp->external_to_internal_register_size = INIT_REG_TRANSLATE_SIZE; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RETALLOC (bufp->external_to_internal_register, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->external_to_internal_register_size, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int); ~~~~~ } ~ { ~ int i; ~~~~~~ bufp->external_to_internal_register[0] = 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (i = 1; i < bufp->external_to_internal_register_size; i++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->external_to_internal_register[i] = (int) 0xDEADBEEF; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ #if !defined (emacs) && !defined (SYNTAX_TABLE) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Initialize the syntax table. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ init_syntax_once (); ~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ if (bufp->allocated == 0) ~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ if (bufp->buffer) ~~~~~~~~~~~~~~~~~ { /* If zero allocated, but buffer is non-null, try to realloc ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ enough space. This loses if buffer's address is bogus, but ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ that is the user's responsibility. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RETALLOC (bufp->buffer, INIT_BUF_SIZE, unsigned char); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ else ~~~~ { /* Caller did not allocate a buffer. Do it for them. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->buffer = TALLOC (INIT_BUF_SIZE, unsigned char); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ if (!bufp->buffer) FREE_STACK_RETURN (REG_ESPACE); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->allocated = INIT_BUF_SIZE; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ begalt = buf_end = bufp->buffer; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Loop through the uncompiled pattern until we're at the end. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ while (p != pend) ~~~~~~~~~~~~~~~~~ { ~ PATFETCH (c); ~~~~~~~~~~~~~ switch (c) ~~~~~~~~~~ { ~ case '^': ~~~~~~~~~ { ~ if ( /* If at start of pattern, it's an operator. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ p == pattern + 1 ~~~~~~~~~~~~~~~~ /* If context independent, it's an operator. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || syntax & RE_CONTEXT_INDEP_ANCHORS ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Otherwise, depends on what's come before. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || at_begline_loc_p (pattern, p, syntax)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ BUF_PUSH (begline); ~~~~~~~~~~~~~~~~~~~ else ~~~~ goto normal_char; ~~~~~~~~~~~~~~~~~ } ~ break; ~~~~~~ case '$': ~~~~~~~~~ { ~ if ( /* If at end of pattern, it's an operator. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ p == pend ~~~~~~~~~ /* If context independent, it's an operator. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || syntax & RE_CONTEXT_INDEP_ANCHORS ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Otherwise, depends on what's next. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || at_endline_loc_p (p, pend, syntax)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ BUF_PUSH (endline); ~~~~~~~~~~~~~~~~~~~ else ~~~~ goto normal_char; ~~~~~~~~~~~~~~~~~ } ~ break; ~~~~~~ case '+': ~~~~~~~~~ case '?': ~~~~~~~~~ if ((syntax & RE_BK_PLUS_QM) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || (syntax & RE_LIMITED_OPS)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto normal_char; ~~~~~~~~~~~~~~~~~ handle_plus: ~~~~~~~~~~~~ case '*': ~~~~~~~~~ /* If there is no previous pattern... */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!laststart) ~~~~~~~~~~~~~~~ { ~ if (syntax & RE_CONTEXT_INVALID_OPS) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ FREE_STACK_RETURN (REG_BADRPT); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ else if (!(syntax & RE_CONTEXT_INDEP_OPS)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto normal_char; ~~~~~~~~~~~~~~~~~ } ~ { ~ /* true means zero/many matches are allowed. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_bool zero_times_ok = c != '+'; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_bool many_times_ok = c != '?'; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* true means match shortest string possible. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_bool minimal = false; ~~~~~~~~~~~~~~~~~~~~~~~~ /* If there is a sequence of repetition chars, collapse it ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ down to just one (the right one). We can't combine ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ interval operators with these because of, e.g., `a{2}*', ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ which should only match an even number of `a's. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ while (p != pend) ~~~~~~~~~~~~~~~~~ { ~ PATFETCH (c); ~~~~~~~~~~~~~ if (c == '*' || (!(syntax & RE_BK_PLUS_QM) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ && (c == '+' || c == '?'))) ~~~~~~~~~~~~~~~~~~~~~~~~~~~ ; ~ else if (syntax & RE_BK_PLUS_QM && c == '\\') ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ if (p == pend) FREE_STACK_RETURN (REG_EESCAPE); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PATFETCH (c1); ~~~~~~~~~~~~~~ if (!(c1 == '+' || c1 == '?')) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ PATUNFETCH; ~~~~~~~~~~~ PATUNFETCH; ~~~~~~~~~~~ break; ~~~~~~ } ~ c = c1; ~~~~~~~ } ~ else ~~~~ { ~ PATUNFETCH; ~~~~~~~~~~~ break; ~~~~~~ } ~ /* If we get here, we found another repeat character. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!(syntax & RE_NO_MINIMAL_MATCHING)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ /* "*?" and "+?" and "??" are okay (and mean match ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ minimally), but other sequences (such as "*??" and ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ "+++") are rejected (reserved for future use). */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (minimal || c != '?') ~~~~~~~~~~~~~~~~~~~~~~~~ FREE_STACK_RETURN (REG_BADRPT); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ minimal = true; ~~~~~~~~~~~~~~~ } ~ else ~~~~ { ~ zero_times_ok |= c != '+'; ~~~~~~~~~~~~~~~~~~~~~~~~~~ many_times_ok |= c != '?'; ~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ } ~ /* Star, etc. applied to an empty pattern is equivalent ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ to an empty pattern. */ ~~~~~~~~~~~~~~~~~~~~~~~~ if (!laststart) ~~~~~~~~~~~~~~~ break; ~~~~~~ /* Now we know whether zero matches is allowed ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ and whether two or more matches is allowed ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ and whether we want minimal or maximal matching. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (minimal) ~~~~~~~~~~~~ { ~ if (!many_times_ok) ~~~~~~~~~~~~~~~~~~~ { ~ /* "a??" becomes: ~~~~~~~~~~~~~~~~~ 0: /on_failure_jump to 6 ~~~~~~~~~~~~~~~~~~~~~~~~ 3: /jump to 9 ~~~~~~~~~~~~~ 6: /exactn/1/A ~~~~~~~~~~~~~~ 9: end of pattern. ~~~~~~~~~~~~~~~~~~ */ ~~ GET_BUFFER_SPACE (6); ~~~~~~~~~~~~~~~~~~~~~ INSERT_JUMP (jump, laststart, buf_end + 3); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ buf_end += 3; ~~~~~~~~~~~~~ INSERT_JUMP (on_failure_jump, laststart, laststart + 6); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ buf_end += 3; ~~~~~~~~~~~~~ } ~ else if (zero_times_ok) ~~~~~~~~~~~~~~~~~~~~~~~ { ~ /* "a*?" becomes: ~~~~~~~~~~~~~~~~~ 0: /jump to 6 ~~~~~~~~~~~~~ 3: /exactn/1/A ~~~~~~~~~~~~~~ 6: /on_failure_jump to 3 ~~~~~~~~~~~~~~~~~~~~~~~~ 9: end of pattern. ~~~~~~~~~~~~~~~~~~ */ ~~ GET_BUFFER_SPACE (6); ~~~~~~~~~~~~~~~~~~~~~ INSERT_JUMP (jump, laststart, buf_end + 3); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ buf_end += 3; ~~~~~~~~~~~~~ STORE_JUMP (on_failure_jump, buf_end, laststart + 3); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ buf_end += 3; ~~~~~~~~~~~~~ } ~ else ~~~~ { ~ /* "a+?" becomes: ~~~~~~~~~~~~~~~~~ 0: /exactn/1/A ~~~~~~~~~~~~~~ 3: /on_failure_jump to 0 ~~~~~~~~~~~~~~~~~~~~~~~~ 6: end of pattern. ~~~~~~~~~~~~~~~~~~ */ ~~ GET_BUFFER_SPACE (3); ~~~~~~~~~~~~~~~~~~~~~ STORE_JUMP (on_failure_jump, buf_end, laststart); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ buf_end += 3; ~~~~~~~~~~~~~ } ~ } ~ else ~~~~ { ~ /* Are we optimizing this jump? */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_bool keep_string_p = false; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (many_times_ok) ~~~~~~~~~~~~~~~~~~ { /* More than one repetition is allowed, so put in ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ at the end a backward relative jump from ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `buf_end' to before the next jump we're going ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ to put in below (which jumps from laststart to ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ after this jump). ~~~~~~~~~~~~~~~~~ But if we are at the `*' in the exact sequence `.*\n', ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ insert an unconditional jump backwards to the ., ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ instead of the beginning of the loop. This way we only ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ push a failure point once, instead of every time ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ through the loop. */ ~~~~~~~~~~~~~~~~~~~~~ assert (p - 1 > pattern); ~~~~~~~~~~~~~~~~~~~~~~~~~ /* Allocate the space for the jump. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ GET_BUFFER_SPACE (3); ~~~~~~~~~~~~~~~~~~~~~ /* We know we are not at the first character of the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ pattern, because laststart was nonzero. And we've ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ already incremented `p', by the way, to be the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ character after the `*'. Do we have to do something ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ analogous here for null bytes, because of ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RE_DOT_NOT_NULL? */ ~~~~~~~~~~~~~~~~~~~ if (*(p - 2) == '.' ~~~~~~~~~~~~~~~~~~~ && zero_times_ok ~~~~~~~~~~~~~~~~ && p < pend && *p == '\n' ~~~~~~~~~~~~~~~~~~~~~~~~~ && !(syntax & RE_DOT_NEWLINE)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { /* We have .*\n. */ ~~~~~~~~~~~~~~~~~~~~~~ STORE_JUMP (jump, buf_end, laststart); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ keep_string_p = true; ~~~~~~~~~~~~~~~~~~~~~ } ~ else ~~~~ /* Anything else. */ ~~~~~~~~~~~~~~~~~~~~~ STORE_JUMP (maybe_pop_jump, buf_end, laststart - 3); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* We've added more stuff to the buffer. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ buf_end += 3; ~~~~~~~~~~~~~ } ~ /* On failure, jump from laststart to buf_end + 3, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ which will be the end of the buffer after this jump ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ is inserted. */ ~~~~~~~~~~~~~~~~ GET_BUFFER_SPACE (3); ~~~~~~~~~~~~~~~~~~~~~ INSERT_JUMP (keep_string_p ? on_failure_keep_string_jump ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ : on_failure_jump, ~~~~~~~~~~~~~~~~~~ laststart, buf_end + 3); ~~~~~~~~~~~~~~~~~~~~~~~~ buf_end += 3; ~~~~~~~~~~~~~ if (!zero_times_ok) ~~~~~~~~~~~~~~~~~~~ { ~ /* At least one repetition is required, so insert a ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `dummy_failure_jump' before the initial ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `on_failure_jump' instruction of the loop. This ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ effects a skip over that instruction the first time ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ we hit that loop. */ ~~~~~~~~~~~~~~~~~~~~~ GET_BUFFER_SPACE (3); ~~~~~~~~~~~~~~~~~~~~~ INSERT_JUMP (dummy_failure_jump, laststart, laststart + 6); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ buf_end += 3; ~~~~~~~~~~~~~ } ~ } ~ pending_exact = 0; ~~~~~~~~~~~~~~~~~~ } ~ break; ~~~~~~ case '.': ~~~~~~~~~ laststart = buf_end; ~~~~~~~~~~~~~~~~~~~~ BUF_PUSH (anychar); ~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ #ifdef MULE ~~~~~~~~~~~ #define MAYBE_START_OVER_WITH_EXTENDED(ch) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (ch >= 0x80) do \ ~~~~~~~~~~~~~~~~~~~~~~ { \ ~~~~~~~ goto start_over_with_extended; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } while (0) ~~~~~~~~~~~ #else ~~~~~ #define MAYBE_START_OVER_WITH_EXTENDED(ch) (void)(ch) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ case '[': ~~~~~~~~~ { ~ /* XEmacs change: this whole section */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_bool had_char_class = false; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (p == pend) FREE_STACK_RETURN (REG_EBRACK); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Ensure that we have enough space to push a charset: the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ opcode, the length count, and the bitset; 34 bytes in all. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ GET_BUFFER_SPACE (34); ~~~~~~~~~~~~~~~~~~~~~~ laststart = buf_end; ~~~~~~~~~~~~~~~~~~~~ /* We test `*p == '^' twice, instead of using an if ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ statement, so we only need one BUF_PUSH. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ BUF_PUSH (*p == '^' ? charset_not : charset); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (*p == '^') ~~~~~~~~~~~~~~ p++; ~~~~ /* Remember the first position in the bracket expression. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ p1 = p; ~~~~~~~ /* Push the number of bytes in the bitmap. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ BUF_PUSH ((1 << BYTEWIDTH) / BYTEWIDTH); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Clear the whole map. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~ memset (buf_end, 0, (1 << BYTEWIDTH) / BYTEWIDTH); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* charset_not matches newline according to a syntax bit. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if ((re_opcode_t) buf_end[-2] == charset_not ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ && (syntax & RE_HAT_LISTS_NOT_NEWLINE)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ SET_LIST_BIT ('\n'); ~~~~~~~~~~~~~~~~~~~~ /* Read in characters and ranges, setting map bits. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (;;) ~~~~~~~~ { ~ if (p == pend) FREE_STACK_RETURN (REG_EBRACK); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PATFETCH (c); ~~~~~~~~~~~~~ /* Frumble-bumble, we may have found some extended chars. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Need to start over, process everything using the general ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ extended-char mechanism, and need to use charset_mule and ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ charset_mule_not instead of charset and charset_not. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ MAYBE_START_OVER_WITH_EXTENDED (c); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* \ might escape characters inside [...] and [^...]. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if ((syntax & RE_BACKSLASH_ESCAPE_IN_LISTS) && c == '\\') ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ if (p == pend) FREE_STACK_RETURN (REG_EESCAPE); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PATFETCH (c1); ~~~~~~~~~~~~~~ MAYBE_START_OVER_WITH_EXTENDED (c1); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ SET_LIST_BIT (c1); ~~~~~~~~~~~~~~~~~~ continue; ~~~~~~~~~ } ~ /* Could be the end of the bracket expression. If it's ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ not (i.e., when the bracket expression is `[]' so ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ far), the ']' character bit gets set way below. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (c == ']' && p != p1 + 1) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ /* Look ahead to see if it's a range when the last thing ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ was a character class. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~ if (had_char_class && c == '-' && *p != ']') ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ FREE_STACK_RETURN (REG_ERANGE); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Look ahead to see if it's a range when the last thing ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ was a character: if this is a hyphen not at the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ beginning or the end of a list, then it's the range ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ operator. */ ~~~~~~~~~~~~~ if (c == '-' ~~~~~~~~~~~~ && !(p - 2 >= pattern && p[-2] == '[') ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ && !(p - 3 >= pattern && p[-3] == '[' && p[-2] == '^') ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ && *p != ']') ~~~~~~~~~~~~~ { ~ reg_errcode_t ret; ~~~~~~~~~~~~~~~~~~ MAYBE_START_OVER_WITH_EXTENDED (*(unsigned char *)p); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ret = compile_range (&p, pend, translate, syntax, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ buf_end); ~~~~~~~~~ if (ret != REG_NOERROR) FREE_STACK_RETURN (ret); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ else if (p[0] == '-' && p[1] != ']') ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { /* This handles ranges made up of characters only. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ reg_errcode_t ret; ~~~~~~~~~~~~~~~~~~ /* Move past the `-'. */ ~~~~~~~~~~~~~~~~~~~~~~~~~ PATFETCH (c1); ~~~~~~~~~~~~~~ MAYBE_START_OVER_WITH_EXTENDED (*(unsigned char *)p); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ret = compile_range (&p, pend, translate, syntax, buf_end); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (ret != REG_NOERROR) FREE_STACK_RETURN (ret); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* See if we're at the beginning of a possible character ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ class. */ ~~~~~~~~~~ else if (syntax & RE_CHAR_CLASSES && c == '[' && *p == ':') ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ re_char *str = p + 1; ~~~~~~~~~~~~~~~~~~~~~ PATFETCH (c); ~~~~~~~~~~~~~ c1 = 0; ~~~~~~~ /* If pattern is `[[:'. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (p == pend) FREE_STACK_RETURN (REG_EBRACK); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (;;) ~~~~~~~~ { ~ PATFETCH (c); ~~~~~~~~~~~~~ if ((c == ':' && *p == ']') || p == pend) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ break; ~~~~~~ } ~ c1++; ~~~~~ } ~ /* If isn't a word bracketed by `[:' and `:]': ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ undo the ending character, the letters, and leave ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the leading `:' and `[' (but set bits for them). */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (c == ':' && *p == ']') ~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ re_wctype_t cc = re_wctype (str, c1); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int ch; ~~~~~~~ if (cc == RECC_ERROR) ~~~~~~~~~~~~~~~~~~~~~ FREE_STACK_RETURN (REG_ECTYPE); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Throw away the ] at the end of the character ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ class. */ ~~~~~~~~~~ PATFETCH (c); ~~~~~~~~~~~~~ if (p == pend) FREE_STACK_RETURN (REG_EBRACK); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef MULE ~~~~~~~~~~~ if (re_wctype_can_match_non_ascii (cc)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ goto start_over_with_extended; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ #endif /* MULE */ ~~~~~~~~~~~~~~~~~ for (ch = 0; ch < (1 << BYTEWIDTH); ++ch) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ if (re_iswctype (ch, cc ~~~~~~~~~~~~~~~~~~~~~~~ RE_ISWCTYPE_ARG (current_buffer))) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ SET_LIST_BIT (ch); ~~~~~~~~~~~~~~~~~~ } ~ } ~ had_char_class = true; ~~~~~~~~~~~~~~~~~~~~~~ } ~ else ~~~~ { ~ c1++; ~~~~~ while (c1--) ~~~~~~~~~~~~ PATUNFETCH; ~~~~~~~~~~~ SET_LIST_BIT ('['); ~~~~~~~~~~~~~~~~~~~ SET_LIST_BIT (':'); ~~~~~~~~~~~~~~~~~~~ had_char_class = false; ~~~~~~~~~~~~~~~~~~~~~~~ } ~ } ~ else ~~~~ { ~ had_char_class = false; ~~~~~~~~~~~~~~~~~~~~~~~ SET_LIST_BIT (c); ~~~~~~~~~~~~~~~~~ } ~ } ~ /* Discard any (non)matching list bytes that are all 0 at the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ end of the map. Decrease the map-length byte too. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ while ((int) buf_end[-1] > 0 && buf_end[buf_end[-1] - 1] == 0) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ buf_end[-1]--; ~~~~~~~~~~~~~~ buf_end += buf_end[-1]; ~~~~~~~~~~~~~~~~~~~~~~~ } ~ break; ~~~~~~ #ifdef MULE ~~~~~~~~~~~ start_over_with_extended: ~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ REGISTER Lisp_Object rtab = Qnil; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Bitbyte flags = 0; ~~~~~~~~~~~~~~~~~~ int bytes_needed = sizeof (flags); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_bool had_char_class = false; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* There are extended chars here, which means we need to use the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ unified range-table format. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (buf_end[-2] == charset) ~~~~~~~~~~~~~~~~~~~~~~~~~~~ buf_end[-2] = charset_mule; ~~~~~~~~~~~~~~~~~~~~~~~~~~~ else ~~~~ buf_end[-2] = charset_mule_not; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ buf_end--; ~~~~~~~~~~ p = p1; /* go back to the beginning of the charset, after ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ a possible ^. */ ~~~~~~~~~~~~~~~~ rtab = Vthe_lisp_rangetab; ~~~~~~~~~~~~~~~~~~~~~~~~~~ Fclear_range_table (rtab); ~~~~~~~~~~~~~~~~~~~~~~~~~~ /* charset_not matches newline according to a syntax bit. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if ((re_opcode_t) buf_end[-1] == charset_mule_not ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ && (syntax & RE_HAT_LISTS_NOT_NEWLINE)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ SET_RANGETAB_BIT ('\n'); ~~~~~~~~~~~~~~~~~~~~~~~~ /* Read in characters and ranges, setting map bits. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (;;) ~~~~~~~~ { ~ if (p == pend) FREE_STACK_RETURN (REG_EBRACK); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PATFETCH (c); ~~~~~~~~~~~~~ /* \ might escape characters inside [...] and [^...]. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if ((syntax & RE_BACKSLASH_ESCAPE_IN_LISTS) && c == '\\') ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ if (p == pend) FREE_STACK_RETURN (REG_EESCAPE); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PATFETCH (c1); ~~~~~~~~~~~~~~ SET_RANGETAB_BIT (c1); ~~~~~~~~~~~~~~~~~~~~~~ continue; ~~~~~~~~~ } ~ /* Could be the end of the bracket expression. If it's ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ not (i.e., when the bracket expression is `[]' so ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ far), the ']' character bit gets set way below. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (c == ']' && p != p1 + 1) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ /* Look ahead to see if it's a range when the last thing ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ was a character class. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~ if (had_char_class && c == '-' && *p != ']') ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ FREE_STACK_RETURN (REG_ERANGE); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Look ahead to see if it's a range when the last thing ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ was a character: if this is a hyphen not at the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ beginning or the end of a list, then it's the range ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ operator. */ ~~~~~~~~~~~~~ if (c == '-' ~~~~~~~~~~~~ && !(p - 2 >= pattern && p[-2] == '[') ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ && !(p - 3 >= pattern && p[-3] == '[' && p[-2] == '^') ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ && *p != ']') ~~~~~~~~~~~~~ { ~ reg_errcode_t ret; ~~~~~~~~~~~~~~~~~~ ret = compile_extended_range (&p, pend, translate, syntax, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ rtab); ~~~~~~ if (ret != REG_NOERROR) FREE_STACK_RETURN (ret); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ else if (p[0] == '-' && p[1] != ']') ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { /* This handles ranges made up of characters only. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ reg_errcode_t ret; ~~~~~~~~~~~~~~~~~~ /* Move past the `-'. */ ~~~~~~~~~~~~~~~~~~~~~~~~~ PATFETCH (c1); ~~~~~~~~~~~~~~ ret = compile_extended_range (&p, pend, translate, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ syntax, rtab); ~~~~~~~~~~~~~~ if (ret != REG_NOERROR) FREE_STACK_RETURN (ret); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* See if we're at the beginning of a possible character ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ class. */ ~~~~~~~~~~ else if (syntax & RE_CHAR_CLASSES && c == '[' && *p == ':') ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ re_char *str = p + 1; ~~~~~~~~~~~~~~~~~~~~~ PATFETCH (c); ~~~~~~~~~~~~~ c1 = 0; ~~~~~~~ /* If pattern is `[[:'. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (p == pend) FREE_STACK_RETURN (REG_EBRACK); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (;;) ~~~~~~~~ { ~ PATFETCH (c); ~~~~~~~~~~~~~ if ((c == ':' && *p == ']') || p == pend) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ break; ~~~~~~ } ~ c1++; ~~~~~ } ~ /* If isn't a word bracketed by `[:' and `:]': ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ undo the ending character, the letters, and leave ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the leading `:' and `[' (but set bits for them). */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (c == ':' && *p == ']') ~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ re_wctype_t cc = re_wctype (str, c1); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ reg_errcode_t ret = REG_NOERROR; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (cc == RECC_ERROR) ~~~~~~~~~~~~~~~~~~~~~ FREE_STACK_RETURN (REG_ECTYPE); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Throw away the ] at the end of the character ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ class. */ ~~~~~~~~~~ PATFETCH (c); ~~~~~~~~~~~~~ if (p == pend) FREE_STACK_RETURN (REG_EBRACK); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ret = compile_char_class (cc, rtab, &flags); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (ret != REG_NOERROR) FREE_STACK_RETURN (ret); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ had_char_class = true; ~~~~~~~~~~~~~~~~~~~~~~ } ~ else ~~~~ { ~ c1++; ~~~~~ while (c1--) ~~~~~~~~~~~~ PATUNFETCH; ~~~~~~~~~~~ SET_RANGETAB_BIT ('['); ~~~~~~~~~~~~~~~~~~~~~~~ SET_RANGETAB_BIT (':'); ~~~~~~~~~~~~~~~~~~~~~~~ had_char_class = false; ~~~~~~~~~~~~~~~~~~~~~~~ } ~ } ~ else ~~~~ { ~ had_char_class = false; ~~~~~~~~~~~~~~~~~~~~~~~ SET_RANGETAB_BIT (c); ~~~~~~~~~~~~~~~~~~~~~ } ~ } ~ bytes_needed += unified_range_table_bytes_needed (rtab); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ GET_BUFFER_SPACE (bytes_needed); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ *buf_end++ = flags; ~~~~~~~~~~~~~~~~~~~ unified_range_table_copy_data (rtab, buf_end); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ buf_end += unified_range_table_bytes_used (buf_end); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ } ~ #endif /* MULE */ ~~~~~~~~~~~~~~~~~ case '(': ~~~~~~~~~ if (syntax & RE_NO_BK_PARENS) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto handle_open; ~~~~~~~~~~~~~~~~~ else ~~~~ goto normal_char; ~~~~~~~~~~~~~~~~~ case ')': ~~~~~~~~~ if (syntax & RE_NO_BK_PARENS) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto handle_close; ~~~~~~~~~~~~~~~~~~ else ~~~~ goto normal_char; ~~~~~~~~~~~~~~~~~ case '\n': ~~~~~~~~~~ if (syntax & RE_NEWLINE_ALT) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto handle_alt; ~~~~~~~~~~~~~~~~ else ~~~~ goto normal_char; ~~~~~~~~~~~~~~~~~ case '|': ~~~~~~~~~ if (syntax & RE_NO_BK_VBAR) ~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto handle_alt; ~~~~~~~~~~~~~~~~ else ~~~~ goto normal_char; ~~~~~~~~~~~~~~~~~ case '{': ~~~~~~~~~ if (syntax & RE_INTERVALS && syntax & RE_NO_BK_BRACES) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto handle_interval; ~~~~~~~~~~~~~~~~~~~~~ else ~~~~ goto normal_char; ~~~~~~~~~~~~~~~~~ case '\\': ~~~~~~~~~~ if (p == pend) FREE_STACK_RETURN (REG_EESCAPE); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Do not translate the character after the \, so that we can ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ distinguish, e.g., \B from \b, even if we normally would ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ translate, e.g., B to b. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PATFETCH_RAW (c); ~~~~~~~~~~~~~~~~~ switch (c) ~~~~~~~~~~ { ~ case '(': ~~~~~~~~~ if (syntax & RE_NO_BK_PARENS) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto normal_backslash; ~~~~~~~~~~~~~~~~~~~~~~ handle_open: ~~~~~~~~~~~~ { ~ regnum_t r = 0; ~~~~~~~~~~~~~~~ re_bool shy = 0, named_nonshy = 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!(syntax & RE_NO_SHY_GROUPS) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ && p != pend && itext_ichar_eql (p, '?')) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ INC_IBYTEPTR (p); /* Gobble up the '?'. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PATFETCH (c); /* Fetch the next character, which may be a ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ digit. */ ~~~~~~~~~ switch (c) ~~~~~~~~~~ { ~ case ':': /* shy groups */ ~~~~~~~~~~~~~~~~~~~~~~~~~~ shy = 1; ~~~~~~~~ break; ~~~~~~ case '1': case '2': case '3': case '4': ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case '5': case '6': case '7': case '8': case '9': ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PATUNFETCH; ~~~~~~~~~~~ GET_UNSIGNED_NUMBER (r); ~~~~~~~~~~~~~~~~~~~~~~~~ if (itext_ichar_eql (p, ':')) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ named_nonshy = 1; ~~~~~~~~~~~~~~~~~ INC_IBYTEPTR (p); /* Gobble up the ':'. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ /* Otherwise, fall through and error. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* An explicitly specified regnum must start with ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ non-0. */ ~~~~~~~~~ case '0': ~~~~~~~~~ default: ~~~~~~~~ FREE_STACK_RETURN (REG_BADPAT); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ } ~ ++regnum; ~~~~~~~~~ bufp->re_ngroups++; ~~~~~~~~~~~~~~~~~~~ if (bufp->re_ngroups > MAX_REGNUM) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ FREE_STACK_RETURN (REG_ESUBREG); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ if (!shy) ~~~~~~~~~ { ~ if (named_nonshy) ~~~~~~~~~~~~~~~~~ { ~ if (r < bufp->external_to_internal_register_size) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ if (group_in_compile_stack ~~~~~~~~~~~~~~~~~~~~~~~~~~ (compile_stack, ~~~~~~~~~~~~~~~ bufp->external_to_internal_register[r])) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ /* GNU errors in this context, which is ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ inconsistent; it otherwise has no problem ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ with named non-shy groups overriding ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ previously-assigned group numbers. I choose ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ to error here for consistency with GNU for ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ those writing code that should target ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ both. */ ~~~~~~~~ FREE_STACK_RETURN (REG_ESUBREG); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ } ~ if (r > bufp->re_nsub) ~~~~~~~~~~~~~~~~~~~~~~ { ~ bufp->re_nsub = r; ~~~~~~~~~~~~~~~~~~ } ~ } ~ else ~~~~ { ~ r = ++(bufp->re_nsub); ~~~~~~~~~~~~~~~~~~~~~~ } ~ while (bufp->external_to_internal_register_size <= ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->re_nsub) ~~~~~~~~~~~~~~ { ~ int i; ~~~~~~ int old_size = ~~~~~~~~~~~~~~ bufp->external_to_internal_register_size; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->external_to_internal_register_size ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ += max (old_size + 5, bufp->re_nsub + 5); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RETALLOC (bufp->external_to_internal_register, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->external_to_internal_register_size, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int); ~~~~~ for (i = old_size; ~~~~~~~~~~~~~~~~~~ i < bufp->external_to_internal_register_size; i++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->external_to_internal_register[i] = ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (int) 0xDEADBEEF; ~~~~~~~~~~~~~~~~~ } ~ /* This is explicitly [r] rather than [bufp->re_nsub] for ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the case that the named nonshy group references an ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ unused register number less than bufp->re_nsub. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->external_to_internal_register[r] = ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->re_ngroups; ~~~~~~~~~~~~~~~~~ } ~ if (COMPILE_STACK_FULL) ~~~~~~~~~~~~~~~~~~~~~~~ { ~ RETALLOC (compile_stack.stack, compile_stack.size << 1, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ compile_stack_elt_t); ~~~~~~~~~~~~~~~~~~~~~ if (compile_stack.stack == NULL) return REG_ESPACE; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ compile_stack.size <<= 1; ~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* These are the values to restore when we hit end of this ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ group. They are all relative offsets, so that if the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ whole pattern moves because of realloc, they will still ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ be valid. */ ~~~~~~~~~~~~~ COMPILE_STACK_TOP.begalt_offset = begalt - bufp->buffer; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ COMPILE_STACK_TOP.fixup_alt_jump ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ = fixup_alt_jump ? fixup_alt_jump - bufp->buffer + 1 : 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ COMPILE_STACK_TOP.laststart_offset = buf_end - bufp->buffer; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ COMPILE_STACK_TOP.regnum = bufp->re_ngroups; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ COMPILE_STACK_TOP.inner_group_offset ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ = buf_end - bufp->buffer + 3; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* We will eventually replace the 0 with the number of ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ groups inner to this one, using inner_group_offset, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ above. */ ~~~~~~~~~ GET_BUFFER_SPACE (5); ~~~~~~~~~~~~~~~~~~~~~ store_op2 (start_memory, buf_end, bufp->re_ngroups, 0); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ buf_end += 5; ~~~~~~~~~~~~~ compile_stack.avail++; ~~~~~~~~~~~~~~~~~~~~~~ fixup_alt_jump = 0; ~~~~~~~~~~~~~~~~~~~ laststart = 0; ~~~~~~~~~~~~~~ begalt = buf_end; ~~~~~~~~~~~~~~~~~ pending_exact = 0; ~~~~~~~~~~~~~~~~~~ } ~ break; ~~~~~~ case ')': ~~~~~~~~~ if (syntax & RE_NO_BK_PARENS) goto normal_backslash; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (COMPILE_STACK_EMPTY) ~~~~~~~~~~~~~~~~~~~~~~~~ { ~ if (syntax & RE_UNMATCHED_RIGHT_PAREN_ORD) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto normal_backslash; ~~~~~~~~~~~~~~~~~~~~~~ else ~~~~ FREE_STACK_RETURN (REG_ERPAREN); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ handle_close: ~~~~~~~~~~~~~ if (fixup_alt_jump) ~~~~~~~~~~~~~~~~~~~ { /* Push a dummy failure point at the end of the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ alternative for a possible future ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `pop_failure_jump' to pop. See comments at ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `push_dummy_failure' in `re_match_2'. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ BUF_PUSH (push_dummy_failure); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* We allocated space for this jump when we assigned ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ to `fixup_alt_jump', in the `handle_alt' case below. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ STORE_JUMP (jump_past_alt, fixup_alt_jump, buf_end - 1); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* See similar code for backslashed left paren above. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (COMPILE_STACK_EMPTY) ~~~~~~~~~~~~~~~~~~~~~~~~ { ~ if (syntax & RE_UNMATCHED_RIGHT_PAREN_ORD) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto normal_char; ~~~~~~~~~~~~~~~~~ else ~~~~ FREE_STACK_RETURN (REG_ERPAREN); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* Since we just checked for an empty stack above, this ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ``can't happen''. */ ~~~~~~~~~~~~~~~~~~~~~ assert (compile_stack.avail != 0); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ /* We don't just want to restore into `regnum', because ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ later groups should continue to be numbered higher, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ as in `(ab)c(de)' -- the second group is #2. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regnum_t this_group_regnum; ~~~~~~~~~~~~~~~~~~~~~~~~~~~ unsigned char *inner_group_loc; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ compile_stack.avail--; ~~~~~~~~~~~~~~~~~~~~~~ begalt = bufp->buffer + COMPILE_STACK_TOP.begalt_offset; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fixup_alt_jump ~~~~~~~~~~~~~~ = COMPILE_STACK_TOP.fixup_alt_jump ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ? bufp->buffer + COMPILE_STACK_TOP.fixup_alt_jump - 1 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ : 0; ~~~~ laststart = bufp->buffer + COMPILE_STACK_TOP.laststart_offset; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ this_group_regnum = COMPILE_STACK_TOP.regnum; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ pending_exact = 0; ~~~~~~~~~~~~~~~~~~ /* We're at the end of the group, so now we know how many ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ groups were inside this one. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ inner_group_loc ~~~~~~~~~~~~~~~ = bufp->buffer + COMPILE_STACK_TOP.inner_group_offset; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ STORE_NUMBER (inner_group_loc, regnum - this_group_regnum); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ GET_BUFFER_SPACE (5); ~~~~~~~~~~~~~~~~~~~~~ store_op2 (stop_memory, buf_end, this_group_regnum, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regnum - this_group_regnum); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ buf_end += 5; ~~~~~~~~~~~~~ } ~ break; ~~~~~~ case '|': /* `\|'. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~ if (syntax & RE_LIMITED_OPS || syntax & RE_NO_BK_VBAR) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto normal_backslash; ~~~~~~~~~~~~~~~~~~~~~~ handle_alt: ~~~~~~~~~~~ if (syntax & RE_LIMITED_OPS) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto normal_char; ~~~~~~~~~~~~~~~~~ /* Insert before the previous alternative a jump which ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ jumps to this alternative if the former fails. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ GET_BUFFER_SPACE (3); ~~~~~~~~~~~~~~~~~~~~~ INSERT_JUMP (on_failure_jump, begalt, buf_end + 6); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ pending_exact = 0; ~~~~~~~~~~~~~~~~~~ buf_end += 3; ~~~~~~~~~~~~~ /* The alternative before this one has a jump after it ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ which gets executed if it gets matched. Adjust that ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ jump so it will jump to this alternative's analogous ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ jump (put in below, which in turn will jump to the next ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (if any) alternative's such jump, etc.). The last such ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ jump jumps to the correct final destination. A picture: ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ _____ _____ ~~~~~~~~~~~ | | | | ~~~~~~~~~~~ | v | v ~~~~~~~~~~~ a | b | c ~~~~~~~~~~~ If we are at `b', then fixup_alt_jump right now points to a ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ three-byte space after `a'. We'll put in the jump, set ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fixup_alt_jump to right after `b', and leave behind three ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bytes which we'll fill in when we get to after `c'. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (fixup_alt_jump) ~~~~~~~~~~~~~~~~~~~ STORE_JUMP (jump_past_alt, fixup_alt_jump, buf_end); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Mark and leave space for a jump after this alternative, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ to be filled in later either by next alternative or ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ when know we're at the end of a series of alternatives. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fixup_alt_jump = buf_end; ~~~~~~~~~~~~~~~~~~~~~~~~~ GET_BUFFER_SPACE (3); ~~~~~~~~~~~~~~~~~~~~~ buf_end += 3; ~~~~~~~~~~~~~ laststart = 0; ~~~~~~~~~~~~~~ begalt = buf_end; ~~~~~~~~~~~~~~~~~ break; ~~~~~~ case '{': ~~~~~~~~~ /* If \{ is a literal. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!(syntax & RE_INTERVALS) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* If we're at `\{' and it's not the open-interval ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ operator. */ ~~~~~~~~~~~~~ || ((syntax & RE_INTERVALS) && (syntax & RE_NO_BK_BRACES)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || (p - 2 == pattern && p == pend)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto normal_backslash; ~~~~~~~~~~~~~~~~~~~~~~ #define BAD_INTERVAL(errnum) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ do \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (syntax & RE_NO_BK_BRACES) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto unfetch_interval; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ else \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ FREE_STACK_RETURN (errnum); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } while (0) ~~~~~~~~~~~ handle_interval: ~~~~~~~~~~~~~~~~ { ~ /* If got here, then the syntax allows intervals. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* At least (most) this many matches must be made. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int lower_bound = 0, upper_bound = -1; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ beg_interval = p - 1; ~~~~~~~~~~~~~~~~~~~~~ if (p == pend || itext_ichar_eql (p, '+')) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ BAD_INTERVAL (REG_EBRACE); ~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ GET_UNSIGNED_NUMBER (lower_bound); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (p == pend) BAD_INTERVAL (REG_EBRACE); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PATFETCH (c); ~~~~~~~~~~~~~ if (c == ',') ~~~~~~~~~~~~~ { ~ if (p == pend || itext_ichar_eql (p, '+')) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ BAD_INTERVAL (REG_EBRACE); ~~~~~~~~~~~~~~~~~~~~~~~~~~ GET_UNSIGNED_NUMBER (upper_bound); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (upper_bound < 0) upper_bound = RE_DUP_MAX; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (p == pend) BAD_INTERVAL (REG_EBRACE); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PATFETCH (c); ~~~~~~~~~~~~~ } ~ else ~~~~ { ~ /* Interval such as `{1}' => match exactly once. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ upper_bound = lower_bound; ~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ if (lower_bound > upper_bound) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ BAD_INTERVAL (REG_EBRACE); ~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ if (upper_bound > RE_DUP_MAX) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ BAD_INTERVAL (REG_ESIZEBR); ~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ if (!(syntax & RE_NO_BK_BRACES)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ if (c != '\\') ~~~~~~~~~~~~~~ FREE_STACK_RETURN (REG_BADBR); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (p == pend) ~~~~~~~~~~~~~~ FREE_STACK_RETURN (REG_EESCAPE); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PATFETCH (c); ~~~~~~~~~~~~~ } ~ if (c != '}') ~~~~~~~~~~~~~ { ~ BAD_INTERVAL (REG_EBRACE); ~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* We just parsed a valid interval. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* It's invalid to have no preceding RE. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!laststart) ~~~~~~~~~~~~~~~ { ~ if (syntax & RE_CONTEXT_INVALID_OPS) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ FREE_STACK_RETURN (REG_BADRPT); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ else if (syntax & RE_CONTEXT_INDEP_OPS) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ laststart = buf_end; ~~~~~~~~~~~~~~~~~~~~ else ~~~~ goto unfetch_interval; ~~~~~~~~~~~~~~~~~~~~~~ } ~ /* If the upper bound is zero, don't want to succeed at ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ all; jump from `laststart' to `b + 3', which will be ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the end of the buffer after we insert the jump. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (upper_bound == 0) ~~~~~~~~~~~~~~~~~~~~~ { ~ GET_BUFFER_SPACE (3); ~~~~~~~~~~~~~~~~~~~~~ INSERT_JUMP (jump, laststart, buf_end + 3); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ buf_end += 3; ~~~~~~~~~~~~~ } ~ /* Otherwise, we have a nontrivial interval. When ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ we're all done, the pattern will look like: ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ set_number_at ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ set_number_at ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ succeed_n ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ~~~~~~~~~~~~~~ jump_n ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (The upper bound and `jump_n' are omitted if ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `upper_bound' is 1, though.) */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ else ~~~~ { /* If the upper bound is > 1, we need to insert ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ more at the end of the loop. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int nbytes = 10 + (upper_bound > 1) * 10; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ GET_BUFFER_SPACE (nbytes); ~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Initialize lower bound of the `succeed_n', even ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ though it will be set during matching by its ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ attendant `set_number_at' (inserted next), ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ because `re_compile_fastmap' needs to know. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Jump to the `jump_n' we might insert below. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ INSERT_JUMP2 (succeed_n, laststart, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ buf_end + 5 + (upper_bound > 1) * 5, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ lower_bound); ~~~~~~~~~~~~~ buf_end += 5; ~~~~~~~~~~~~~ /* Code to initialize the lower bound. Insert ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ before the `succeed_n'. The `5' is the last two ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bytes of this `set_number_at', plus 3 bytes of ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the following `succeed_n'. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ insert_op2 (set_number_at, laststart, 5, lower_bound, buf_end); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ buf_end += 5; ~~~~~~~~~~~~~ if (upper_bound > 1) ~~~~~~~~~~~~~~~~~~~~ { /* More than one repetition is allowed, so ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ append a backward jump to the `succeed_n' ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ that starts this interval. ~~~~~~~~~~~~~~~~~~~~~~~~~~ When we've reached this during matching, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ we'll have matched the interval once, so ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ jump back only `upper_bound - 1' times. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ STORE_JUMP2 (jump_n, buf_end, laststart + 5, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ upper_bound - 1); ~~~~~~~~~~~~~~~~~ buf_end += 5; ~~~~~~~~~~~~~ /* The location we want to set is the second ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ parameter of the `jump_n'; that is `b-2' as ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ an absolute address. `laststart' will be ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the `set_number_at' we're about to insert; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `laststart+3' the number to set, the source ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for the relative address. But we are ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ inserting into the middle of the pattern -- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ so everything is getting moved up by 5. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Conclusion: (b - 2) - (laststart + 3) + 5, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ i.e., b - laststart. ~~~~~~~~~~~~~~~~~~~~ We insert this at the beginning of the loop ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ so that if we fail during matching, we'll ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ reinitialize the bounds. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ insert_op2 (set_number_at, laststart, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ buf_end - laststart, ~~~~~~~~~~~~~~~~~~~~ upper_bound - 1, buf_end); ~~~~~~~~~~~~~~~~~~~~~~~~~~ buf_end += 5; ~~~~~~~~~~~~~ } ~ } ~ pending_exact = 0; ~~~~~~~~~~~~~~~~~~ beg_interval = NULL; ~~~~~~~~~~~~~~~~~~~~ } ~ break; ~~~~~~ #undef BAD_INTERVAL ~~~~~~~~~~~~~~~~~~~ unfetch_interval: ~~~~~~~~~~~~~~~~~ /* If an invalid interval, match the characters as literals. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ assert (beg_interval); ~~~~~~~~~~~~~~~~~~~~~~ p = beg_interval; ~~~~~~~~~~~~~~~~~ beg_interval = NULL; ~~~~~~~~~~~~~~~~~~~~ /* normal_char and normal_backslash need `c'. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PATFETCH (c); ~~~~~~~~~~~~~ if (!(syntax & RE_NO_BK_BRACES)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ if (p > pattern && p[-1] == '\\') ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto normal_backslash; ~~~~~~~~~~~~~~~~~~~~~~ } ~ goto normal_char; ~~~~~~~~~~~~~~~~~ #ifdef emacs ~~~~~~~~~~~~ /* There is no way to specify the before_dot and after_dot ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ operators. rms says this is ok. --karl */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case '=': ~~~~~~~~~ BUF_PUSH (at_dot); ~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case 's': ~~~~~~~~~ laststart = buf_end; ~~~~~~~~~~~~~~~~~~~~ PATFETCH (c); ~~~~~~~~~~~~~ /* XEmacs addition */ ~~~~~~~~~~~~~~~~~~~~~ if (c >= 0x80 || syntax_spec_code[c] == 0377) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ FREE_STACK_RETURN (REG_ESYNTAX); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ BUF_PUSH_2 (syntaxspec, syntax_spec_code[c]); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case 'S': ~~~~~~~~~ laststart = buf_end; ~~~~~~~~~~~~~~~~~~~~ PATFETCH (c); ~~~~~~~~~~~~~ /* XEmacs addition */ ~~~~~~~~~~~~~~~~~~~~~ if (c >= 0x80 || syntax_spec_code[c] == 0377) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ FREE_STACK_RETURN (REG_ESYNTAX); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ BUF_PUSH_2 (notsyntaxspec, syntax_spec_code[c]); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ #ifdef MULE ~~~~~~~~~~~ /* 97.2.17 jhod merged in to XEmacs from mule-2.3 */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case 'c': ~~~~~~~~~ laststart = buf_end; ~~~~~~~~~~~~~~~~~~~~ PATFETCH_RAW (c); ~~~~~~~~~~~~~~~~~ if (c < 32 || c > 127) ~~~~~~~~~~~~~~~~~~~~~~ FREE_STACK_RETURN (REG_ECATEGORY); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ BUF_PUSH_2 (categoryspec, c); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case 'C': ~~~~~~~~~ laststart = buf_end; ~~~~~~~~~~~~~~~~~~~~ PATFETCH_RAW (c); ~~~~~~~~~~~~~~~~~ if (c < 32 || c > 127) ~~~~~~~~~~~~~~~~~~~~~~ FREE_STACK_RETURN (REG_ECATEGORY); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ BUF_PUSH_2 (notcategoryspec, c); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ /* end of category patch */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif /* MULE */ ~~~~~~~~~~~~~~~~~ #endif /* emacs */ ~~~~~~~~~~~~~~~~~~ case 'w': ~~~~~~~~~ laststart = buf_end; ~~~~~~~~~~~~~~~~~~~~ BUF_PUSH (wordchar); ~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case 'W': ~~~~~~~~~ laststart = buf_end; ~~~~~~~~~~~~~~~~~~~~ BUF_PUSH (notwordchar); ~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case '<': ~~~~~~~~~ BUF_PUSH (wordbeg); ~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case '>': ~~~~~~~~~ BUF_PUSH (wordend); ~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case 'b': ~~~~~~~~~ BUF_PUSH (wordbound); ~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case 'B': ~~~~~~~~~ BUF_PUSH (notwordbound); ~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case '`': ~~~~~~~~~ BUF_PUSH (begbuf); ~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case '\'': ~~~~~~~~~~ BUF_PUSH (endbuf); ~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case '1': case '2': case '3': case '4': case '5': ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case '6': case '7': case '8': case '9': ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ regnum_t reg = -1, regint; ~~~~~~~~~~~~~~~~~~~~~~~~~~ if (syntax & RE_NO_BK_REFS) ~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto normal_char; ~~~~~~~~~~~~~~~~~ PATUNFETCH; ~~~~~~~~~~~ GET_UNSIGNED_NUMBER (reg); ~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Progressively divide down the backreference until we find ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ one that corresponds to an existing register. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ while (reg > 10 && ~~~~~~~~~~~~~~~~~~ (syntax & RE_NO_MULTI_DIGIT_BK_REFS ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || reg > bufp->re_nsub ~~~~~~~~~~~~~~~~~~~~~~ || (bufp->external_to_internal_register[reg] ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ == (int) 0xDEADBEEF))) ~~~~~~~~~~~~~~~~~~~~~~ { ~ PATUNFETCH; ~~~~~~~~~~~ reg /= 10; ~~~~~~~~~~ } ~ if (reg > bufp->re_nsub ~~~~~~~~~~~~~~~~~~~~~~~ || (bufp->external_to_internal_register[reg] ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ == (int) 0xDEADBEEF)) ~~~~~~~~~~~~~~~~~~~~~ { ~ /* \N with one digit with a non-existing group has always ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ been a syntax error. ~~~~~~~~~~~~~~~~~~~~ GNU as of Fr 27 Mär 2020 16:24:07 GMT do not accept ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ multidigit backreferences; if they did there would be ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ an argument for this not being an error for those ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ backreferences that are less than some known named ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ backreference. As it is currently we should error, this ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ will give those writing code for XEmacs better ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ feedback. */ ~~~~~~~~~~~~ FREE_STACK_RETURN (REG_ESUBREG); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ regint = bufp->external_to_internal_register[reg]; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Can't back reference to a subexpression if inside of it. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (group_in_compile_stack (compile_stack, regint)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ /* Check REG, not REGINT. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ while (reg > 10) ~~~~~~~~~~~~~~~~ { ~ PATUNFETCH; ~~~~~~~~~~~ reg = reg / 10; ~~~~~~~~~~~~~~~ } ~ goto normal_char; ~~~~~~~~~~~~~~~~~ } ~ #ifdef emacs ~~~~~~~~~~~~ if (reg > 9 && ~~~~~~~~~~~~~~ bufp->warned_about_incompatible_back_references == 0) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ bufp->warned_about_incompatible_back_references = 1; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ warn_when_safe (intern ("regex"), Qinfo, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ "Back reference \\%d now has new " ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ "semantics in %s", reg, pattern); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ #endif ~~~~~~ laststart = buf_end; ~~~~~~~~~~~~~~~~~~~~ GET_BUFFER_SPACE (3); ~~~~~~~~~~~~~~~~~~~~~ store_op1 (duplicate, buf_end, regint); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ buf_end += 3; ~~~~~~~~~~~~~ } ~ break; ~~~~~~ case '+': ~~~~~~~~~ case '?': ~~~~~~~~~ if (syntax & RE_BK_PLUS_QM) ~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto handle_plus; ~~~~~~~~~~~~~~~~~ else ~~~~ goto normal_backslash; ~~~~~~~~~~~~~~~~~~~~~~ default: ~~~~~~~~ normal_backslash: ~~~~~~~~~~~~~~~~~ /* You might think it would be useful for \ to mean ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ not to translate; but if we don't translate it, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ it will never match anything. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ c = RE_TRANSLATE (c); ~~~~~~~~~~~~~~~~~~~~~ goto normal_char; ~~~~~~~~~~~~~~~~~ } ~ break; ~~~~~~ default: ~~~~~~~~ /* Expects the character in `c'. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* `p' points to the location after where `c' came from. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ normal_char: ~~~~~~~~~~~~ { ~ /* The following conditional synced to GNU Emacs 22.1. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* If no exactn currently being built. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!pending_exact ~~~~~~~~~~~~~~~~~~ /* If last exactn not at current position. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || pending_exact + *pending_exact + 1 != buf_end ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* We have only one byte following the exactn for the count. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || *pending_exact >= (1 << BYTEWIDTH) - MAX_ICHAR_LEN ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* If followed by a repetition operator. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ If the lookahead fails because of end of pattern, any ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ trailing backslash will get caught later. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || (p != pend && (*p == '*' || *p == '^')) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || ((syntax & RE_BK_PLUS_QM) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ? p + 1 < pend && *p == '\\' && (p[1] == '+' || p[1] == '?') ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ : p != pend && (*p == '+' || *p == '?')) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || ((syntax & RE_INTERVALS) ~~~~~~~~~~~~~~~~~~~~~~~~~~~ && ((syntax & RE_NO_BK_BRACES) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ? p != pend && *p == '{' ~~~~~~~~~~~~~~~~~~~~~~~~ : p + 1 < pend && (p[0] == '\\' && p[1] == '{')))) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ /* Start building a new exactn. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ laststart = buf_end; ~~~~~~~~~~~~~~~~~~~~ BUF_PUSH_2 (exactn, 0); ~~~~~~~~~~~~~~~~~~~~~~~ pending_exact = buf_end - 1; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ #ifndef MULE ~~~~~~~~~~~~ BUF_PUSH (c); ~~~~~~~~~~~~~ (*pending_exact)++; ~~~~~~~~~~~~~~~~~~~ #else ~~~~~ { ~ Bytecount bt_count; ~~~~~~~~~~~~~~~~~~~ Ibyte tmp_buf[MAX_ICHAR_LEN]; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int i; ~~~~~~ bt_count = set_itext_ichar (tmp_buf, c); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (i = 0; i < bt_count; i++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ BUF_PUSH (tmp_buf[i]); ~~~~~~~~~~~~~~~~~~~~~~ (*pending_exact)++; ~~~~~~~~~~~~~~~~~~~ } ~ } ~ #endif ~~~~~~ break; ~~~~~~ } ~ } /* switch (c) */ ~~~~~~~~~~~~~~~~~~ } /* while p != pend */ ~~~~~~~~~~~~~~~~~~~~~~~ /* Through the pattern now. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (fixup_alt_jump) ~~~~~~~~~~~~~~~~~~~ STORE_JUMP (jump_past_alt, fixup_alt_jump, buf_end); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!COMPILE_STACK_EMPTY) ~~~~~~~~~~~~~~~~~~~~~~~~~ FREE_STACK_RETURN (REG_EPAREN); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* If we don't want backtracking, force success ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the first time we reach the end of the compiled pattern. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (syntax & RE_NO_POSIX_BACKTRACKING) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ BUF_PUSH (succeed); ~~~~~~~~~~~~~~~~~~~ xfree (compile_stack.stack); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* We have succeeded; set the length of the buffer. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->used = buf_end - bufp->buffer; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef DEBUG ~~~~~~~~~~~~ if (debug_regexps & RE_DEBUG_COMPILATION) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ DEBUG_PRINT1 ("\nCompiled pattern: \n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ print_compiled_pattern (bufp); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ #endif /* DEBUG */ ~~~~~~~~~~~~~~~~~~ #ifndef MATCH_MAY_ALLOCATE ~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Initialize the failure stack to the largest possible stack. This ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ isn't necessary unless we're trying to avoid calling alloca in ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the search and match routines. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ int num_regs = bufp->re_ngroups + 1; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Since DOUBLE_FAIL_STACK refuses to double only if the current size ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ is strictly greater than re_max_failures, the largest possible stack ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ is 2 * re_max_failures failure points. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (fail_stack.size < (2 * re_max_failures * MAX_FAILURE_ITEMS)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ fail_stack.size = (2 * re_max_failures * MAX_FAILURE_ITEMS); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (! fail_stack.stack) ~~~~~~~~~~~~~~~~~~~~~~~ fail_stack.stack ~~~~~~~~~~~~~~~~ = (fail_stack_elt_t *) xmalloc (fail_stack.size ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ * sizeof (fail_stack_elt_t)); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ else ~~~~ fail_stack.stack ~~~~~~~~~~~~~~~~ = (fail_stack_elt_t *) xrealloc (fail_stack.stack, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (fail_stack.size ~~~~~~~~~~~~~~~~ * sizeof (fail_stack_elt_t))); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ regex_grow_registers (num_regs); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ #endif /* not MATCH_MAY_ALLOCATE */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return REG_NOERROR; ~~~~~~~~~~~~~~~~~~~ } /* regex_compile */ ~~~~~~~~~~~~~~~~~~~~~ ~ /* Subroutines for `regex_compile'. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Store OP at LOC followed by two-byte integer parameter ARG. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ static void ~~~~~~~~~~~ store_op1 (re_opcode_t op, unsigned char *loc, int arg) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ *loc = (unsigned char) op; ~~~~~~~~~~~~~~~~~~~~~~~~~~ STORE_NUMBER (loc + 1, arg); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* Like `store_op1', but for two two-byte parameters ARG1 and ARG2. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ static void ~~~~~~~~~~~ store_op2 (re_opcode_t op, unsigned char *loc, int arg1, int arg2) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ *loc = (unsigned char) op; ~~~~~~~~~~~~~~~~~~~~~~~~~~ STORE_NUMBER (loc + 1, arg1); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ STORE_NUMBER (loc + 3, arg2); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* Copy the bytes from LOC to END to open up three bytes of space at LOC ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for OP followed by two-byte integer parameter ARG. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ static void ~~~~~~~~~~~ insert_op1 (re_opcode_t op, unsigned char *loc, int arg, unsigned char *end) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ REGISTER unsigned char *pfrom = end; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ REGISTER unsigned char *pto = end + 3; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ while (pfrom != loc) ~~~~~~~~~~~~~~~~~~~~ *--pto = *--pfrom; ~~~~~~~~~~~~~~~~~~ store_op1 (op, loc, arg); ~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* Like `insert_op1', but for two two-byte parameters ARG1 and ARG2. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ static void ~~~~~~~~~~~ insert_op2 (re_opcode_t op, unsigned char *loc, int arg1, int arg2, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ unsigned char *end) ~~~~~~~~~~~~~~~~~~~ { ~ REGISTER unsigned char *pfrom = end; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ REGISTER unsigned char *pto = end + 5; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ while (pfrom != loc) ~~~~~~~~~~~~~~~~~~~~ *--pto = *--pfrom; ~~~~~~~~~~~~~~~~~~ store_op2 (op, loc, arg1, arg2); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* P points to just after a ^ in PATTERN. Return true if that ^ comes ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ after an alternative or a begin-subexpression. We assume there is at ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ least one character before the ^. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ static re_bool ~~~~~~~~~~~~~~ at_begline_loc_p (re_char *pattern, re_char *p, reg_syntax_t syntax) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ re_char *prev = p - 2; ~~~~~~~~~~~~~~~~~~~~~~ re_bool prev_prev_backslash = prev > pattern && prev[-1] == '\\'; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return ~~~~~~ /* After a subexpression? */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (*prev == '(' && (syntax & RE_NO_BK_PARENS || prev_prev_backslash)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* After an alternative? */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || (*prev == '|' && (syntax & RE_NO_BK_VBAR || prev_prev_backslash)); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* The dual of at_begline_loc_p. This one is for $. We assume there is ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ at least one character after the $, i.e., `P < PEND'. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ static re_bool ~~~~~~~~~~~~~~ at_endline_loc_p (re_char *p, re_char *pend, int syntax) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ re_char *next = p; ~~~~~~~~~~~~~~~~~~ re_bool next_backslash = *next == '\\'; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_char *next_next = p + 1 < pend ? p + 1 : 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return ~~~~~~ /* Before a subexpression? */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (syntax & RE_NO_BK_PARENS ? *next == ')' ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ : next_backslash && next_next && *next_next == ')') ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Before an alternative? */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || (syntax & RE_NO_BK_VBAR ? *next == '|' ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ : next_backslash && next_next && *next_next == '|'); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* Returns true if REGNUM is in one of COMPILE_STACK's elements and ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ false if it's not. */ ~~~~~~~~~~~~~~~~~~~~~~ static re_bool ~~~~~~~~~~~~~~ group_in_compile_stack (compile_stack_type compile_stack, regnum_t regnum) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ int this_element; ~~~~~~~~~~~~~~~~~ for (this_element = compile_stack.avail - 1; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ this_element >= 0; ~~~~~~~~~~~~~~~~~~ this_element--) ~~~~~~~~~~~~~~~ if (compile_stack.stack[this_element].regnum == regnum) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return true; ~~~~~~~~~~~~ return false; ~~~~~~~~~~~~~ } ~ /* Read the ending character of a range (in a bracket expression) from the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ uncompiled pattern *P_PTR (which ends at PEND). We assume the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ starting character is in `P[-2]'. (`P[-1]' is the character `-'.) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Then we set the translation of all bits between the starting and ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ending characters (inclusive) in the compiled pattern B. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Return an error code. ~~~~~~~~~~~~~~~~~~~~~ We use these short variable names so we can use the same macros as ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `regex_compile' itself. ~~~~~~~~~~~~~~~~~~~~~~~ Under Mule, this is only called when both chars of the range are ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ASCII. */ ~~~~~~~~~ static reg_errcode_t ~~~~~~~~~~~~~~~~~~~~ compile_range (re_char **p_ptr, re_char *pend, RE_TRANSLATE_TYPE translate, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ reg_syntax_t syntax, unsigned char *buf_end) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ Ichar this_char; ~~~~~~~~~~~~~~~~ re_char *p = *p_ptr; ~~~~~~~~~~~~~~~~~~~~ int range_start, range_end; ~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (p == pend) ~~~~~~~~~~~~~~ return REG_ERANGE; ~~~~~~~~~~~~~~~~~~ /* Even though the pattern is a signed `char *', we need to fetch ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ with unsigned char *'s; if the high bit of the pattern character ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ is set, the range endpoints will be negative if we fetch using a ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ signed char *. ~~~~~~~~~~~~~~ We also want to fetch the endpoints without translating them; the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ appropriate translation is done in the bit-setting loop below. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* The SVR4 compiler on the 3B2 had trouble with unsigned const char *. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ range_start = ((const unsigned char *) p)[-2]; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ range_end = ((const unsigned char *) p)[0]; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Have to increment the pointer into the pattern string, so the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ caller isn't still at the ending character. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (*p_ptr)++; ~~~~~~~~~~~ /* If the start is after the end, the range is empty. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (range_start > range_end) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return syntax & RE_NO_EMPTY_RANGES ? REG_ERANGE : REG_NOERROR; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Here we see why `this_char' has to be larger than an `unsigned ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ char' -- the range is inclusive, so if `range_end' == 0xff ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (assuming 8-bit characters), we would otherwise go into an infinite ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ loop, since all characters <= 0xff. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (this_char = range_start; this_char <= range_end; this_char++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ SET_LIST_BIT (RE_TRANSLATE (this_char)); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ return REG_NOERROR; ~~~~~~~~~~~~~~~~~~~ } ~ #ifdef MULE ~~~~~~~~~~~ static reg_errcode_t ~~~~~~~~~~~~~~~~~~~~ compile_extended_range (re_char **p_ptr, re_char *pend, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RE_TRANSLATE_TYPE translate, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ reg_syntax_t syntax, Lisp_Object rtab) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ Ichar this_char, range_start, range_end; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ const Ibyte *p; ~~~~~~~~~~~~~~~ if (*p_ptr == pend) ~~~~~~~~~~~~~~~~~~~ return REG_ERANGE; ~~~~~~~~~~~~~~~~~~ p = (const Ibyte *) *p_ptr; ~~~~~~~~~~~~~~~~~~~~~~~~~~~ range_end = itext_ichar (p); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ p--; /* back to '-' */ ~~~~~~~~~~~~~~~~~~~~~~ DEC_IBYTEPTR (p); /* back to start of range */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* We also want to fetch the endpoints without translating them; the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ appropriate translation is done in the bit-setting loop below. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ range_start = itext_ichar (p); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ INC_IBYTEPTR (*p_ptr); ~~~~~~~~~~~~~~~~~~~~~~ /* If the start is after the end, the range is empty. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (range_start > range_end) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return syntax & RE_NO_EMPTY_RANGES ? REG_ERANGE : REG_NOERROR; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifndef UNICODE_INTERNAL ~~~~~~~~~~~~~~~~~~~~~~~~ /* Can't have ranges spanning different charsets, except maybe for ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ranges entirely within the first 256 chars. (The intent of this is that ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the effect of such a range would be unpredictable, since there is no ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ well-defined ordering over charsets and the particular assignment of ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ charset ID's is arbitrary.) This does not apply to Unicode, with ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ well-defined character values. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if ((range_start >= 0x100 || range_end >= 0x100) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ && !EQ (old_mule_ichar_charset (range_start), ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ old_mule_ichar_charset (range_end))) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return REG_ERANGESPAN; ~~~~~~~~~~~~~~~~~~~~~~ #endif /* not UNICODE_INTERNAL */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (TRANSLATE_P (translate)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ /* #### This might be way inefficient if the range encompasses 10,000 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ chars or something. To be efficient, you'd have to do something like ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ this: ~~~~~ range_table a ~~~~~~~~~~~~~ range_table b; ~~~~~~~~~~~~~~ map_char_table (translation table, [range_start, range_end]) of ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ lambda (ch, translation): ~~~~~~~~~~~~~~~~~~~~~~~~~ put (ch, Qt) in a ~~~~~~~~~~~~~~~~~ put (translation, Qt) in b ~~~~~~~~~~~~~~~~~~~~~~~~~~ invert the range in a and truncate to [range_start, range_end] ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ put the union of a, b in rtab ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ This is to say, we want to map every character that has a translation ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ to its translation, and other characters to themselves. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ This assumes, as is reasonable in practice, that a translation ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ table maps individual characters to their translation, and does ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ not generally map multiple characters to the same translation. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ ~~ for (this_char = range_start; this_char <= range_end; this_char++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ SET_RANGETAB_BIT (RE_TRANSLATE (this_char)); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ } ~ else ~~~~ put_range_table (rtab, range_start, range_end, Qt); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return REG_NOERROR; ~~~~~~~~~~~~~~~~~~~ } ~ #endif /* MULE */ ~~~~~~~~~~~~~~~~~ #ifdef emacs ~~~~~~~~~~~~ reg_errcode_t ~~~~~~~~~~~~~ compile_char_class (re_wctype_t cc, Lisp_Object rtab, Bitbyte *flags_out) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ *flags_out |= re_wctype_to_bit (cc); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ switch (cc) ~~~~~~~~~~~ { ~ case RECC_ASCII: ~~~~~~~~~~~~~~~~ put_range_table (rtab, 0, 0x7f, Qt); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case RECC_XDIGIT: ~~~~~~~~~~~~~~~~~ put_range_table (rtab, 'a', 'f', Qt); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ put_range_table (rtab, 'A', 'f', Qt); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* fallthrough */ ~~~~~~~~~~~~~~~~~ case RECC_DIGIT: ~~~~~~~~~~~~~~~~ put_range_table (rtab, '0', '9', Qt); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case RECC_BLANK: ~~~~~~~~~~~~~~~~ put_range_table (rtab, ' ', ' ', Qt); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ put_range_table (rtab, '\t', '\t', Qt); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case RECC_PRINT: ~~~~~~~~~~~~~~~~ put_range_table (rtab, ' ', 0x7e, Qt); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ put_range_table (rtab, 0x80, CHAR_CODE_LIMIT, Qt); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case RECC_GRAPH: ~~~~~~~~~~~~~~~~ put_range_table (rtab, '!', 0x7e, Qt); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ put_range_table (rtab, 0x80, CHAR_CODE_LIMIT, Qt); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case RECC_NONASCII: ~~~~~~~~~~~~~~~~~~~ case RECC_MULTIBYTE: ~~~~~~~~~~~~~~~~~~~~ put_range_table (rtab, 0x80, CHAR_CODE_LIMIT, Qt); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case RECC_CNTRL: ~~~~~~~~~~~~~~~~ put_range_table (rtab, 0x00, 0x1f, Qt); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case RECC_UNIBYTE: ~~~~~~~~~~~~~~~~~~ /* Never true in XEmacs. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ /* The following all have their own bits in the class_bits argument to ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ charset_mule and charset_mule_not, they don't use the range table ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ information. */ ~~~~~~~~~~~~~~~ case RECC_ALPHA: ~~~~~~~~~~~~~~~~ case RECC_WORD: ~~~~~~~~~~~~~~~ case RECC_ALNUM: /* Equivalent to RECC_WORD */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_LOWER: ~~~~~~~~~~~~~~~~ case RECC_PUNCT: ~~~~~~~~~~~~~~~~ case RECC_SPACE: ~~~~~~~~~~~~~~~~ case RECC_UPPER: ~~~~~~~~~~~~~~~~ break; ~~~~~~ } ~ return REG_NOERROR; ~~~~~~~~~~~~~~~~~~~ } ~ #endif /* MULE */ ~~~~~~~~~~~~~~~~~ ~ /* re_compile_fastmap computes a ``fastmap'' for the compiled pattern in ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ BUFP. A fastmap records which of the (1 << BYTEWIDTH) possible ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ characters can start a string that matches the pattern. This fastmap ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ is used by re_search to skip quickly over impossible starting points. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ The caller must supply the address of a (1 << BYTEWIDTH)-byte data ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ area as BUFP->fastmap. ~~~~~~~~~~~~~~~~~~~~~~ We set the `fastmap', `fastmap_accurate', and `can_be_null' fields in ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the pattern buffer. ~~~~~~~~~~~~~~~~~~~ Returns 0 if we succeed, -2 if an internal error. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int ~~~ re_compile_fastmap (struct re_pattern_buffer *bufp ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RE_LISP_SHORT_CONTEXT_ARGS_DECL) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ int j, k; ~~~~~~~~~ #ifdef MATCH_MAY_ALLOCATE ~~~~~~~~~~~~~~~~~~~~~~~~~ fail_stack_type fail_stack; ~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ /* We don't push any register information onto the failure stack. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* &&#### this should be changed for 8-bit-fixed, for efficiency. see ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ comment marked with &&#### in re_search_2. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ REGISTER char *fastmap = bufp->fastmap; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ unsigned char *pattern = bufp->buffer; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ long size = bufp->used; ~~~~~~~~~~~~~~~~~~~~~~~ re_char *p = pattern; ~~~~~~~~~~~~~~~~~~~~~ REGISTER re_char *pend = pattern + size; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef REGEX_REL_ALLOC ~~~~~~~~~~~~~~~~~~~~~~ /* This holds the pointer to the failure stack, when ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ it is allocated relocatably. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fail_stack_elt_t *failure_stack_ptr; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ /* Assume that each path through the pattern can be null until ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ proven otherwise. We set this false at the bottom of switch ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ statement, to which we get only if a particular path doesn't ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ match the empty string. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_bool path_can_be_null = true; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* We aren't doing a `succeed_n' to begin with. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_bool succeed_n_p = false; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef ERROR_CHECK_MALLOC ~~~~~~~~~~~~~~~~~~~~~~~~~ /* The pattern comes from string data, not buffer data. We don't access ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ any buffer data, so we don't have to worry about malloc() (but the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ disallowed flag may have been set by a caller). */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int depth = bind_regex_malloc_disallowed (0); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ assert (fastmap != NULL && p != NULL); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ INIT_FAIL_STACK (); ~~~~~~~~~~~~~~~~~~~ memset (fastmap, 0, 1 << BYTEWIDTH); /* Assume nothing's valid. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->fastmap_accurate = 1; /* It will be when we're done. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->can_be_null = 0; ~~~~~~~~~~~~~~~~~~~~~~ while (1) ~~~~~~~~~ { ~ if (p == pend || *p == succeed) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ /* We have reached the (effective) end of pattern. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!FAIL_STACK_EMPTY ()) ~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ bufp->can_be_null |= path_can_be_null; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Reset for next path. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~ path_can_be_null = true; ~~~~~~~~~~~~~~~~~~~~~~~~ p = (unsigned char *) fail_stack.stack[--fail_stack.avail].pointer; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ continue; ~~~~~~~~~ } ~ else ~~~~ break; ~~~~~~ } ~ /* We should never be about to go beyond the end of the pattern. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ assert (p < pend); ~~~~~~~~~~~~~~~~~~ switch ((re_opcode_t) *p++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ /* I guess the idea here is to simply not bother with a fastmap ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if a backreference is used, since it's too hard to figure out ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the fastmap for the corresponding group. Setting ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `can_be_null' stops `re_search_2' from using the fastmap, so ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ that is all we do. */ ~~~~~~~~~~~~~~~~~~~~~~ case duplicate: ~~~~~~~~~~~~~~~ bufp->can_be_null = 1; ~~~~~~~~~~~~~~~~~~~~~~ goto done; ~~~~~~~~~~ /* Following are the cases which match a character. These end ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ with `break'. */ ~~~~~~~~~~~~~~~~~ case exactn: ~~~~~~~~~~~~ fastmap[p[1]] = 1; ~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case charset: ~~~~~~~~~~~~~ /* XEmacs: Under Mule, these bit vectors will ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ only contain values for characters below 0x80. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH))) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[j] = 1; ~~~~~~~~~~~~~~~ break; ~~~~~~ case charset_not: ~~~~~~~~~~~~~~~~~ /* Chars beyond end of map must be allowed. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef MULE ~~~~~~~~~~~ for (j = *p * BYTEWIDTH; j < 0x80; j++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[j] = 1; ~~~~~~~~~~~~~~~ /* And all extended characters must be allowed, too. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (j = 0x80; j < 0x100; j++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (ibyte_first_byte_p (j)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[j] = 1; ~~~~~~~~~~~~~~~ #else /* not MULE */ ~~~~~~~~~~~~~~~~~~~~ for (j = *p * BYTEWIDTH; j < (1 << BYTEWIDTH); j++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[j] = 1; ~~~~~~~~~~~~~~~ #endif /* MULE */ ~~~~~~~~~~~~~~~~~ for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!(p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH)))) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[j] = 1; ~~~~~~~~~~~~~~~ break; ~~~~~~ #ifdef MULE ~~~~~~~~~~~ case charset_mule: ~~~~~~~~~~~~~~~~~~ { ~ int nentries; ~~~~~~~~~~~~~ Bitbyte flags = *p++; ~~~~~~~~~~~~~~~~~~~~~ if (flags) ~~~~~~~~~~ { ~ /* We need to consult the syntax table, fastmap won't ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ work. */ ~~~~~~~~ bufp->can_be_null = 1; ~~~~~~~~~~~~~~~~~~~~~~ goto done; ~~~~~~~~~~ } ~ nentries = unified_range_table_nentries ((void *) p); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (j = 0; j < nentries; j++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ EMACS_INT first, last; ~~~~~~~~~~~~~~~~~~~~~~ Lisp_Object dummy_val; ~~~~~~~~~~~~~~~~~~~~~~ int jj; ~~~~~~~ Ibyte strr[MAX_ICHAR_LEN]; ~~~~~~~~~~~~~~~~~~~~~~~~~~ unified_range_table_get_range ((void *) p, j, &first, &last, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ &dummy_val); ~~~~~~~~~~~~ #ifndef UNICODE_INTERNAL ~~~~~~~~~~~~~~~~~~~~~~~~ for (jj = first; jj <= last && jj < 0x80; jj++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[jj] = 1; ~~~~~~~~~~~~~~~~ /* Ranges below 0x100 can span charsets, but there ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ are only two (Control-1 and Latin-1), and ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ either first or last has to be in them. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ set_itext_ichar (strr, first); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[*strr] = 1; ~~~~~~~~~~~~~~~~~~~ if (last < 0x100) ~~~~~~~~~~~~~~~~~ { ~ set_itext_ichar (strr, last); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[*strr] = 1; ~~~~~~~~~~~~~~~~~~~ } ~ else if (CHAR_CODE_LIMIT == last) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ /* This is RECC_MULTIBYTE or RECC_NONASCII; true for all ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ non-ASCII characters. */ ~~~~~~~~~~~~~~~~~~~~~~~~ jj = 0x80; ~~~~~~~~~~ while (jj < 0xA0) ~~~~~~~~~~~~~~~~~ { ~ fastmap[jj++] = 1; ~~~~~~~~~~~~~~~~~~ } ~ } ~ #else ~~~~~ /* Ranges can span charsets. We depend on the fact that ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ lead bytes are monotonically non-decreasing as ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ character values increase. @@#### This is a fairly ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ reasonable assumption in general (but DOES NOT WORK in ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ old Mule due to the ordering of private dimension-1 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ chars before official dimension-2 chars), and introduces ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ a dependency on the particular representation. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ Ibyte strrlast[MAX_ICHAR_LEN]; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ set_itext_ichar (strr, first); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ set_itext_ichar (strrlast, min (last, CHAR_CODE_LIMIT - 1)); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (jj = *strr; jj <= *strrlast; jj++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[jj] = 1; ~~~~~~~~~~~~~~~~ } ~ #endif /* not UNICODE_INTERNAL */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* If it's not a possible first byte, it can't be in the fastmap. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ In UTF-8, lead bytes are not contiguous with ASCII, so a ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ range spanning the ASCII/non-ASCII boundary will put ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ extraneous bytes in the range [0x80 - 0xBF] in the fastmap. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (j = 0x80; j < 0x100; j++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!ibyte_first_byte_p (j)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[j] = 0; ~~~~~~~~~~~~~~~ } ~ break; ~~~~~~ case charset_mule_not: ~~~~~~~~~~~~~~~~~~~~~~ { ~ int nentries; ~~~~~~~~~~~~~ int smallest_prev = 0; ~~~~~~~~~~~~~~~~~~~~~~ Bitbyte flags = *p++; ~~~~~~~~~~~~~~~~~~~~~ if (flags) ~~~~~~~~~~ { ~ /* We need to consult the syntax table, fastmap won't ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ work. */ ~~~~~~~~ bufp->can_be_null = 1; ~~~~~~~~~~~~~~~~~~~~~~ goto done; ~~~~~~~~~~ } ~ nentries = unified_range_table_nentries ((void *) p); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifndef UNICODE_INTERNAL ~~~~~~~~~~~~~~~~~~~~~~~~ for (j = 0; j < nentries; j++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ EMACS_INT first, last; ~~~~~~~~~~~~~~~~~~~~~~ Lisp_Object dummy_val; ~~~~~~~~~~~~~~~~~~~~~~ int jj; ~~~~~~~ unified_range_table_get_range ((void *) p, j, &first, &last, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ &dummy_val); ~~~~~~~~~~~~ for (jj = smallest_prev; jj < first && jj < 0x80; jj++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[jj] = 1; ~~~~~~~~~~~~~~~~ smallest_prev = last + 1; ~~~~~~~~~~~~~~~~~~~~~~~~~ if (smallest_prev >= 0x80) ~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ } ~ /* Also set lead bytes after the end */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (j = smallest_prev; j < 0x80; j++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[j] = 1; ~~~~~~~~~~~~~~~ /* Calculating which lead bytes are actually allowed ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ here is rather difficult, so we just punt and allow ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ all of them. ~~~~~~~~~~~~ */ ~~ for (j = 0x80; j < 0x100; j++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (ibyte_first_byte_p (j)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[j] = 1; ~~~~~~~~~~~~~~~ #else ~~~~~ for (j = 0; j < nentries; j++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ EMACS_INT first, last; ~~~~~~~~~~~~~~~~~~~~~~ /* This denotes a range of lead bytes that are not ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ in the fastmap. */ ~~~~~~~~~~~~~~~~~~ int firstlead, lastlead; ~~~~~~~~~~~~~~~~~~~~~~~~ Lisp_Object dummy_val; ~~~~~~~~~~~~~~~~~~~~~~ int jj; ~~~~~~~ unified_range_table_get_range ((void *) p, j, &first, &last, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ &dummy_val); ~~~~~~~~~~~~ /* With Unicode-internal, lead bytes that are entirely ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ within the range and not including the beginning or end ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ are definitely not in the fastmap. Leading bytes that ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ include the beginning or ending characters will be in ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the fastmap unless the beginning or ending characters ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ are the first or last character, respectively, that uses ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ this lead byte. ~~~~~~~~~~~~~~~ @@#### WARNING! In order to determine whether we are the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ first or last character using a lead byte we use and ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ embed in the code some knowledge of how UTF-8 works -- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ at least, the fact that the the first character using a ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ particular lead byte has the minimum-numbered trailing ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ byte in all its trailing bytes, and the last character ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ using a particular lead byte has the maximum-numbered ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ trailing byte in all its trailing bytes. We abstract ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ away the actual minimum/maximum trailing byte numbers, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ at least. We could perhaps do this more portably by ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ just looking at the representation of the character one ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ higher or lower and seeing if the lead byte changes, but ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ you'd run into the problem of invalid characters, e.g. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if you're at the edge of the range of surrogates or are ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the top-most allowed character. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ ~~ if (first < 0x80) ~~~~~~~~~~~~~~~~~ firstlead = first; ~~~~~~~~~~~~~~~~~~ else ~~~~ { ~ Ibyte strr[MAX_ICHAR_LEN]; ~~~~~~~~~~~~~~~~~~~~~~~~~~ Bytecount slen = set_itext_ichar (strr, first); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int kk; ~~~~~~~ /* Determine if we're the first character using our ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ leading byte. */ ~~~~~~~~~~~~~~~~ for (kk = 1; kk < slen; kk++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (strr[kk] != FIRST_TRAILING_BYTE) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ /* If not, this leading byte might occur, so ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ make sure it gets added to the fastmap. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ firstlead = *strr + 1; ~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ } ~ /* Otherwise, we're the first character using our ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ leading byte, and we don't need to add the leading ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ byte to the fastmap. (If our range doesn't ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ completely cover the leading byte, it will get added ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ anyway by the code handling the other end of the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ range.) */ ~~~~~~~~~~ firstlead = *strr; ~~~~~~~~~~~~~~~~~~ } ~ if (last < 0x80) ~~~~~~~~~~~~~~~~ lastlead = last; ~~~~~~~~~~~~~~~~ else ~~~~ { ~ Ibyte strr[MAX_ICHAR_LEN]; ~~~~~~~~~~~~~~~~~~~~~~~~~~ Bytecount slen ~~~~~~~~~~~~~~ = set_itext_ichar (strr, ~~~~~~~~~~~~~~~~~~~~~~~~ min (last, ~~~~~~~~~~ CHAR_CODE_LIMIT - 1)); ~~~~~~~~~~~~~~~~~~~~~~ int kk; ~~~~~~~ /* Same as above but for the last character using ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ our leading byte. */ ~~~~~~~~~~~~~~~~~~~~ for (kk = 1; kk < slen; kk++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (strr[kk] != LAST_TRAILING_BYTE) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ lastlead = *strr - 1; ~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ } ~ lastlead = *strr; ~~~~~~~~~~~~~~~~~ } ~ /* Now, FIRSTLEAD and LASTLEAD are set to the beginning and ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ end, inclusive, of a range of lead bytes that cannot be ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ in the fastmap. Essentially, we want to set all the other ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bytes to be in the fastmap. Here we handle those after ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the previous range and before this one. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (jj = smallest_prev; jj < firstlead; jj++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[jj] = 1; ~~~~~~~~~~~~~~~~ smallest_prev = lastlead + 1; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* Also set lead bytes after the end of the final range. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (j = smallest_prev; j < 0x100; j++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[j] = 1; ~~~~~~~~~~~~~~~ /* If it's not a possible first byte, it can't be in the fastmap. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ In UTF-8, lead bytes are not contiguous with ASCII, so a ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ range spanning the ASCII/non-ASCII boundary will put ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ extraneous bytes in the range [0x80 - 0xBF] in the fastmap. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (j = 0x80; j < 0x100; j++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!ibyte_first_byte_p (j)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[j] = 0; ~~~~~~~~~~~~~~~ #endif /* UNICODE_INTERNAL */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ break; ~~~~~~ #endif /* MULE */ ~~~~~~~~~~~~~~~~~ case anychar: ~~~~~~~~~~~~~ { ~ int fastmap_newline = fastmap['\n']; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* `.' matches anything ... */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef MULE ~~~~~~~~~~~ /* "anything" only includes bytes that can be the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ first byte of a character. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (j = 0; j < 0x100; j++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (ibyte_first_byte_p (j)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[j] = 1; ~~~~~~~~~~~~~~~ #else ~~~~~ for (j = 0; j < (1 << BYTEWIDTH); j++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[j] = 1; ~~~~~~~~~~~~~~~ #endif ~~~~~~ /* ... except perhaps newline. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!(bufp->syntax & RE_DOT_NEWLINE)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap['\n'] = fastmap_newline; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Return if we have already set `can_be_null'; if we have, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ then the fastmap is irrelevant. Something's wrong here. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ else if (bufp->can_be_null) ~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto done; ~~~~~~~~~~ /* Otherwise, have to check alternative paths. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ } ~ #ifndef emacs ~~~~~~~~~~~~~ case wordchar: ~~~~~~~~~~~~~~ for (j = 0; j < (1 << BYTEWIDTH); j++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (SYNTAX (ignored, j) == Sword) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[j] = 1; ~~~~~~~~~~~~~~~ break; ~~~~~~ case notwordchar: ~~~~~~~~~~~~~~~~~ for (j = 0; j < (1 << BYTEWIDTH); j++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (SYNTAX (ignored, j) != Sword) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[j] = 1; ~~~~~~~~~~~~~~~ break; ~~~~~~ #else /* emacs */ ~~~~~~~~~~~~~~~~~ case wordchar: ~~~~~~~~~~~~~~ case notwordchar: ~~~~~~~~~~~~~~~~~ case wordbound: ~~~~~~~~~~~~~~~ case notwordbound: ~~~~~~~~~~~~~~~~~~ case wordbeg: ~~~~~~~~~~~~~ case wordend: ~~~~~~~~~~~~~ case notsyntaxspec: ~~~~~~~~~~~~~~~~~~~ case syntaxspec: ~~~~~~~~~~~~~~~~ /* This match depends on text properties. These end with ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ aborting optimizations. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->can_be_null = 1; ~~~~~~~~~~~~~~~~~~~~~~ goto done; ~~~~~~~~~~ #if 0 /* all of the following code is unused now that the `syntax-table' ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ property exists -- it's trickier to do this than just look in ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the buffer. &&#### but we could just use the syntax-cache stuff ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ instead; why don't we? --ben */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case wordchar: ~~~~~~~~~~~~~~ k = (int) Sword; ~~~~~~~~~~~~~~~~ goto matchsyntax; ~~~~~~~~~~~~~~~~~ case notwordchar: ~~~~~~~~~~~~~~~~~ k = (int) Sword; ~~~~~~~~~~~~~~~~ goto matchnotsyntax; ~~~~~~~~~~~~~~~~~~~~ case syntaxspec: ~~~~~~~~~~~~~~~~ k = *p++; ~~~~~~~~~ matchsyntax: ~~~~~~~~~~~~ #ifdef MULE ~~~~~~~~~~~ for (j = 0; j < 0x80; j++) ~~~~~~~~~~~~~~~~~~~~~~~~~~ if (SYNTAX ~~~~~~~~~~ (XCHAR_TABLE (BUFFER_MIRROR_SYNTAX_TABLE (lispbuf)), j) == ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (enum syntaxcode) k) ~~~~~~~~~~~~~~~~~~~~ fastmap[j] = 1; ~~~~~~~~~~~~~~~ /* @@#### To be correct, we need to set the fastmap for any ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ lead byte any of whose characters can have this syntax code. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ This is hard to calculate so we just punt for now. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (j = 0x80; j < 0x100; j++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (ibyte_first_byte_p (j)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[j] = 1; ~~~~~~~~~~~~~~~ #else /* not MULE */ ~~~~~~~~~~~~~~~~~~~~ for (j = 0; j < (1 << BYTEWIDTH); j++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (SYNTAX ~~~~~~~~~~ (XCHAR_TABLE (BUFFER_MIRROR_SYNTAX_TABLE (lispbuf)), j) == ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (enum syntaxcode) k) ~~~~~~~~~~~~~~~~~~~~ fastmap[j] = 1; ~~~~~~~~~~~~~~~ #endif /* MULE */ ~~~~~~~~~~~~~~~~~ break; ~~~~~~ case notsyntaxspec: ~~~~~~~~~~~~~~~~~~~ k = *p++; ~~~~~~~~~ matchnotsyntax: ~~~~~~~~~~~~~~~ #ifdef MULE ~~~~~~~~~~~ for (j = 0; j < 0x80; j++) ~~~~~~~~~~~~~~~~~~~~~~~~~~ if (SYNTAX ~~~~~~~~~~ (XCHAR_TABLE ~~~~~~~~~~~~ (BUFFER_MIRROR_SYNTAX_TABLE (lispbuf)), j) != ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (enum syntaxcode) k) ~~~~~~~~~~~~~~~~~~~~ fastmap[j] = 1; ~~~~~~~~~~~~~~~ /* @@#### To be correct, we need to set the fastmap for any ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ lead byte all of whose characters do not have this syntax code. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ This is hard to calculate so we just punt for now. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (j = 0x80; j < 0x100; j++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (ibyte_first_byte_p (j)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[j] = 1; ~~~~~~~~~~~~~~~ #else /* not MULE */ ~~~~~~~~~~~~~~~~~~~~ for (j = 0; j < (1 << BYTEWIDTH); j++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (SYNTAX ~~~~~~~~~~ (XCHAR_TABLE ~~~~~~~~~~~~ (BUFFER_MIRROR_SYNTAX_TABLE (lispbuf)), j) != ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (enum syntaxcode) k) ~~~~~~~~~~~~~~~~~~~~ fastmap[j] = 1; ~~~~~~~~~~~~~~~ #endif /* MULE */ ~~~~~~~~~~~~~~~~~ break; ~~~~~~ #endif /* 0 */ ~~~~~~~~~~~~~~ #ifdef MULE ~~~~~~~~~~~ /* 97/2/17 jhod category patch */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case categoryspec: ~~~~~~~~~~~~~~~~~~ case notcategoryspec: ~~~~~~~~~~~~~~~~~~~~~ bufp->can_be_null = 1; ~~~~~~~~~~~~~~~~~~~~~~ UNBIND_REGEX_MALLOC_CHECK (); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return 0; ~~~~~~~~~ /* end if category patch */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif /* MULE */ ~~~~~~~~~~~~~~~~~ /* All cases after this match the empty string. These end with ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `continue'. */ ~~~~~~~~~~~~~~~ case before_dot: ~~~~~~~~~~~~~~~~ case at_dot: ~~~~~~~~~~~~ case after_dot: ~~~~~~~~~~~~~~~ continue; ~~~~~~~~~ #endif /* emacs */ ~~~~~~~~~~~~~~~~~~ case no_op: ~~~~~~~~~~~ case begline: ~~~~~~~~~~~~~ case endline: ~~~~~~~~~~~~~ case begbuf: ~~~~~~~~~~~~ case endbuf: ~~~~~~~~~~~~ #ifndef emacs ~~~~~~~~~~~~~ case wordbound: ~~~~~~~~~~~~~~~ case notwordbound: ~~~~~~~~~~~~~~~~~~ case wordbeg: ~~~~~~~~~~~~~ case wordend: ~~~~~~~~~~~~~ #endif ~~~~~~ case push_dummy_failure: ~~~~~~~~~~~~~~~~~~~~~~~~ continue; ~~~~~~~~~ case jump_n: ~~~~~~~~~~~~ case pop_failure_jump: ~~~~~~~~~~~~~~~~~~~~~~ case maybe_pop_jump: ~~~~~~~~~~~~~~~~~~~~ case jump: ~~~~~~~~~~ case jump_past_alt: ~~~~~~~~~~~~~~~~~~~ case dummy_failure_jump: ~~~~~~~~~~~~~~~~~~~~~~~~ EXTRACT_NUMBER_AND_INCR (j, p); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ p += j; ~~~~~~~ if (j > 0) ~~~~~~~~~~ continue; ~~~~~~~~~ /* Jump backward implies we just went through the body of a ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ loop and matched nothing. Opcode jumped to should be ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `on_failure_jump' or `succeed_n'. Just treat it like an ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ordinary jump. For a * loop, it has pushed its failure ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ point already; if so, discard that as redundant. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if ((re_opcode_t) *p != on_failure_jump ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ && (re_opcode_t) *p != succeed_n) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ continue; ~~~~~~~~~ p++; ~~~~ EXTRACT_NUMBER_AND_INCR (j, p); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ p += j; ~~~~~~~ /* If what's on the stack is where we are now, pop it. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!FAIL_STACK_EMPTY () ~~~~~~~~~~~~~~~~~~~~~~~~ && fail_stack.stack[fail_stack.avail - 1].pointer == p) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fail_stack.avail--; ~~~~~~~~~~~~~~~~~~~ continue; ~~~~~~~~~ case on_failure_jump: ~~~~~~~~~~~~~~~~~~~~~ case on_failure_keep_string_jump: ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ handle_on_failure_jump: ~~~~~~~~~~~~~~~~~~~~~~~ EXTRACT_NUMBER_AND_INCR (j, p); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* For some patterns, e.g., `(a?)?', `p+j' here points to the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ end of the pattern. We don't want to push such a point, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ since when we restore it above, entering the switch will ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ increment `p' past the end of the pattern. We don't need ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ to push such a point since we obviously won't find any more ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap entries beyond `pend'. Such a pattern can match ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the null string, though. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (p + j < pend) ~~~~~~~~~~~~~~~~~ { ~ if (!PUSH_PATTERN_OP (p + j, fail_stack)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ RESET_FAIL_STACK (); ~~~~~~~~~~~~~~~~~~~~ UNBIND_REGEX_MALLOC_CHECK (); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return -2; ~~~~~~~~~~ } ~ } ~ else ~~~~ bufp->can_be_null = 1; ~~~~~~~~~~~~~~~~~~~~~~ if (succeed_n_p) ~~~~~~~~~~~~~~~~ { ~ EXTRACT_NUMBER_AND_INCR (k, p); /* Skip the n. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ succeed_n_p = false; ~~~~~~~~~~~~~~~~~~~~ } ~ continue; ~~~~~~~~~ case succeed_n: ~~~~~~~~~~~~~~~ /* Get to the number of times to succeed. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ p += 2; ~~~~~~~ /* Increment p past the n for when k != 0. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ EXTRACT_NONNEGATIVE_AND_INCR (k, p); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (k == 0) ~~~~~~~~~~~ { ~ p -= 4; ~~~~~~~ succeed_n_p = true; /* Spaghetti code alert. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto handle_on_failure_jump; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ continue; ~~~~~~~~~ case set_number_at: ~~~~~~~~~~~~~~~~~~~ p += 4; ~~~~~~~ continue; ~~~~~~~~~ case start_memory: ~~~~~~~~~~~~~~~~~~ case stop_memory: ~~~~~~~~~~~~~~~~~ p += 4; ~~~~~~~ continue; ~~~~~~~~~ default: ~~~~~~~~ ABORT (); /* We have listed all the cases. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } /* switch *p++ */ ~~~~~~~~~~~~~~~~~~~ /* Getting here means we have found the possible starting ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ characters for one path of the pattern -- and that the empty ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ string does not match. We need not follow this path further. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Instead, look at the next alternative (remembered on the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ stack), or quit if no more. The test at the top of the loop ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ does these things. */ ~~~~~~~~~~~~~~~~~~~~~~ path_can_be_null = false; ~~~~~~~~~~~~~~~~~~~~~~~~~ p = pend; ~~~~~~~~~ } /* while p */ ~~~~~~~~~~~~~~~ /* Set `can_be_null' for the last path (also the first path, if the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ pattern is empty). */ ~~~~~~~~~~~~~~~~~~~~~~ bufp->can_be_null |= path_can_be_null; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ done: ~~~~~ RESET_FAIL_STACK (); ~~~~~~~~~~~~~~~~~~~~ UNBIND_REGEX_MALLOC_CHECK (); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return 0; ~~~~~~~~~ } /* re_compile_fastmap */ ~~~~~~~~~~~~~~~~~~~~~~~~~~ ~ /* Set REGS to hold NUM_REGS registers, storing them in STARTS and ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ENDS. Subsequent matches using PATTERN_BUFFER and REGS will use ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ this memory for recording register information. STARTS and ENDS ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ must be allocated using the malloc library routine, and must each ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ be at least NUM_REGS * sizeof (regoff_t) bytes long. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ If NUM_REGS == 0, then subsequent matches should allocate their own ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ register data. ~~~~~~~~~~~~~~ Unless this function is called, the first search or match using ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PATTERN_BUFFER will allocate its own register data, without ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ freeing the old data. */ ~~~~~~~~~~~~~~~~~~~~~~~~~ void ~~~~ re_set_registers (struct re_pattern_buffer *bufp, struct re_registers *regs, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int num_regs, regoff_t *starts, regoff_t *ends) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ if (num_regs) ~~~~~~~~~~~~~ { ~ bufp->regs_allocated = REGS_REALLOCATE; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regs->num_regs = num_regs; ~~~~~~~~~~~~~~~~~~~~~~~~~~ regs->start = starts; ~~~~~~~~~~~~~~~~~~~~~ regs->end = ends; ~~~~~~~~~~~~~~~~~ } ~ else ~~~~ { ~ bufp->regs_allocated = REGS_UNALLOCATED; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regs->num_regs = 0; ~~~~~~~~~~~~~~~~~~~ regs->start = regs->end = (regoff_t *) 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ } ~ ~ /* Searching routines. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Like re_search_2, below, but only one string is specified, and ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ doesn't let you say where to stop matching. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int ~~~ re_search (struct re_pattern_buffer *bufp, const char *string, int size, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int startpos, int range, struct re_registers *regs ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RE_LISP_CONTEXT_ARGS_DECL) ~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ return re_search_2 (bufp, NULL, 0, string, size, startpos, range, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regs, size RE_LISP_CONTEXT_ARGS); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* Using the compiled pattern in BUFP->buffer, first tries to match the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ virtual concatenation of STRING1 and STRING2, starting first at index ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ STARTPOS, then at STARTPOS + 1, and so on. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ STRING1 and STRING2 have length SIZE1 and SIZE2, respectively. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RANGE is how far to scan while trying to match. RANGE = 0 means try ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ only at STARTPOS; in general, the last start tried is STARTPOS + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RANGE. ~~~~~~ All sizes and positions refer to bytes (not chars); under Mule, the code ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ knows about the format of the text and will only check at positions ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ where a character starts. ~~~~~~~~~~~~~~~~~~~~~~~~~ With MULE, RANGE is a byte position, not a char position. The last ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ start tried is the character starting <= STARTPOS + RANGE. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ In REGS, return the indices of the virtual concatenation of STRING1 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ and STRING2 that matched the entire BUFP->buffer and its contained ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ subexpressions. ~~~~~~~~~~~~~~~ Do not consider matching one past the index STOP in the virtual ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ concatenation of STRING1 and STRING2. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ We return either the position in the strings at which the match was ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ found, -1 if no match, or -2 if error (such as failure ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ stack overflow). */ ~~~~~~~~~~~~~~~~~~~~ int ~~~ re_search_2 (struct re_pattern_buffer *bufp, const char *str1, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int size1, const char *str2, int size2, int startpos, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int range, struct re_registers *regs, int stop ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RE_LISP_CONTEXT_ARGS_DECL) ~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ int val; ~~~~~~~~ re_char *string1 = (re_char *) str1; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_char *string2 = (re_char *) str2; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ REGISTER char *fastmap = bufp->fastmap; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ REGISTER RE_TRANSLATE_TYPE translate = bufp->translate; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int total_size = size1 + size2; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int endpos = startpos + range; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef REGEX_BEGLINE_CHECK ~~~~~~~~~~~~~~~~~~~~~~~~~~ int anchored_at_begline = 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ re_char *d; ~~~~~~~~~~~ #ifdef emacs ~~~~~~~~~~~~ Internal_Format fmt = buffer_or_other_internal_format (lispobj); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef REL_ALLOC ~~~~~~~~~~~~~~~~ const Ibyte *orig_buftext = ~~~~~~~~~~~~~~~~~~~~~~~~~~~ BUFFERP (lispobj) ? ~~~~~~~~~~~~~~~~~~~ BYTE_BUF_BYTE_ADDRESS (XBUFFER (lispobj), ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ BYTE_BUF_BEG (XBUFFER (lispobj))) : ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 0; ~~ #endif ~~~~~~ #ifdef ERROR_CHECK_MALLOC ~~~~~~~~~~~~~~~~~~~~~~~~~ int depth; ~~~~~~~~~~ #endif ~~~~~~ #endif /* emacs */ ~~~~~~~~~~~~~~~~~~ int forward_search_p; ~~~~~~~~~~~~~~~~~~~~~ /* Check for out-of-range STARTPOS. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (startpos < 0 || startpos > total_size) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return -1; ~~~~~~~~~~ /* Fix up RANGE if it might eventually take us outside ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the virtual concatenation of STRING1 and STRING2. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (endpos < 0) ~~~~~~~~~~~~~~~ range = 0 - startpos; ~~~~~~~~~~~~~~~~~~~~~ else if (endpos > total_size) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ range = total_size - startpos; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ forward_search_p = range > 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (void) (forward_search_p); /* This is only used with assertions, silence the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ compiler warning when they're turned off. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* If the search isn't to be a backwards one, don't waste time in a ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ search for a pattern that must be anchored. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (bufp->used > 0 && (re_opcode_t) bufp->buffer[0] == begbuf && range > 0) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ if (startpos > 0) ~~~~~~~~~~~~~~~~~ return -1; ~~~~~~~~~~ else ~~~~ { ~ d = ((const unsigned char *) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (startpos >= size1 ? string2 - size1 : string1) + startpos); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ range = itext_ichar_len_fmt (d, fmt); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ } ~ #ifdef emacs ~~~~~~~~~~~~ /* In a forward search for something that starts with \=. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ don't keep searching past point. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (bufp->used > 0 && (re_opcode_t) bufp->buffer[0] == at_dot && range > 0) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ if (!BUFFERP (lispobj)) ~~~~~~~~~~~~~~~~~~~~~~~ return -1; ~~~~~~~~~~ range = (BYTE_BUF_PT (XBUFFER (lispobj)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - BYTE_BUF_BEGV (XBUFFER (lispobj)) - startpos); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (range < 0) ~~~~~~~~~~~~~~ return -1; ~~~~~~~~~~ } ~ #endif /* emacs */ ~~~~~~~~~~~~~~~~~~ #ifdef ERROR_CHECK_MALLOC ~~~~~~~~~~~~~~~~~~~~~~~~~ /* Do this after the above return()s. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ depth = bind_regex_malloc_disallowed (1); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ /* Update the fastmap now if not correct already. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ BEGIN_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~~~ if (fastmap && !bufp->fastmap_accurate) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (re_compile_fastmap (bufp RE_LISP_SHORT_CONTEXT_ARGS) == -2) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ END_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~ UNBIND_REGEX_MALLOC_CHECK (); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return -2; ~~~~~~~~~~ } ~ END_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~ RE_SEARCH_RELOCATE_MOVEABLE_DATA_POINTERS (); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef REGEX_BEGLINE_CHECK ~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ long i = 0; ~~~~~~~~~~~ while (i < bufp->used) ~~~~~~~~~~~~~~~~~~~~~~ { ~ if (bufp->buffer[i] == start_memory || ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->buffer[i] == stop_memory) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ i += 4; ~~~~~~~ else ~~~~ break; ~~~~~~ } ~ anchored_at_begline = i < bufp->used && bufp->buffer[i] == begline; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ #endif ~~~~~~ #ifdef emacs ~~~~~~~~~~~~ BEGIN_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~~~ /* Update the mirror syntax table if it's used and dirty. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ SYNTAX_CODE (BUFFER_MIRROR_SYNTAX_TABLE (lispbuf), 'a'); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ scache = setup_syntax_cache (scache, lispobj, lispbuf, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ offset_to_bytexpos (lispobj, startpos), ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 1); ~~~ END_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~ RE_SEARCH_RELOCATE_MOVEABLE_DATA_POINTERS (); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ /* Loop through the string, looking for a place to start matching. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (;;) ~~~~~~~~ { ~ #ifdef REGEX_BEGLINE_CHECK ~~~~~~~~~~~~~~~~~~~~~~~~~~ /* If the regex is anchored at the beginning of a line (i.e. with a ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ^), then we can speed things up by skipping to the next ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ beginning-of-line. However, to determine "beginning of line" we ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ need to look at the previous char, so can't do this check if at ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ beginning of either string. (Well, we could if at the beginning of ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the second string, but it would require additional code, and this ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ is just an optimization.) */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (anchored_at_begline && startpos > 0 && startpos != size1) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ if (range > 0) ~~~~~~~~~~~~~~ { ~ /* whose stupid idea was it anyway to make this ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ function take two strings to match?? */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int lim = 0; ~~~~~~~~~~~~ re_char *orig_d; ~~~~~~~~~~~~~~~~ re_char *stop_d; ~~~~~~~~~~~~~~~~ /* Compute limit as below in fastmap code, so we are guaranteed ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ to remain within a single string. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (startpos < size1 && startpos + range >= size1) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ lim = range - (size1 - startpos); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ d = ((const unsigned char *) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (startpos >= size1 ? string2 - size1 : string1) + startpos); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ orig_d = d; ~~~~~~~~~~~ stop_d = d + range - lim; ~~~~~~~~~~~~~~~~~~~~~~~~~ /* We want to find the next location (including the current ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ one) where the previous char is a newline, so back up one ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ and search forward for a newline. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEC_IBYTEPTR_FMT (d, fmt); /* Ok, since startpos != size1. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Written out as an if-else to avoid testing `translate' ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ inside the loop. */ ~~~~~~~~~~~~~~~~~~~~ if (TRANSLATE_P (translate)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ while (d < stop_d && ~~~~~~~~~~~~~~~~~~~~ RE_TRANSLATE_1 (itext_ichar_fmt (d, fmt, lispobj)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ != '\n') ~~~~~~~~ INC_IBYTEPTR_FMT (d, fmt); ~~~~~~~~~~~~~~~~~~~~~~~~~~ else ~~~~ while (d < stop_d && ~~~~~~~~~~~~~~~~~~~~ itext_ichar_ascii_fmt (d, fmt, lispobj) != '\n') ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ INC_IBYTEPTR_FMT (d, fmt); ~~~~~~~~~~~~~~~~~~~~~~~~~~ /* If we were stopped by a newline, skip forward over it. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Otherwise we will get in an infloop when our start position ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ was at begline. */ ~~~~~~~~~~~~~~~~~~ if (d < stop_d) ~~~~~~~~~~~~~~~ INC_IBYTEPTR_FMT (d, fmt); ~~~~~~~~~~~~~~~~~~~~~~~~~~ range -= d - orig_d; ~~~~~~~~~~~~~~~~~~~~ startpos += d - orig_d; ~~~~~~~~~~~~~~~~~~~~~~~ assert (!forward_search_p || range >= 0); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ else if (range < 0) ~~~~~~~~~~~~~~~~~~~ { ~ /* We're lazy, like in the fastmap code below */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Ichar c; ~~~~~~~~ d = ((const unsigned char *) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (startpos >= size1 ? string2 - size1 : string1) + startpos); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEC_IBYTEPTR_FMT (d, fmt); ~~~~~~~~~~~~~~~~~~~~~~~~~~ c = itext_ichar_fmt (d, fmt, lispobj); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ c = RE_TRANSLATE (c); ~~~~~~~~~~~~~~~~~~~~~ if (c != '\n') ~~~~~~~~~~~~~~ goto advance; ~~~~~~~~~~~~~ } ~ } ~ #endif /* REGEX_BEGLINE_CHECK */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* If a fastmap is supplied, skip quickly over characters that ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ cannot be the start of a match. If the pattern can match the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ null string, however, we don't need to skip characters; we want ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the first null string. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~ if (fastmap && startpos < total_size && !bufp->can_be_null) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ /* For the moment, fastmap always works as if buffer ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ is in default format, so convert chars in the search strings ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ into default format as we go along, if necessary. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ &&#### fastmap needs rethinking for 8-bit-fixed so ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ it's faster. We need it to reflect the raw ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 8-bit-fixed values. That isn't so hard if we assume ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ that the top 96 bytes represent a single 1-byte ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ charset. For 16-bit/32-bit stuff it's probably not ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ worth it to make the fastmap represent the raw, due to ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ its nature -- we'd have to use the LSB for the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap, and that causes lots of problems with Mule ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ chars, where it essentially wipes out the usefulness ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ of the fastmap entirely. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (range > 0) /* Searching forwards. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ int lim = 0; ~~~~~~~~~~~~ int irange = range; ~~~~~~~~~~~~~~~~~~~ if (startpos < size1 && startpos + range >= size1) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ lim = range - (size1 - startpos); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ d = ((const unsigned char *) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (startpos >= size1 ? string2 - size1 : string1) + startpos); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Written out as an if-else to avoid testing `translate' ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ inside the loop. */ ~~~~~~~~~~~~~~~~~~~~ if (TRANSLATE_P (translate)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ while (range > lim) ~~~~~~~~~~~~~~~~~~~ { ~ re_char *old_d = d; ~~~~~~~~~~~~~~~~~~~ #ifdef MULE ~~~~~~~~~~~ Ibyte tempch[MAX_ICHAR_LEN]; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Ichar buf_ch = ~~~~~~~~~~~~~~ RE_TRANSLATE_1 (itext_ichar_fmt (d, fmt, lispobj)); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ set_itext_ichar (tempch, buf_ch); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (fastmap[*tempch]) ~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ #else ~~~~~ if (fastmap[(unsigned char) RE_TRANSLATE_1 (*d)]) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ #endif /* MULE */ ~~~~~~~~~~~~~~~~~ INC_IBYTEPTR_FMT (d, fmt); ~~~~~~~~~~~~~~~~~~~~~~~~~~ range -= (d - old_d); ~~~~~~~~~~~~~~~~~~~~~ assert (!forward_search_p || range >= 0); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ } ~ #ifdef MULE ~~~~~~~~~~~ else if (fmt != FORMAT_DEFAULT) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ while (range > lim) ~~~~~~~~~~~~~~~~~~~ { ~ re_char *old_d = d; ~~~~~~~~~~~~~~~~~~~ Ibyte tempch[MAX_ICHAR_LEN]; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Ichar buf_ch = itext_ichar_fmt (d, fmt, lispobj); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ set_itext_ichar (tempch, buf_ch); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (fastmap[*tempch]) ~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ INC_IBYTEPTR_FMT (d, fmt); ~~~~~~~~~~~~~~~~~~~~~~~~~~ range -= (d - old_d); ~~~~~~~~~~~~~~~~~~~~~ assert (!forward_search_p || range >= 0); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ } ~ #endif /* MULE */ ~~~~~~~~~~~~~~~~~ else ~~~~ { ~ while (range > lim && !fastmap[*d]) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ re_char *old_d = d; ~~~~~~~~~~~~~~~~~~~ INC_IBYTEPTR (d); ~~~~~~~~~~~~~~~~~ range -= (d - old_d); ~~~~~~~~~~~~~~~~~~~~~ assert (!forward_search_p || range >= 0); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ } ~ startpos += irange - range; ~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ else /* Searching backwards. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ /* #### It's not clear why we don't just write a loop, like ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for the moving-forward case. Perhaps the writer got lazy, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ since backward searches aren't so common. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ d = ((const unsigned char *) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (startpos >= size1 ? string2 - size1 : string1) + startpos); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef MULE ~~~~~~~~~~~ { ~ Ibyte tempch[MAX_ICHAR_LEN]; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Ichar buf_ch = ~~~~~~~~~~~~~~ RE_TRANSLATE (itext_ichar_fmt (d, fmt, lispobj)); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ set_itext_ichar (tempch, buf_ch); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!fastmap[*tempch]) ~~~~~~~~~~~~~~~~~~~~~~ goto advance; ~~~~~~~~~~~~~ } ~ #else ~~~~~ if (!fastmap[(unsigned char) RE_TRANSLATE (*d)]) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto advance; ~~~~~~~~~~~~~ #endif /* MULE */ ~~~~~~~~~~~~~~~~~ } ~ } ~ /* If can't match the null string, and that's all we have left, fail. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (range >= 0 && startpos == total_size && fastmap ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ && !bufp->can_be_null) ~~~~~~~~~~~~~~~~~~~~~~ { ~ UNBIND_REGEX_MALLOC_CHECK (); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return -1; ~~~~~~~~~~ } ~ #ifdef emacs /* XEmacs added, w/removal of immediate_quit */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!no_quit_in_re_search) ~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ BEGIN_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~~~ QUIT; ~~~~~ END_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~ RE_SEARCH_RELOCATE_MOVEABLE_DATA_POINTERS (); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ #endif ~~~~~~ BEGIN_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~~~ val = re_match_2_internal (bufp, string1, size1, string2, size2, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ startpos, regs, stop ~~~~~~~~~~~~~~~~~~~~ RE_LISP_CONTEXT_ARGS); ~~~~~~~~~~~~~~~~~~~~~~ #ifndef REGEX_MALLOC ~~~~~~~~~~~~~~~~~~~~ ALLOCA_GARBAGE_COLLECT (); ~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ END_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~ RE_SEARCH_RELOCATE_MOVEABLE_DATA_POINTERS (); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (val >= 0) ~~~~~~~~~~~~~ { ~ UNBIND_REGEX_MALLOC_CHECK (); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return startpos; ~~~~~~~~~~~~~~~~ } ~ if (val == -2) ~~~~~~~~~~~~~~ { ~ UNBIND_REGEX_MALLOC_CHECK (); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return -2; ~~~~~~~~~~ } ~ RE_SEARCH_RELOCATE_MOVEABLE_DATA_POINTERS (); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ advance: ~~~~~~~~ if (!range) ~~~~~~~~~~~ break; ~~~~~~ else if (range > 0) ~~~~~~~~~~~~~~~~~~~ { ~ Bytecount d_size; ~~~~~~~~~~~~~~~~~ d = ((const unsigned char *) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (startpos >= size1 ? string2 - size1 : string1) + startpos); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ d_size = itext_ichar_len_fmt (d, fmt); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ range -= d_size; ~~~~~~~~~~~~~~~~ assert (!forward_search_p || range >= 0); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ startpos += d_size; ~~~~~~~~~~~~~~~~~~~ } ~ else ~~~~ { ~ Bytecount d_size; ~~~~~~~~~~~~~~~~~ /* Note startpos > size1 not >=. If we are on the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ string1/string2 boundary, we want to backup into string1. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ d = ((const unsigned char *) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (startpos > size1 ? string2 - size1 : string1) + startpos); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEC_IBYTEPTR_FMT (d, fmt); ~~~~~~~~~~~~~~~~~~~~~~~~~~ d_size = itext_ichar_len_fmt (d, fmt); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ range += d_size; ~~~~~~~~~~~~~~~~ assert (!forward_search_p || range >= 0); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ startpos -= d_size; ~~~~~~~~~~~~~~~~~~~ } ~ } ~ UNBIND_REGEX_MALLOC_CHECK (); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return -1; ~~~~~~~~~~ } /* re_search_2 */ ~~~~~~~~~~~~~~~~~~~ ~ /* Declarations and macros for re_match_2. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* This converts PTR, a pointer into one of the search strings `string1' ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ and `string2' into an offset from the beginning of that string. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define POINTER_TO_OFFSET(ptr) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (FIRST_STRING_P (ptr) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~ ? ((regoff_t) ((ptr) - string1)) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ : ((regoff_t) ((ptr) - string2 + size1))) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Macros for dealing with the split strings in re_match_2. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define MATCHING_IN_FIRST_STRING (dend == end_match_1) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Call before fetching a character with *d. This switches over to ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ string2 if necessary. */ ~~~~~~~~~~~~~~~~~~~~~~~~~ #define REGEX_PREFETCH() \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ while (d == dend) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { \ ~~~~~~~~~~~ /* End of string2 => fail. */ \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (dend == end_match_2) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto fail; \ ~~~~~~~~~~~~~~~~~~ /* End of string1 => advance to string2. */ \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ d = string2; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~ dend = end_match_2; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* Test if at very beginning or at very end of the virtual concatenation ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ of `string1' and `string2'. If only one string, it's `string2'. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define AT_STRINGS_BEG(d) ((d) == (size1 ? string1 : string2) || !size2) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define AT_STRINGS_END(d) ((d) == end2) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* XEmacs change: ~~~~~~~~~~~~~~~~~ If the given position straddles the string gap, return the equivalent ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ position that is before or after the gap, respectively; otherwise, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return the same position. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define POS_BEFORE_GAP_UNSAFE(d) ((d) == string2 ? end1 : (d)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define POS_AFTER_GAP_UNSAFE(d) ((d) == end1 ? string2 : (d)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Test if CH is a word-constituent character. (XEmacs change) */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define WORDCHAR_P(ch) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (SYNTAX (BUFFER_MIRROR_SYNTAX_TABLE (lispbuf), ch) == Sword) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Free everything we malloc. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef MATCH_MAY_ALLOCATE ~~~~~~~~~~~~~~~~~~~~~~~~~ #define FREE_VAR(var,type) if (var) REGEX_FREE (var, type); var = NULL ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define FREE_VARIABLES() \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ do { \ ~~~~~~~~~~~~~~ UNBIND_REGEX_MALLOC_CHECK (); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ REGEX_FREE_STACK (fail_stack.stack); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ FREE_VAR (regstart, re_char **); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ FREE_VAR (regend, re_char **); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ FREE_VAR (old_regstart, re_char **); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ FREE_VAR (old_regend, re_char **); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ FREE_VAR (best_regstart, re_char **); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ FREE_VAR (best_regend, re_char **); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ FREE_VAR (reg_info, register_info_type *); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ FREE_VAR (reg_dummy, re_char **); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ FREE_VAR (reg_info_dummy, register_info_type *); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } while (0) ~~~~~~~~~~~ #else /* not MATCH_MAY_ALLOCATE */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define FREE_VARIABLES() \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ do { \ ~~~~~~~~~~~ UNBIND_REGEX_MALLOC_CHECK (); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } while (0) ~~~~~~~~~~~ #endif /* MATCH_MAY_ALLOCATE */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* These values must meet several constraints. They must not be valid ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ register values, which means we can use numbers larger than MAX_REGNUM. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ They must differ by 1, because of NUM_FAILURE_ITEMS above. And the value ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for the lowest register must be larger than the value for the highest ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ register, so we do not try to actually save any registers when none are ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ active. */ ~~~~~~~~~~~ #define NO_HIGHEST_ACTIVE_REG (MAX_REGNUM + 1) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define NO_LOWEST_ACTIVE_REG (NO_HIGHEST_ACTIVE_REG + 1) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ~ /* Matching routines. */ ~~~~~~~~~~~~~~~~~~~~~~~~~ #ifndef emacs /* XEmacs never uses this. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* re_match is like re_match_2 except it takes only a single string. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int ~~~ re_match (struct re_pattern_buffer *bufp, const char *string, int size, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int pos, struct re_registers *regs ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RE_LISP_CONTEXT_ARGS_DECL) ~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ int result = re_match_2_internal (bufp, NULL, 0, (re_char *) string, size, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ pos, regs, size ~~~~~~~~~~~~~~~ RE_LISP_CONTEXT_ARGS); ~~~~~~~~~~~~~~~~~~~~~~ ALLOCA_GARBAGE_COLLECT (); ~~~~~~~~~~~~~~~~~~~~~~~~~~ return result; ~~~~~~~~~~~~~~ } ~ #endif /* not emacs */ ~~~~~~~~~~~~~~~~~~~~~~ /* re_match_2 matches the compiled pattern in BUFP against the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (virtual) concatenation of STRING1 and STRING2 (of length SIZE1 and ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ SIZE2, respectively). We start matching at POS, and stop matching ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ at STOP. ~~~~~~~~ If REGS is non-null and the `no_sub' field of BUFP is nonzero, we ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ store offsets for the substring each group matched in REGS. See the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ documentation for exactly how many groups we fill. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ We return -1 if no match, -2 if an internal error (such as the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ failure stack overflowing). Otherwise, we return the length of the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ matched substring. */ ~~~~~~~~~~~~~~~~~~~~~~ int ~~~ re_match_2 (struct re_pattern_buffer *bufp, const char *string1, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int size1, const char *string2, int size2, int pos, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ struct re_registers *regs, int stop ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RE_LISP_CONTEXT_ARGS_DECL) ~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ int result; ~~~~~~~~~~~ #ifdef emacs ~~~~~~~~~~~~ /* Update the mirror syntax table if it's dirty now, this would otherwise ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ cause a malloc() in charset_mule in re_match_2_internal() when checking ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ characters' syntax. */ ~~~~~~~~~~~~~~~~~~~~~~ SYNTAX_CODE (BUFFER_MIRROR_SYNTAX_TABLE (lispbuf), 'a'); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ scache = setup_syntax_cache (scache, lispobj, lispbuf, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ offset_to_bytexpos (lispobj, pos), ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 1); ~~~ #endif ~~~~~~ result = re_match_2_internal (bufp, (re_char *) string1, size1, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (re_char *) string2, size2, ~~~~~~~~~~~~~~~~~~~~~~~~~~~ pos, regs, stop ~~~~~~~~~~~~~~~ RE_LISP_CONTEXT_ARGS); ~~~~~~~~~~~~~~~~~~~~~~ ALLOCA_GARBAGE_COLLECT (); ~~~~~~~~~~~~~~~~~~~~~~~~~~ return result; ~~~~~~~~~~~~~~ } ~ /* This is a separate function so that we can force an alloca cleanup ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ afterwards. */ ~~~~~~~~~~~~~~~ static int ~~~~~~~~~~ re_match_2_internal (struct re_pattern_buffer *bufp, re_char *string1, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int size1, re_char *string2, int size2, int pos, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ struct re_registers *regs, int stop ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RE_LISP_CONTEXT_ARGS_MULE_DECL) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ /* General temporaries. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~ int mcnt; ~~~~~~~~~ re_char *p1; ~~~~~~~~~~~~ int should_succeed; /* XEmacs change */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Just past the end of the corresponding string. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_char *end1, *end2; ~~~~~~~~~~~~~~~~~~~~~ /* Pointers into string1 and string2, just past the last characters in ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ each to consider matching. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_char *end_match_1, *end_match_2; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Where we are in the data, and the end of the current string. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_char *d, *dend; ~~~~~~~~~~~~~~~~~~ /* Where we are in the pattern, and the end of the pattern. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ unsigned char *p; ~~~~~~~~~~~~~~~~~ re_char *pstart; ~~~~~~~~~~~~~~~~ REGISTER re_char *pend; ~~~~~~~~~~~~~~~~~~~~~~~ /* Mark the opcode just after a start_memory, so we can test for an ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ empty subpattern when we get to the stop_memory. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_char *just_past_start_mem = 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* We use this to map every character in the string. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RE_TRANSLATE_TYPE translate = bufp->translate; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Failure point stack. Each place that can handle a failure further ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ down the line pushes a failure point on this stack. It consists of ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ restart, regend, and reg_info for all registers corresponding to ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the subexpressions we're currently inside, plus the number of such ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ registers, and, finally, two char *'s. The first char * is where ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ to resume scanning the pattern; the second one is where to resume ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ scanning the strings. If the latter is zero, the failure point is ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ a ``dummy''; if a failure happens and the failure point is a dummy, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ it gets discarded and the next one is tried. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef MATCH_MAY_ALLOCATE /* otherwise, this is global. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fail_stack_type fail_stack; ~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ #ifdef DEBUG ~~~~~~~~~~~~ static int failure_id; ~~~~~~~~~~~~~~~~~~~~~~ int nfailure_points_pushed = 0, nfailure_points_popped = 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ #ifdef REGEX_REL_ALLOC ~~~~~~~~~~~~~~~~~~~~~~ /* This holds the pointer to the failure stack, when ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ it is allocated relocatably. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fail_stack_elt_t *failure_stack_ptr; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ /* We fill all the registers internally, independent of what we ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return, for use in backreferences. The number here includes ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ an element for register zero. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regnum_t num_regs = bufp->re_ngroups + 1; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* The currently active registers. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regnum_t lowest_active_reg = NO_LOWEST_ACTIVE_REG; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regnum_t highest_active_reg = NO_HIGHEST_ACTIVE_REG; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Information on the contents of registers. These are pointers into ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the input strings; they record just what was matched (on this ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ attempt) by a subexpression part of the pattern, that is, the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regnum-th regstart pointer points to where in the pattern we began ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ matching and the regnum-th regend points to right after where we ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ stopped matching the regnum-th subexpression. (The zeroth register ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ keeps track of what the whole pattern matches.) */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_char **regstart, **regend; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ /* If a group that's operated upon by a repetition operator fails to ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ match anything, then the register for its start will need to be ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ restored because it will have been set to wherever in the string we ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ are when we last see its open-group operator. Similarly for a ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ register's end. */ ~~~~~~~~~~~~~~~~~~~ #ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_char **old_regstart, **old_regend; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ /* The is_active field of reg_info helps us keep track of which (possibly ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ nested) subexpressions we are currently in. The matched_something ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ field of reg_info[reg_num] helps us tell whether or not we have ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ matched any of the pattern so far this time through the reg_num-th ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ subexpression. These two fields get reset each time through any ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ loop their register is in. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef MATCH_MAY_ALLOCATE /* otherwise, this is global. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ register_info_type *reg_info; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ /* The following record the register info as found in the above ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ variables when we find a match better than any we've seen before. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ This happens as we backtrack through the failure points, which in ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ turn happens only if we have not yet matched the entire string. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int best_regs_set = false; ~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_char **best_regstart, **best_regend; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ /* Logically, this is `best_regend[0]'. But we don't want to have to ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ allocate space for that if we're not allocating space for anything ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ else (see below). Also, we never need info about register 0 for ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ any of the other register vectors, and it seems rather a kludge to ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ treat `best_regend' differently than the rest. So we keep track of ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the end of the best match so far in a separate variable. We ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ initialize this to NULL so that when we backtrack the first time ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ and need to test it, it's not garbage. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_char *match_end = NULL; ~~~~~~~~~~~~~~~~~~~~~~~~~~ /* This helps SET_REGS_MATCHED avoid doing redundant work. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int set_regs_matched_done = 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Used when we pop values we don't care about. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_char **reg_dummy; ~~~~~~~~~~~~~~~~~~~~ register_info_type *reg_info_dummy; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ #ifdef DEBUG ~~~~~~~~~~~~ /* Counts the total number of registers pushed. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int num_regs_pushed = 0; ~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ /* 1 if this match ends in the same string (string1 or string2) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ as the best previous match. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_bool same_str_p; ~~~~~~~~~~~~~~~~~~~ /* 1 if this match is the best seen so far. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_bool best_match_p; ~~~~~~~~~~~~~~~~~~~~~ #ifdef emacs ~~~~~~~~~~~~ Internal_Format fmt = buffer_or_other_internal_format (lispobj); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef REL_ALLOC ~~~~~~~~~~~~~~~~ const Ibyte *orig_buftext = ~~~~~~~~~~~~~~~~~~~~~~~~~~~ BUFFERP (lispobj) ? ~~~~~~~~~~~~~~~~~~~ BYTE_BUF_BYTE_ADDRESS (XBUFFER (lispobj), ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ BYTE_BUF_BEG (XBUFFER (lispobj))) : ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 0; ~~ #endif ~~~~~~ #ifdef ERROR_CHECK_MALLOC ~~~~~~~~~~~~~~~~~~~~~~~~~ int depth = bind_regex_malloc_disallowed (1); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ #endif /* emacs */ ~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT1 ("\n\nEntering re_match_2.\n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ BEGIN_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~~~ INIT_FAIL_STACK (); ~~~~~~~~~~~~~~~~~~~ p = (unsigned char *) ALLOCA (bufp->used); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ END_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~ /* re_match_2_internal() modifies the compiled pattern (see the succeed_n, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ jump_n, set_number_at opcodes), make it re-entrant by working on a ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ copy. This should also give better locality of reference. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ memcpy (p, bufp->buffer, bufp->used); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ pstart = (re_char *) p; ~~~~~~~~~~~~~~~~~~~~~~~ pend = pstart + bufp->used; ~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef MATCH_MAY_ALLOCATE ~~~~~~~~~~~~~~~~~~~~~~~~~ /* Do not bother to initialize all the register variables if there are ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ no groups in the pattern, as it takes a fair amount of time. If ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ there are groups, we include space for register 0 (the whole ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ pattern), even though we never use it, since it simplifies the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ array indexing. We should fix this. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (bufp->re_ngroups) ~~~~~~~~~~~~~~~~~~~~~ { ~ BEGIN_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~~~ regstart = REGEX_TALLOC (num_regs, re_char *); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regend = REGEX_TALLOC (num_regs, re_char *); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ old_regstart = REGEX_TALLOC (num_regs, re_char *); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ old_regend = REGEX_TALLOC (num_regs, re_char *); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ best_regstart = REGEX_TALLOC (num_regs, re_char *); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ best_regend = REGEX_TALLOC (num_regs, re_char *); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ reg_info = REGEX_TALLOC (num_regs, register_info_type); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ reg_dummy = REGEX_TALLOC (num_regs, re_char *); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ reg_info_dummy = REGEX_TALLOC (num_regs, register_info_type); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ END_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~ if (!(regstart && regend && old_regstart && old_regend && reg_info ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ && best_regstart && best_regend && reg_dummy && reg_info_dummy)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ FREE_VARIABLES (); ~~~~~~~~~~~~~~~~~~ return -2; ~~~~~~~~~~ } ~ } ~ else ~~~~ { ~ /* We must initialize all our variables to NULL, so that ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `FREE_VARIABLES' doesn't try to free them. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regstart = regend = old_regstart = old_regend = best_regstart ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ = best_regend = reg_dummy = NULL; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ reg_info = reg_info_dummy = (register_info_type *) NULL; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ #endif /* MATCH_MAY_ALLOCATE */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #if defined (emacs) && defined (REL_ALLOC) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ /* If the allocations above (or the call to setup_syntax_cache() in ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_match_2) caused a rel-alloc relocation, then fix up the data ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ pointers */ ~~~~~~~~~~~ Bytecount offset = offset_post_relocation (lispobj, orig_buftext); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (offset) ~~~~~~~~~~~ { ~ string1 += offset; ~~~~~~~~~~~~~~~~~~ string2 += offset; ~~~~~~~~~~~~~~~~~~ } ~ } ~ #endif /* defined (emacs) && defined (REL_ALLOC) */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* The starting position is bogus. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (pos < 0 || pos > size1 + size2) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ FREE_VARIABLES (); ~~~~~~~~~~~~~~~~~~ return -1; ~~~~~~~~~~ } ~ /* Initialize subexpression text positions to our sentinel to mark ones that ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ no start_memory/stop_memory has been seen for. Also initialize the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ register information struct. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (mcnt = 1; mcnt < num_regs; mcnt++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ regstart[mcnt] = regend[mcnt] = old_regstart[mcnt] = old_regend[mcnt] ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ = best_regstart[mcnt] = best_regend[mcnt] = REG_UNSET_VALUE; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ REG_MATCH_NULL_STRING_P (reg_info[mcnt]) = MATCH_NULL_UNSET_VALUE; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ IS_ACTIVE (reg_info[mcnt]) = 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ MATCHED_SOMETHING (reg_info[mcnt]) = 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ EVER_MATCHED_SOMETHING (reg_info[mcnt]) = 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* We move `string1' into `string2' if the latter's empty -- but not if ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `string1' is null. */ ~~~~~~~~~~~~~~~~~~~~~~ if (size2 == 0 && string1 != NULL) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ string2 = string1; ~~~~~~~~~~~~~~~~~~ size2 = size1; ~~~~~~~~~~~~~~ string1 = 0; ~~~~~~~~~~~~ size1 = 0; ~~~~~~~~~~ } ~ end1 = string1 + size1; ~~~~~~~~~~~~~~~~~~~~~~~ end2 = string2 + size2; ~~~~~~~~~~~~~~~~~~~~~~~ /* Compute where to stop matching, within the two strings. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (stop <= size1) ~~~~~~~~~~~~~~~~~~ { ~ end_match_1 = string1 + stop; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ end_match_2 = string2; ~~~~~~~~~~~~~~~~~~~~~~ } ~ else ~~~~ { ~ end_match_1 = end1; ~~~~~~~~~~~~~~~~~~~ end_match_2 = string2 + stop - size1; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* `p' scans through the pattern as `d' scans through the data. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `dend' is the end of the input string that `d' points within. `d' ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ is advanced into the following input string whenever necessary, but ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ this happens before fetching; therefore, at the beginning of the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ loop, `d' can be pointing at the end of a string, but it cannot ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ equal `string2'. */ ~~~~~~~~~~~~~~~~~~~~ if (size1 > 0 && pos <= size1) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ d = string1 + pos; ~~~~~~~~~~~~~~~~~~ dend = end_match_1; ~~~~~~~~~~~~~~~~~~~ } ~ else ~~~~ { ~ d = string2 + pos - size1; ~~~~~~~~~~~~~~~~~~~~~~~~~~ dend = end_match_2; ~~~~~~~~~~~~~~~~~~~ } ~ DEBUG_MATCH_PRINT1 ("The compiled pattern is: \n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT_COMPILED_PATTERN (bufp, p, pend); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT1 ("The string to match is: `"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT_DOUBLE_STRING (d, string1, size1, string2, size2); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT1 ("'\n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* This loops over pattern commands. It exits by returning from the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ function if the match is complete, or it drops through if the match ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fails at this starting point in the input data. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (;;) ~~~~~~~~ { ~ DEBUG_MATCH_PRINT2 ("\n0x%zx: ", (Bytecount) p); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef emacs /* XEmacs added, w/removal of immediate_quit */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!no_quit_in_re_search) ~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ BEGIN_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~~~ QUIT; ~~~~~ END_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~ RE_MATCH_RELOCATE_MOVEABLE_DATA_POINTERS (); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ #endif ~~~~~~ if (p == pend) ~~~~~~~~~~~~~~ { /* End of pattern means we might have succeeded. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT1 ("end of pattern ... "); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* If we haven't matched the entire string, and we want the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ longest match, try backtracking. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (d != end_match_2) ~~~~~~~~~~~~~~~~~~~~~ { ~ same_str_p = (FIRST_STRING_P (match_end) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ == MATCHING_IN_FIRST_STRING); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* AIX compiler got confused when this was combined ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ with the previous declaration. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (same_str_p) ~~~~~~~~~~~~~~~ best_match_p = d > match_end; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ else ~~~~ best_match_p = !MATCHING_IN_FIRST_STRING; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT1 ("backtracking.\n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!FAIL_STACK_EMPTY ()) ~~~~~~~~~~~~~~~~~~~~~~~~~ { /* More failure points to try. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* If exceeds best match so far, save it. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!best_regs_set || best_match_p) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ best_regs_set = true; ~~~~~~~~~~~~~~~~~~~~~ match_end = d; ~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT1 ("\nSAVING match as best so far.\n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (mcnt = 1; mcnt < num_regs; mcnt++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ best_regstart[mcnt] = regstart[mcnt]; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ best_regend[mcnt] = regend[mcnt]; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ } ~ goto fail; ~~~~~~~~~~ } ~ /* If no failure points, don't restore garbage. And if ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ last match is real best match, don't restore second ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ best one. */ ~~~~~~~~~~~~ else if (best_regs_set && !best_match_p) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ restore_best_regs: ~~~~~~~~~~~~~~~~~~ /* Restore best match. It may happen that `dend == ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ end_match_1' while the restored d is in string2. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ For example, the pattern `x.*y.*z' against the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ strings `x-' and `y-z-', if the two strings are ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ not consecutive in memory. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT1 ("Restoring best registers.\n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ d = match_end; ~~~~~~~~~~~~~~ dend = ((d >= string1 && d <= end1) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ? end_match_1 : end_match_2); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (mcnt = 1; mcnt < num_regs; mcnt++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ regstart[mcnt] = best_regstart[mcnt]; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regend[mcnt] = best_regend[mcnt]; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ } ~ } /* d != end_match_2 */ ~~~~~~~~~~~~~~~~~~~~~~~~ succeed_label: ~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT1 ("Accepting match.\n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* If caller wants register contents data back, do it. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ int num_nonshy_regs = bufp->re_nsub + 1; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (regs && !bufp->no_sub) ~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ /* Have the register data arrays been allocated? */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (bufp->regs_allocated == REGS_UNALLOCATED) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { /* No. So allocate them with malloc. We need one ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ extra element beyond `num_regs' for the `-1' marker ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ GNU code uses. */ ~~~~~~~~~~~~~~~~~~ regs->num_regs = MAX (RE_NREGS, num_nonshy_regs + 1); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ BEGIN_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~~~ regs->start = TALLOC (regs->num_regs, regoff_t); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regs->end = TALLOC (regs->num_regs, regoff_t); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ END_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~ RE_MATCH_RELOCATE_MOVEABLE_DATA_POINTERS (); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (regs->start == NULL || regs->end == NULL) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ FREE_VARIABLES (); ~~~~~~~~~~~~~~~~~~ return -2; ~~~~~~~~~~ } ~ bufp->regs_allocated = REGS_REALLOCATE; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ else if (bufp->regs_allocated == REGS_REALLOCATE) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { /* Yes. If we need more elements than were already ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ allocated, reallocate them. If we need fewer, just ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ leave it alone. */ ~~~~~~~~~~~~~~~~~~~ if (regs->num_regs < num_nonshy_regs + 1) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ regs->num_regs = num_nonshy_regs + 1; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ BEGIN_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~~~ RETALLOC (regs->start, regs->num_regs, regoff_t); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RETALLOC (regs->end, regs->num_regs, regoff_t); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ END_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~ RE_MATCH_RELOCATE_MOVEABLE_DATA_POINTERS (); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (regs->start == NULL || regs->end == NULL) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ FREE_VARIABLES (); ~~~~~~~~~~~~~~~~~~ return -2; ~~~~~~~~~~ } ~ } ~ } ~ else ~~~~ { ~ /* The braces fend off a "empty body in an else-statement" ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ warning under GCC when assert expands to nothing. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ assert (bufp->regs_allocated == REGS_FIXED); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* Convert the pointer data in `regstart' and `regend' to ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ indices. Register zero has to be set differently, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ since we haven't kept track of any info for it. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (regs->num_regs > 0) ~~~~~~~~~~~~~~~~~~~~~~~ { ~ regs->start[0] = pos; ~~~~~~~~~~~~~~~~~~~~~ regs->end[0] = (MATCHING_IN_FIRST_STRING ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ? ((regoff_t) (d - string1)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ : ((regoff_t) (d - string2 + size1))); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* Map over the NUM_NONSHY_REGS non-shy internal registers. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Copy each into the corresponding external register. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ MCNT indexes external registers. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (mcnt = 1; mcnt < MIN (num_nonshy_regs, regs->num_regs); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ mcnt++) ~~~~~~~ { ~ int internal_reg = bufp->external_to_internal_register[mcnt]; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if ((int)0xDEADBEEF == internal_reg ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || REG_UNSET (regstart[internal_reg]) || ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ REG_UNSET (regend[internal_reg])) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regs->start[mcnt] = regs->end[mcnt] = -1; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ else ~~~~ { ~ regs->start[mcnt] = ~~~~~~~~~~~~~~~~~~~ (regoff_t) POINTER_TO_OFFSET (regstart[internal_reg]); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regs->end[mcnt] = ~~~~~~~~~~~~~~~~~ (regoff_t) POINTER_TO_OFFSET (regend[internal_reg]); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ } ~ } /* regs && !bufp->no_sub */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* If we have regs and the regs structure has more elements than ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ were in the pattern, set the extra elements starting with ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ NUM_NONSHY_REGS to -1. If we (re)allocated the registers, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ this is the case, because we always allocate enough to have ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ at least one -1 at the end. ~~~~~~~~~~~~~~~~~~~~~~~~~~~ We do this even when no_sub is set because some applications ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (XEmacs) reuse register structures which may contain stale ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ information, and permit attempts to access those registers. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ It would be possible to require the caller to do this, but we'd ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ have to change the API for this function to reflect that, and ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ audit all callers. Note: as of 2003-04-17 callers in XEmacs ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ do clear the registers, but it's safer to leave this code in ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ because of reallocation. ~~~~~~~~~~~~~~~~~~~~~~~~ */ ~~ if (regs && regs->num_regs > 0) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (mcnt = num_nonshy_regs; mcnt < regs->num_regs; mcnt++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regs->start[mcnt] = regs->end[mcnt] = -1; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ DEBUG_MATCH_PRINT4 ("%u failure points pushed, %u popped (%u remain).\n", ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ nfailure_points_pushed, nfailure_points_popped, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ nfailure_points_pushed - nfailure_points_popped); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT2 ("%u registers pushed.\n", num_regs_pushed); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ mcnt = d - pos - (MATCHING_IN_FIRST_STRING ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ? string1 ~~~~~~~~~ : string2 - size1); ~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT2 ("Returning %d from re_match_2.\n", mcnt); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ FREE_VARIABLES (); ~~~~~~~~~~~~~~~~~~ return mcnt; ~~~~~~~~~~~~ } ~ /* Otherwise match next pattern command. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ switch ((re_opcode_t) *p++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ /* Ignore these. Used to ignore the n of succeed_n's which ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ currently have n == 0. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~ case no_op: ~~~~~~~~~~~ DEBUG_MATCH_PRINT1 ("EXECUTING no_op.\n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case succeed: ~~~~~~~~~~~~~ DEBUG_MATCH_PRINT1 ("EXECUTING succeed.\n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto succeed_label; ~~~~~~~~~~~~~~~~~~~ /* Match exactly a string of length n in the pattern. The ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ following byte in the pattern defines n, and the n bytes after ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ that make up the string to match. (Under Mule, this will be in ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the default internal format.) */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case exactn: ~~~~~~~~~~~~ mcnt = *p++; ~~~~~~~~~~~~ DEBUG_MATCH_PRINT2 ("EXECUTING exactn %d.\n", mcnt); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* This is written out as an if-else so we don't waste time ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ testing `translate' inside the loop. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (TRANSLATE_P (translate)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ do ~~ { ~ #ifdef MULE ~~~~~~~~~~~ Bytecount pat_len; ~~~~~~~~~~~~~~~~~~ REGEX_PREFETCH (); ~~~~~~~~~~~~~~~~~~ if (RE_TRANSLATE_1 (itext_ichar_fmt (d, fmt, lispobj)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ != itext_ichar (p)) ~~~~~~~~~~~~~~~~~~~ goto fail; ~~~~~~~~~~ pat_len = itext_ichar_len (p); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ p += pat_len; ~~~~~~~~~~~~~ INC_IBYTEPTR_FMT (d, fmt); ~~~~~~~~~~~~~~~~~~~~~~~~~~ mcnt -= pat_len; ~~~~~~~~~~~~~~~~ #else /* not MULE */ ~~~~~~~~~~~~~~~~~~~~ REGEX_PREFETCH (); ~~~~~~~~~~~~~~~~~~ if ((unsigned char) RE_TRANSLATE_1 (*d++) != *p++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto fail; ~~~~~~~~~~ mcnt--; ~~~~~~~ #endif ~~~~~~ } ~ while (mcnt > 0); ~~~~~~~~~~~~~~~~~ } ~ else ~~~~ { ~ #ifdef MULE ~~~~~~~~~~~ /* If buffer format is default, then we can shortcut and just ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ compare the text directly, byte by byte. Otherwise, we ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ need to go character by character. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (fmt != FORMAT_DEFAULT) ~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ do ~~ { ~ Bytecount pat_len; ~~~~~~~~~~~~~~~~~~ REGEX_PREFETCH (); ~~~~~~~~~~~~~~~~~~ if (itext_ichar_fmt (d, fmt, lispobj) != ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ itext_ichar (p)) ~~~~~~~~~~~~~~~~ goto fail; ~~~~~~~~~~ pat_len = itext_ichar_len (p); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ p += pat_len; ~~~~~~~~~~~~~ INC_IBYTEPTR_FMT (d, fmt); ~~~~~~~~~~~~~~~~~~~~~~~~~~ mcnt -= pat_len; ~~~~~~~~~~~~~~~~ } ~ while (mcnt > 0); ~~~~~~~~~~~~~~~~~ } ~ else ~~~~ #endif ~~~~~~ { ~ do ~~ { ~ REGEX_PREFETCH (); ~~~~~~~~~~~~~~~~~~ if (*d++ != *p++) goto fail; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ mcnt--; ~~~~~~~ } ~ while (mcnt > 0); ~~~~~~~~~~~~~~~~~ } ~ } ~ SET_REGS_MATCHED (); ~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ /* Match any character except possibly a newline or a null. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case anychar: ~~~~~~~~~~~~~ DEBUG_MATCH_PRINT1 ("EXECUTING anychar.\n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ REGEX_PREFETCH (); ~~~~~~~~~~~~~~~~~~ if ((!(bufp->syntax & RE_DOT_NEWLINE) && ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RE_TRANSLATE (itext_ichar_fmt (d, fmt, lispobj)) == '\n') ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || (bufp->syntax & RE_DOT_NOT_NULL && ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RE_TRANSLATE (itext_ichar_fmt (d, fmt, lispobj)) == ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ '\000')) ~~~~~~~~ goto fail; ~~~~~~~~~~ SET_REGS_MATCHED (); ~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT2 (" Matched `%c'.\n", *d); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ INC_IBYTEPTR_FMT (d, fmt); /* XEmacs change */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case charset: ~~~~~~~~~~~~~ case charset_not: ~~~~~~~~~~~~~~~~~ { ~ REGISTER Ichar c; ~~~~~~~~~~~~~~~~~ re_bool not_p = (re_opcode_t) *(p - 1) == charset_not; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT2 ("EXECUTING charset%s.\n", not_p ? "_not" : ""); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ REGEX_PREFETCH (); ~~~~~~~~~~~~~~~~~~ c = itext_ichar_fmt (d, fmt, lispobj); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ c = RE_TRANSLATE (c); /* The character to match. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Cast to `unsigned int' instead of `unsigned char' in case the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bit list is a full 32 bytes long. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if ((unsigned int)c < (unsigned int) (*p * BYTEWIDTH) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ && p[1 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH))) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ not_p = !not_p; ~~~~~~~~~~~~~~~ p += 1 + *p; ~~~~~~~~~~~~ if (!not_p) goto fail; ~~~~~~~~~~~~~~~~~~~~~~ SET_REGS_MATCHED (); ~~~~~~~~~~~~~~~~~~~~ INC_IBYTEPTR_FMT (d, fmt); /* XEmacs change */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ } ~ #ifdef MULE ~~~~~~~~~~~ case charset_mule: ~~~~~~~~~~~~~~~~~~ case charset_mule_not: ~~~~~~~~~~~~~~~~~~~~~~ { ~ REGISTER Ichar c; ~~~~~~~~~~~~~~~~~ re_bool not_p = (re_opcode_t) *(p - 1) == charset_mule_not; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Bitbyte class_bits = *p++; ~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT2 ("EXECUTING charset_mule%s.\n", ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ not_p ? "_not" : ""); ~~~~~~~~~~~~~~~~~~~~~ REGEX_PREFETCH (); ~~~~~~~~~~~~~~~~~~ c = itext_ichar_fmt (d, fmt, lispobj); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ c = RE_TRANSLATE (c); /* The character to match. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if ((class_bits && ~~~~~~~~~~~~~~~~~~ ((class_bits & BIT_WORD && ISWORD (c)) /* = ALNUM */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || (class_bits & BIT_ALPHA && ISALPHA (c)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || (class_bits & BIT_SPACE && ISSPACE (c)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || (class_bits & BIT_PUNCT && ISPUNCT (c)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || (TRANSLATE_P (translate) ? ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (class_bits & (BIT_UPPER | BIT_LOWER) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ && !NOCASEP (lispbuf, c)) ~~~~~~~~~~~~~~~~~~~~~~~~~ : ((class_bits & BIT_UPPER && ISUPPER (c)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || (class_bits & BIT_LOWER && ISLOWER (c)))))) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || EQ (Qt, unified_range_table_lookup ((void *) p, c, Qnil))) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ not_p = !not_p; ~~~~~~~~~~~~~~~ } ~ p += unified_range_table_bytes_used ((void *) p); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!not_p) goto fail; ~~~~~~~~~~~~~~~~~~~~~~ SET_REGS_MATCHED (); ~~~~~~~~~~~~~~~~~~~~ INC_IBYTEPTR_FMT (d, fmt); ~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ } ~ #endif /* MULE */ ~~~~~~~~~~~~~~~~~ /* The beginning of a group is represented by start_memory. The ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ arguments are the register number in the next two bytes, and the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ number of groups inner to this one in the two bytes thereafter. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ The text matched within the group is recorded (in the internal ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ registers data structure) under the register number. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case start_memory: ~~~~~~~~~~~~~~~~~~ { ~ regnum_t regno; ~~~~~~~~~~~~~~~ /* Find out if this group can match the empty string. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ p1 = p; /* To send to group_match_null_string_p. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ EXTRACT_NONNEGATIVE_AND_INCR (regno, p); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT3 ("EXECUTING start_memory %d (%d):\n", ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regno, extract_number (p)); ~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (REG_MATCH_NULL_STRING_P (reg_info[regno]) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ == MATCH_NULL_UNSET_VALUE) ~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ REG_MATCH_NULL_STRING_P (reg_info[regno]) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ = group_match_null_string_p (&p1, pend, reg_info); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ DEBUG_MATCH_PRINT2 (" group CAN%s match null string\n", ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ REG_MATCH_NULL_STRING_P (reg_info[regno]) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ? "NOT" : ""); ~~~~~~~~~~~~~~ /* Save the position in the string where we were the last time ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ we were at this open-group operator in case the group is ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ operated upon by a repetition operator, e.g., with `(a*)*b' ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ against `ab'; then we want to ignore where we are now in the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ string in case this attempt to match fails. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ old_regstart[regno] = REG_MATCH_NULL_STRING_P (reg_info[regno]) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ? REG_UNSET (regstart[regno]) ? d : regstart[regno] ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ : regstart[regno]; ~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT2 (" old_regstart: %zd\n", ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ POINTER_TO_OFFSET (old_regstart[regno])); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regstart[regno] = d; ~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT2 (" regstart: %zd\n", ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ POINTER_TO_OFFSET (regstart[regno])); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ IS_ACTIVE (reg_info[regno]) = 1; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ MATCHED_SOMETHING (reg_info[regno]) = 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Clear this whenever we change the register activity status. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ set_regs_matched_done = 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~ /* This is the new highest active register. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ highest_active_reg = regno; ~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* If nothing was active before, this is the new lowest active ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ register. */ ~~~~~~~~~~~~~ if (lowest_active_reg == NO_LOWEST_ACTIVE_REG) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ lowest_active_reg = regno; ~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Move past the inner group count. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ p += 2; ~~~~~~~ just_past_start_mem = p; ~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ } ~ /* The stop_memory opcode represents the end of a group. Its ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ arguments are the same as start_memory's: the register ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ number, and the number of inner groups. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case stop_memory: ~~~~~~~~~~~~~~~~~ { ~ regnum_t regno, inner_groups; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ EXTRACT_NONNEGATIVE_AND_INCR (regno, p); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ EXTRACT_NONNEGATIVE_AND_INCR (inner_groups, p); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT3 ("EXECUTING stop_memory %d (%d):\n", ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regno, inner_groups); ~~~~~~~~~~~~~~~~~~~~~ /* We need to save the string position the last time we were at ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ this close-group operator in case the group is operated ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ upon by a repetition operator, e.g., with `((a*)*(b*)*)*' ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ against `aba'; then we want to ignore where we are now in ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the string in case this attempt to match fails. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ old_regend[regno] = REG_MATCH_NULL_STRING_P (reg_info[regno]) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ? REG_UNSET (regend[regno]) ? d : regend[regno] ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ : regend[regno]; ~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT2 (" old_regend: %zd\n", ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ POINTER_TO_OFFSET (old_regend[regno])); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regend[regno] = d; ~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT2 (" regend: %zd\n", ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ POINTER_TO_OFFSET (regend[regno])); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* This register isn't active anymore. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ IS_ACTIVE (reg_info[regno]) = 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Clear this whenever we change the register activity status. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ set_regs_matched_done = 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~ /* If this was the only register active, nothing is active ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ anymore. */ ~~~~~~~~~~~~ if (lowest_active_reg == highest_active_reg) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ lowest_active_reg = NO_LOWEST_ACTIVE_REG; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ highest_active_reg = NO_HIGHEST_ACTIVE_REG; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ else ~~~~ { ~ /* We must scan for the new highest active register, since it ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ isn't necessarily one less than now: consider ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (a(b)c(d(e)f)g). When group 3 ends, after the f), the new ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ highest active register is 1. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regnum_t r = regno - 1; ~~~~~~~~~~~~~~~~~~~~~~~ while (r > 0 && !IS_ACTIVE (reg_info[r])) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ r--; ~~~~ /* If we end up at register zero, that means that we saved ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the registers as the result of an `on_failure_jump', not ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ a `start_memory', and we jumped to past the innermost ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `stop_memory'. For example, in ((.)*) we save registers ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 1 and 2 as a result of the *, but when we pop back to the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ second ), we are at the stop_memory 1. Thus, nothing is ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ active. */ ~~~~~~~~~~~ if (r == 0) ~~~~~~~~~~~ { ~ lowest_active_reg = NO_LOWEST_ACTIVE_REG; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ highest_active_reg = NO_HIGHEST_ACTIVE_REG; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ else ~~~~ { ~ highest_active_reg = r; ~~~~~~~~~~~~~~~~~~~~~~~ /* 98/9/21 jhod: We've also gotta set lowest_active_reg, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ don't we? */ ~~~~~~~~~~~~ r = 1; ~~~~~~ while (r < highest_active_reg && !IS_ACTIVE(reg_info[r])) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ r++; ~~~~ lowest_active_reg = r; ~~~~~~~~~~~~~~~~~~~~~~ } ~ } ~ /* If just failed to match something this time around with a ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ group that's operated on by a repetition operator, try to ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ force exit from the ``loop'', and restore the register ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ information for this group that we had before trying this ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ last match. */ ~~~~~~~~~~~~~~~ if ((!MATCHED_SOMETHING (reg_info[regno]) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || just_past_start_mem == p - 4) && p < pend) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ re_bool is_a_jump_n = false; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ p1 = p; ~~~~~~~ mcnt = 0; ~~~~~~~~~ switch ((re_opcode_t) *p1++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ case jump_n: ~~~~~~~~~~~~ is_a_jump_n = true; ~~~~~~~~~~~~~~~~~~~ case pop_failure_jump: ~~~~~~~~~~~~~~~~~~~~~~ case maybe_pop_jump: ~~~~~~~~~~~~~~~~~~~~ case jump: ~~~~~~~~~~ case dummy_failure_jump: ~~~~~~~~~~~~~~~~~~~~~~~~ EXTRACT_NUMBER_AND_INCR (mcnt, p1); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (is_a_jump_n) ~~~~~~~~~~~~~~~~ p1 += 2; ~~~~~~~~ break; ~~~~~~ default: ~~~~~~~~ /* do nothing */ ; ~~~~~~~~~~~~~~~~~~ } ~ p1 += mcnt; ~~~~~~~~~~~ /* If the next operation is a jump backwards in the pattern ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ to an on_failure_jump right before the start_memory ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ corresponding to this stop_memory, exit from the loop ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ by forcing a failure after pushing on the stack the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ on_failure_jump's jump in the pattern, and d. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (mcnt < 0 && (re_opcode_t) *p1 == on_failure_jump ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ && (re_opcode_t) p1[3] == start_memory && ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regno == extract_nonnegative (p1 + 4)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ /* If this group ever matched anything, then restore ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ what its registers were before trying this last ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ failed match, e.g., with `(a*)*b' against `ab' for ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regstart[1], and, e.g., with `((a*)*(b*)*)*' against ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `aba' for regend[3]. ~~~~~~~~~~~~~~~~~~~~ Also restore the registers for inner groups for, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ e.g., `((a*)(b*))*' against `aba' (register 3 would ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ otherwise get trashed). */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (EVER_MATCHED_SOMETHING (reg_info[regno])) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ int r; ~~~~~~ EVER_MATCHED_SOMETHING (reg_info[regno]) = 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Restore this and inner groups' (if any) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ registers. */ ~~~~~~~~~~~~~~ for (r = regno; r < regno + inner_groups; r++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ regstart[r] = old_regstart[r]; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* xx why this test? */ ~~~~~~~~~~~~~~~~~~~~~~~~ if (old_regend[r] >= regstart[r]) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regend[r] = old_regend[r]; ~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ } ~ p1++; ~~~~~ EXTRACT_NUMBER_AND_INCR (mcnt, p1); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PUSH_FAILURE_POINT (p1 + mcnt, d, -2); ~~~~~~~~~~~~~~~~~~~~~~~~~~ regex.c:774:55: note: in definition of macro 'DEBUG_FAIL_PRINT2' if (debug_regexps & RE_DEBUG_FAILURE_POINT) printf (x1, x2) ^~ regex.c:6370:7: note: in expansion of macro 'PUSH_FAILURE_POINT' PUSH_FAILURE_POINT (p1 + mcnt, d, -2); ^~~~~~~~~~~~~~~~~~ regex.c:1817:26: warning: format '%zx' expects argument of type 'size_t', but argument 2 has type 'long int' [-Wformat=] DEBUG_FAIL_PRINT2 (" Pushing string 0x%zx: `", \ ^ (Bytecount) string_place); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_FAIL_PRINT_DOUBLE_STRING (string_place, string1, size1, \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ string2, size2); \ ~~~~~~~~~~~~~~~~~~~~ DEBUG_FAIL_PRINT1 ("'\n"); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_FAIL_PRINT2 (" Pushing failure id: %u\n", failure_id); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } \ ~~~~~~~~~~~ } while (0) ~~~~~~~~~~~ /* This is the number of items that are pushed and popped on the stack ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for each register. */ ~~~~~~~~~~~~~~~~~~~~~~ #define NUM_REG_ITEMS 3 ~~~~~~~~~~~~~~~~~~~~~~~~ /* Individual items aside from the registers. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef DEBUG ~~~~~~~~~~~~ #define NUM_NONREG_ITEMS 5 /* Includes failure point id. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #else ~~~~~ #define NUM_NONREG_ITEMS 4 ~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ /* We push at most this many items on the stack. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* We used to use (num_regs - 1), which is the number of registers ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ this regexp will save; but that was changed to 5 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ to avoid stack overflow for a regexp with lots of parens. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define MAX_FAILURE_ITEMS (5 * NUM_REG_ITEMS + NUM_NONREG_ITEMS) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* We actually push this many items. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define NUM_FAILURE_ITEMS \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ((highest_active_reg - lowest_active_reg + 1) * NUM_REG_ITEMS \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + NUM_NONREG_ITEMS) ~~~~~~~~~~~~~~~~~~~ /* How many items can still be added to the stack without overflowing it. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define REMAINING_AVAIL_SLOTS ((fail_stack).size - (fail_stack).avail) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Pops what PUSH_FAIL_STACK pushes. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ We restore into the following parameters, all of which should be lvalues: ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ STR -- the saved data position. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PAT -- the saved pattern position. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ LOW_REG, HIGH_REG -- the highest and lowest active registers. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ REGSTART, REGEND -- arrays of string positions. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ REG_INFO -- array of information about each subexpression. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Also assumes the variables `fail_stack' and (if debugging), `bufp', ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `pend', `string1', `size1', `string2', and `size2'. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define POP_FAILURE_POINT(str, pat, low_reg, high_reg, \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regstart, regend, reg_info) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ do { \ ~~~~~~~~~~~~~~ DEBUG_STATEMENT (int ffailure_id;) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int this_reg; \ ~~~~~~~~~~~~~~~~~~~~~~ const unsigned char *string_temp; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ \ ~ /* Remove failure points and point to how many regs pushed. */ \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ assert (fail_stack.avail >= NUM_NONREG_ITEMS); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ \ ~ if (DEBUG_RUNTIME_FLAGS & RE_DEBUG_FAILURE_POINT) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { \ ~~~~~~~~~~~ DEBUG_FAIL_PRINT1 ("POP_FAILURE_POINT:\n"); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_FAIL_PRINT2 (" Before pop, next avail: %zd\n", \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (Bytecount) fail_stack.avail); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_FAIL_PRINT2 (" size: %zd\n", \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (Bytecount) fail_stack.size); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } \ ~~~~~~~~~~~ \ ~ DEBUG_STATEMENT (ffailure_id = POP_FAILURE_INT()); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ \ ~ /* If the saved string location is NULL, it came from an \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ on_failure_keep_string_jump opcode, and we want to throw away the \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ saved NULL, thus retaining our current position in the string. */ \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ string_temp = POP_FAILURE_RELOCATABLE (); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (string_temp != NULL) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ str = string_temp; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~ \ ~ pat = (unsigned char *) POP_FAILURE_POINTER (); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ \ ~ /* Restore register info. */ \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ high_reg = POP_FAILURE_INT (); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ low_reg = POP_FAILURE_INT (); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ \ ~ if (DEBUG_RUNTIME_FLAGS & RE_DEBUG_FAILURE_POINT) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { \ ~~~~~~~~~~~ DEBUG_FAIL_PRINT2 (" Popping failure id: %d\n", ffailure_id); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_FAIL_PRINT2 (" Popping string 0x%zx: `", (Bytecount) str); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_FAIL_PRINT_DOUBLE_STRING (str, string1, size1, \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ string2, size2); \ ~~~~~~~~~~~~~~~~~~~~ DEBUG_FAIL_PRINT1 ("'\n"); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_FAIL_PRINT2 (" Popping pattern 0x%zx: ", (Bytecount) pat); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_FAIL_PRINT_COMPILED_PATTERN (bufp, pat, pend); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_FAIL_PRINT2 (" Popping high active reg: %d\n", high_reg); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_FAIL_PRINT2 (" Popping low active reg: %d\n", low_reg); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } \ ~~~~~~~~~~~ \ ~ for (this_reg = high_reg; this_reg >= low_reg; this_reg--) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { \ ~~~~~~~~~~~ reg_info[this_reg].word = POP_FAILURE_ELT (); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regend[this_reg] = POP_FAILURE_RELOCATABLE (); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regstart[this_reg] = POP_FAILURE_RELOCATABLE (); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ \ ~ if (DEBUG_RUNTIME_FLAGS & RE_DEBUG_FAILURE_POINT) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { \ ~~~~~~~~~~ DEBUG_FAIL_PRINT2 (" Popping reg: %d\n", this_reg); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_FAIL_PRINT2 (" info: 0x%zx\n", \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ * (Bytecount *) ®_info[this_reg]); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_FAIL_PRINT2 (" end: 0x%zx\n", \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (Bytecount) regend[this_reg]); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_FAIL_PRINT2 (" start: 0x%zx\n", \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (Bytecount) regstart[this_reg]); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } \ ~~~~~~~~~~ } \ ~~~~~~~~~~~ \ ~ set_regs_matched_done = 0; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_STATEMENT (nfailure_points_popped++); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } while (0) /* POP_FAILURE_POINT */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ~ /* Structure for per-register (a.k.a. per-group) information. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Other register information, such as the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ starting and ending positions (which are addresses), and the list of ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ inner groups (which is a bits list) are maintained in separate ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ variables. ~~~~~~~~~~ We are making a (strictly speaking) nonportable assumption here: that ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the compiler will pack our bit fields into something that fits into ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the type of `word', i.e., is something that fits into one item on the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ failure stack. */ ~~~~~~~~~~~~~~~~~~ typedef union ~~~~~~~~~~~~~ { ~ fail_stack_elt_t word; ~~~~~~~~~~~~~~~~~~~~~~ struct ~~~~~~ { ~ /* This field is one if this group can match the empty string, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ zero if not. If not yet determined, `MATCH_NULL_UNSET_VALUE'. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define MATCH_NULL_UNSET_VALUE 3 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ unsigned int match_null_string_p : 2; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ unsigned int is_active : 1; ~~~~~~~~~~~~~~~~~~~~~~~~~~~ unsigned int matched_something : 1; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ unsigned int ever_matched_something : 1; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } bits; ~~~~~~~ } register_info_type; ~~~~~~~~~~~~~~~~~~~~~ #define REG_MATCH_NULL_STRING_P(R) ((R).bits.match_null_string_p) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define IS_ACTIVE(R) ((R).bits.is_active) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define MATCHED_SOMETHING(R) ((R).bits.matched_something) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define EVER_MATCHED_SOMETHING(R) ((R).bits.ever_matched_something) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Call this when have matched a real character; it sets `matched' flags ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for the subexpressions which we are currently inside. Also records ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ that those subexprs have matched. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define SET_REGS_MATCHED() \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ do \ ~~~~~~~~~~~~ { \ ~~~~~~~~~~~ if (!set_regs_matched_done) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { \ ~~~~~~~~~~ int r; \ ~~~~~~~~~~~~~~ set_regs_matched_done = 1; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (r = lowest_active_reg; r <= highest_active_reg; r++) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { \ ~~~~~~~~~~ MATCHED_SOMETHING (reg_info[r]) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ = EVER_MATCHED_SOMETHING (reg_info[r]) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ = 1; \ ~~~~~~~~~~~~ } \ ~~~~~~~~~~ } \ ~~~~~~~~~~ } \ ~~~~~~~~~~~ while (0) ~~~~~~~~~ ~ /* Subroutine declarations and macros for regex_compile. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Fetch the next character in the uncompiled pattern---translating it ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if necessary. */ ~~~~~~~~~~~~~~~~~ #define PATFETCH(c) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~ do { \ ~~~~~~~~~~~~~~ PATFETCH_RAW (c); \ ~~~~~~~~~~~~~~~~~~~~~~~~~ c = RE_TRANSLATE (c); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } while (0) ~~~~~~~~~~~ /* Fetch the next character in the uncompiled pattern, with no ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ translation. */ ~~~~~~~~~~~~~~~~ #define PATFETCH_RAW(c) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ do {if (p == pend) return REG_EEND; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ assert (p < pend); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~ c = itext_ichar (p); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ INC_IBYTEPTR (p); \ ~~~~~~~~~~~~~~~~~~~~~~~~~ } while (0) ~~~~~~~~~~~ /* Go backwards one character in the pattern. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define PATUNFETCH DEC_IBYTEPTR (p) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* If `translate' is non-null, return translate[D], else just D. We ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ cast the subscript to translate because some data is declared as ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `char *', to avoid warnings when a string constant is passed. But ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ when we use a character as a subscript we must make it unsigned. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define RE_TRANSLATE(d) \ ~~~~~~~~~~~~~~~~~~~~~~~~~ (TRANSLATE_P (translate) ? RE_TRANSLATE_1 (d) : (d)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Macros for outputting the compiled pattern into `buffer'. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* If the buffer isn't allocated when it comes in, use this. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define INIT_BUF_SIZE 32 ~~~~~~~~~~~~~~~~~~~~~~~~~ /* Make sure we have at least N more bytes of space in buffer. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define GET_BUFFER_SPACE(n) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ while (buf_end - bufp->buffer + (n) > (ptrdiff_t) bufp->allocated) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ EXTEND_BUFFER () ~~~~~~~~~~~~~~~~ /* Make sure we have one more byte of buffer space and then add C to it. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define BUF_PUSH(c) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~ do { \ ~~~~~~~~~~~~~~ GET_BUFFER_SPACE (1); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ *buf_end++ = (unsigned char) (c); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } while (0) ~~~~~~~~~~~ /* Ensure we have two more bytes of buffer space and then append C1 and C2. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define BUF_PUSH_2(c1, c2) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ do { \ ~~~~~~~~~~~~~~ GET_BUFFER_SPACE (2); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ *buf_end++ = (unsigned char) (c1); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ *buf_end++ = (unsigned char) (c2); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } while (0) ~~~~~~~~~~~ /* As with BUF_PUSH_2, except for three bytes. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define BUF_PUSH_3(c1, c2, c3) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ do { \ ~~~~~~~~~~~~~~ GET_BUFFER_SPACE (3); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ *buf_end++ = (unsigned char) (c1); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ *buf_end++ = (unsigned char) (c2); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ *buf_end++ = (unsigned char) (c3); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } while (0) ~~~~~~~~~~~ /* Store a jump with opcode OP at LOC to location TO. We store a ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ relative address offset by the three bytes the jump itself occupies. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define STORE_JUMP(op, loc, to) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ store_op1 (op, loc, (to) - (loc) - 3) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Likewise, for a two-argument jump. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define STORE_JUMP2(op, loc, to, arg) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ store_op2 (op, loc, (to) - (loc) - 3, arg) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Like `STORE_JUMP', but for inserting. Assume `buf_end' is the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ buffer end. */ ~~~~~~~~~~~~~~~ #define INSERT_JUMP(op, loc, to) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ insert_op1 (op, loc, (to) - (loc) - 3, buf_end) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Like `STORE_JUMP2', but for inserting. Assume `buf_end' is the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ buffer end. */ ~~~~~~~~~~~~~~~ #define INSERT_JUMP2(op, loc, to, arg) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ insert_op2 (op, loc, (to) - (loc) - 3, arg, buf_end) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Extend the buffer by twice its current size via realloc and ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ reset the pointers that pointed into the old block to point to the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ correct places in the new one. If extending the buffer results in it ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ being larger than RE_MAX_BUF_SIZE, then flag memory exhausted. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define EXTEND_BUFFER() \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ do { \ ~~~~~~~~~~~~~~~ re_char *old_buffer = bufp->buffer; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (bufp->allocated == RE_MAX_BUF_SIZE) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return REG_ESIZE; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->allocated <<= 1; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (bufp->allocated > RE_MAX_BUF_SIZE) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->allocated = RE_MAX_BUF_SIZE; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->buffer = \ ~~~~~~~~~~~~~~~~~~~~~~~ (unsigned char *) xrealloc (bufp->buffer, bufp->allocated); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (bufp->buffer == NULL) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return REG_ESPACE; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~ /* If the buffer moved, move all the pointers into it. */ \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (old_buffer != bufp->buffer) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { \ ~~~~~~~~~~~~ buf_end = (buf_end - old_buffer) + bufp->buffer; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ begalt = (begalt - old_buffer) + bufp->buffer; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (fixup_alt_jump) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~ fixup_alt_jump = (fixup_alt_jump - old_buffer) + bufp->buffer; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (laststart) \ ~~~~~~~~~~~~~~~~~~~~~~~ laststart = (laststart - old_buffer) + bufp->buffer; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (pending_exact) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~ pending_exact = (pending_exact - old_buffer) + bufp->buffer; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } \ ~~~~~~~~~~~~ } while (0) ~~~~~~~~~~~ #define INIT_REG_TRANSLATE_SIZE 5 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Macros for the compile stack. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Since offsets can go either forwards or backwards, this type needs to be ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ able to hold values from -(RE_MAX_BUF_SIZE - 1) to RE_MAX_BUF_SIZE - 1. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ typedef int pattern_offset_t; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ typedef struct ~~~~~~~~~~~~~~ { ~ pattern_offset_t begalt_offset; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ pattern_offset_t fixup_alt_jump; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ pattern_offset_t inner_group_offset; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ pattern_offset_t laststart_offset; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regnum_t regnum; ~~~~~~~~~~~~~~~~ } compile_stack_elt_t; ~~~~~~~~~~~~~~~~~~~~~~ typedef struct ~~~~~~~~~~~~~~ { ~ compile_stack_elt_t *stack; ~~~~~~~~~~~~~~~~~~~~~~~~~~~ int size; ~~~~~~~~~ int avail; /* Offset of next open position. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } compile_stack_type; ~~~~~~~~~~~~~~~~~~~~~ #define INIT_COMPILE_STACK_SIZE 32 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define COMPILE_STACK_EMPTY (compile_stack.avail == 0) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define COMPILE_STACK_FULL (compile_stack.avail == compile_stack.size) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* The next available element. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define COMPILE_STACK_TOP (compile_stack.stack[compile_stack.avail]) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Set the bit for character C in a bit vector. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define SET_LIST_BIT(c) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (buf_end[((unsigned char) (c)) / BYTEWIDTH] \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ |= 1 << (((unsigned char) c) % BYTEWIDTH)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef MULE ~~~~~~~~~~~ /* Set the "bit" for character C in a range table. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define SET_RANGETAB_BIT(c) put_range_table (rtab, c, c, Qt) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ #ifdef emacs ~~~~~~~~~~~~ /* Parse the longest number we can, but don't produce a bignum, that can't ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ correspond to anything we're interested in and would needlessly complicate ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ code. Also avoid the silent overflow issues of the non-emacs code below. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ If the string at P is not exhausted, leave P pointing at the next ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (probable-)non-digit byte encountered. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define GET_UNSIGNED_NUMBER(num) do \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Ibyte *_gus_numend = NULL; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Lisp_Object _gus_numno; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* most-positive-fixnum on 32 bit XEmacs is 10 decimal digits, \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ nine will keep us in fixnum territory no matter our \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ architecture */ \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Bytecount limit = min (pend - p, 9); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ \ ~ /* Require that any digits are ASCII. We already require that \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the user type ASCII in order to type {,(,|, etc, and there is \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the potential for security holes in the future if we allow \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ non-ASCII digits to specify groups in regexps and other \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ code that parses regexps is not aware of this. */ \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ _gus_numno = parse_integer (p, &_gus_numend, limit, 10, 1, \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Vdigit_fixnum_ascii); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (FIXNUMP (_gus_numno) && XREALFIXNUM (_gus_numno) >= 0) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ num = XREALFIXNUM (_gus_numno); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ p = _gus_numend; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } while (0) ~~~~~~~~~~~ #else ~~~~~ /* Get the next unsigned number in the uncompiled pattern. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define GET_UNSIGNED_NUMBER(num) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { if (p != pend) \ ~~~~~~~~~~~~~~~~~~~~~~~~ { \ ~~~~~~~~~~~ int _gun_do_unfetch = 1; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PATFETCH (c); \ ~~~~~~~~~~~~~~~~~~~~~~ while (ISDIGIT (c)) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~ { \ ~~~~~~~~~~~ if (num < 0) \ ~~~~~~~~~~~~~~~~~~~~ num = 0; \ ~~~~~~~~~~~~~~~~ num = num * 10 + c - '0'; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (p == pend) \ ~~~~~~~~~~~~~~~~~~~~~~ { \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ _gun_do_unfetch = 0; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; \ ~~~~~~~~~~~~~~~ } \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PATFETCH (c); \ ~~~~~~~~~~~~~~~~~~~~ } \ ~~~~~~~~~~~ if (_gun_do_unfetch) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Make sure P points to the next non-digit character. */ \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PATUNFETCH; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ #endif ~~~~~~ /* Map a string to the char class it names (if any). BEG points to the string ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ to be parsed and LIMIT is the length, in bytes, of that string. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ XEmacs; this only handles the NAME part of the [:NAME:] specification of a ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ character class name. The GNU emacs version of this function attempts to ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ handle the string from [: onwards, and is called re_wctype_parse. Our ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ approach means the function doesn't need to be called with every character ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ class encountered. ~~~~~~~~~~~~~~~~~~ LENGTH would be a Bytecount if this function didn't need to be compiled ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ also for executables that don't include lisp.h ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Return RECC_ERROR if STRP doesn't match a known character class. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_wctype_t ~~~~~~~~~~~ re_wctype (const unsigned char *beg, int limit) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ /* Sort tests in the length=five case by frequency the classes to minimize ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ number of times we fail the comparison. The frequencies of character class ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ names used in Emacs sources as of 2016-07-27: ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ $ find \( -name \*.c -o -name \*.el \) -exec grep -h '\[:[a-z]*:]' {} + | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ sed 's/]/]\n/g' |grep -o '\[:[a-z]*:]' |sort |uniq -c |sort -nr ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 213 [:alnum:] ~~~~~~~~~~~~~ 104 [:alpha:] ~~~~~~~~~~~~~ 62 [:space:] ~~~~~~~~~~~~ 39 [:digit:] ~~~~~~~~~~~~ 36 [:blank:] ~~~~~~~~~~~~ 26 [:word:] ~~~~~~~~~~~ 26 [:upper:] ~~~~~~~~~~~~ 21 [:lower:] ~~~~~~~~~~~~ 10 [:xdigit:] ~~~~~~~~~~~~~ 10 [:punct:] ~~~~~~~~~~~~ 10 [:ascii:] ~~~~~~~~~~~~ 4 [:nonascii:] ~~~~~~~~~~~~~~ 4 [:graph:] ~~~~~~~~~~~ 2 [:print:] ~~~~~~~~~~~ 2 [:cntrl:] ~~~~~~~~~~~ 1 [:ff:] ~~~~~~~~ If you update this list, consider also updating chain of or'ed conditions ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ in execute_charset function. XEmacs; our equivalent is the condition ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ checking class_bits in the charset_mule and charset_mule_not opcodes. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ ~~ switch (limit) { ~~~~~~~~~~~~~~~~ case 4: ~~~~~~~ if (!memcmp (beg, "word", 4)) return RECC_WORD; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case 5: ~~~~~~~ if (!memcmp (beg, "alnum", 5)) return RECC_ALNUM; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!memcmp (beg, "alpha", 5)) return RECC_ALPHA; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!memcmp (beg, "space", 5)) return RECC_SPACE; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!memcmp (beg, "digit", 5)) return RECC_DIGIT; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!memcmp (beg, "blank", 5)) return RECC_BLANK; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!memcmp (beg, "upper", 5)) return RECC_UPPER; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!memcmp (beg, "lower", 5)) return RECC_LOWER; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!memcmp (beg, "punct", 5)) return RECC_PUNCT; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!memcmp (beg, "ascii", 5)) return RECC_ASCII; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!memcmp (beg, "graph", 5)) return RECC_GRAPH; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!memcmp (beg, "print", 5)) return RECC_PRINT; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!memcmp (beg, "cntrl", 5)) return RECC_CNTRL; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case 6: ~~~~~~~ if (!memcmp (beg, "xdigit", 6)) return RECC_XDIGIT; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case 7: ~~~~~~~ if (!memcmp (beg, "unibyte", 7)) return RECC_UNIBYTE; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case 8: ~~~~~~~ if (!memcmp (beg, "nonascii", 8)) return RECC_NONASCII; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case 9: ~~~~~~~ if (!memcmp (beg, "multibyte", 9)) return RECC_MULTIBYTE; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ } ~ return RECC_ERROR; ~~~~~~~~~~~~~~~~~~ } ~ /* True if CH is in the char class CC. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int ~~~ re_iswctype (int ch, re_wctype_t cc ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RE_ISWCTYPE_ARG_DECL) ~~~~~~~~~~~~~~~~~~~~~ { ~ switch (cc) ~~~~~~~~~~~ { ~ case RECC_ALNUM: return ISALNUM (ch) != 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_ALPHA: return ISALPHA (ch) != 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_BLANK: return ISBLANK (ch) != 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_CNTRL: return ISCNTRL (ch) != 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_DIGIT: return ISDIGIT (ch) != 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_GRAPH: return ISGRAPH (ch) != 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_PRINT: return ISPRINT (ch) != 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_PUNCT: return ISPUNCT (ch) != 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_SPACE: return ISSPACE (ch) != 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef emacs ~~~~~~~~~~~~ case RECC_UPPER: ~~~~~~~~~~~~~~~~ return NILP (lispbuf->case_fold_search) ? ISUPPER (ch) != 0 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ : !NOCASEP (lispbuf, ch); ~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_LOWER: ~~~~~~~~~~~~~~~~ return NILP (lispbuf->case_fold_search) ? ISLOWER (ch) != 0 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ : !NOCASEP (lispbuf, ch); ~~~~~~~~~~~~~~~~~~~~~~~~~ #else ~~~~~ case RECC_UPPER: return ISUPPER (ch) != 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_LOWER: return ISLOWER (ch) != 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ case RECC_XDIGIT: return ISXDIGIT (ch) != 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_ASCII: return ISASCII (ch) != 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_NONASCII: case RECC_MULTIBYTE: return !ISASCII (ch); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_UNIBYTE: return ISUNIBYTE (ch) != 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_WORD: return ISWORD (ch) != 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_ERROR: return false; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ assert (0); ~~~~~~~~~~~ return -1; ~~~~~~~~~~ } ~ #ifdef MULE ~~~~~~~~~~~ static re_bool ~~~~~~~~~~~~~~ re_wctype_can_match_non_ascii (re_wctype_t cc) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ switch (cc) ~~~~~~~~~~~ { ~ case RECC_ASCII: ~~~~~~~~~~~~~~~~ case RECC_UNIBYTE: ~~~~~~~~~~~~~~~~~~ case RECC_CNTRL: ~~~~~~~~~~~~~~~~ case RECC_DIGIT: ~~~~~~~~~~~~~~~~ case RECC_XDIGIT: ~~~~~~~~~~~~~~~~~ case RECC_BLANK: ~~~~~~~~~~~~~~~~ return false; ~~~~~~~~~~~~~ default: ~~~~~~~~ return true; ~~~~~~~~~~~~ } ~ } ~ #endif /* MULE */ ~~~~~~~~~~~~~~~~~ #ifdef emacs ~~~~~~~~~~~~ /* Return a bit-pattern to use in the range-table bits to match multibyte ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ chars of class CC. */ ~~~~~~~~~~~~~~~~~~~~~~ static unsigned char ~~~~~~~~~~~~~~~~~~~~ re_wctype_to_bit (re_wctype_t cc) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ switch (cc) ~~~~~~~~~~~ { ~ case RECC_PRINT: case RECC_GRAPH: ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_ALPHA: return BIT_ALPHA; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_ALNUM: case RECC_WORD: return BIT_WORD; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_LOWER: return BIT_LOWER; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_UPPER: return BIT_UPPER; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_PUNCT: return BIT_PUNCT; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_SPACE: return BIT_SPACE; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_MULTIBYTE: case RECC_NONASCII: ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_ASCII: case RECC_DIGIT: case RECC_XDIGIT: case RECC_CNTRL: ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_BLANK: case RECC_UNIBYTE: case RECC_ERROR: return 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ default: ~~~~~~~~ ABORT (); ~~~~~~~~~ return 0; ~~~~~~~~~ } ~ } ~ #endif /* emacs */ ~~~~~~~~~~~~~~~~~~ ~ static void store_op1 (re_opcode_t op, unsigned char *loc, int arg); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ static void store_op2 (re_opcode_t op, unsigned char *loc, int arg1, int arg2); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ static void insert_op1 (re_opcode_t op, unsigned char *loc, int arg, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ unsigned char *end); ~~~~~~~~~~~~~~~~~~~~ static void insert_op2 (re_opcode_t op, unsigned char *loc, int arg1, int arg2, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ unsigned char *end); ~~~~~~~~~~~~~~~~~~~~ static re_bool at_begline_loc_p (re_char *pattern, re_char *p, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ reg_syntax_t syntax); ~~~~~~~~~~~~~~~~~~~~~ static re_bool at_endline_loc_p (re_char *p, re_char *pend, int syntax); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ static re_bool group_in_compile_stack (compile_stack_type compile_stack, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regnum_t regnum); ~~~~~~~~~~~~~~~~~ static reg_errcode_t compile_range (re_char **p_ptr, re_char *pend, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RE_TRANSLATE_TYPE translate, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ reg_syntax_t syntax, ~~~~~~~~~~~~~~~~~~~~ unsigned char *b); ~~~~~~~~~~~~~~~~~~ #ifdef MULE ~~~~~~~~~~~ static reg_errcode_t compile_extended_range (re_char **p_ptr, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_char *pend, ~~~~~~~~~~~~~~ RE_TRANSLATE_TYPE translate, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ reg_syntax_t syntax, ~~~~~~~~~~~~~~~~~~~~ Lisp_Object rtab); ~~~~~~~~~~~~~~~~~~ #endif /* MULE */ ~~~~~~~~~~~~~~~~~ #ifdef emacs ~~~~~~~~~~~~ reg_errcode_t compile_char_class (re_wctype_t cc, Lisp_Object rtab, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Bitbyte *flags_out); ~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ static re_bool group_match_null_string_p (re_char **p, re_char *end, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ register_info_type *reg_info); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ static re_bool alt_match_null_string_p (re_char *p, re_char *end, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ register_info_type *reg_info); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ static re_bool common_op_match_null_string_p (re_char **p, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_char *end, ~~~~~~~~~~~~~ register_info_type *reg_info); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ static int bcmp_translate (re_char *s1, re_char *s2, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ REGISTER int len, RE_TRANSLATE_TYPE translate ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef emacs ~~~~~~~~~~~~ , Internal_Format fmt, Lisp_Object lispobj ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ ); ~~ static int re_match_2_internal (struct re_pattern_buffer *bufp, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_char *string1, int size1, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_char *string2, int size2, int pos, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ struct re_registers *regs, int stop ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RE_LISP_CONTEXT_ARGS_DECL); ~~~~~~~~~~~~~~~~~~~~~~~~~~~ ~ #ifndef MATCH_MAY_ALLOCATE ~~~~~~~~~~~~~~~~~~~~~~~~~~ /* If we cannot allocate large objects within re_match_2_internal, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ we make the fail stack and register vectors global. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ The fail stack, we grow to the maximum size when a regexp ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ is compiled. ~~~~~~~~~~~~ The register vectors, we adjust in size each time we ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ compile a regexp, according to the number of registers it needs. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ static fail_stack_type fail_stack; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Size with which the following vectors are currently allocated. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ That is so we can make them bigger as needed, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ but never make them smaller. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ static int regs_allocated_size; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ static re_char ** regstart, ** regend; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ static re_char ** old_regstart, ** old_regend; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ static re_char **best_regstart, **best_regend; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ static register_info_type *reg_info; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ static re_char **reg_dummy; ~~~~~~~~~~~~~~~~~~~~~~~~~~~ static register_info_type *reg_info_dummy; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Make the register vectors big enough for NUM_REGS registers, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ but don't make them smaller. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ static ~~~~~~ regex_grow_registers (int num_regs) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ if (num_regs > regs_allocated_size) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ RETALLOC (regstart, num_regs, re_char *); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RETALLOC (regend, num_regs, re_char *); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RETALLOC (old_regstart, num_regs, re_char *); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RETALLOC (old_regend, num_regs, re_char *); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RETALLOC (best_regstart, num_regs, re_char *); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RETALLOC (best_regend, num_regs, re_char *); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RETALLOC (reg_info, num_regs, register_info_type); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RETALLOC (reg_dummy, num_regs, re_char *); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RETALLOC (reg_info_dummy, num_regs, register_info_type); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regs_allocated_size = num_regs; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ } ~ #endif /* not MATCH_MAY_ALLOCATE */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ~ /* `regex_compile' compiles PATTERN (of length SIZE) according to SYNTAX. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Returns one of error codes defined in `regex.h', or zero for success. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Assumes the `allocated' (and perhaps `buffer') and `translate' ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fields are set in BUFP on entry. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ If it succeeds, results are put in BUFP (if it returns an error, the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ contents of BUFP are undefined): ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `buffer' is the compiled pattern; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `syntax' is set to SYNTAX; ~~~~~~~~~~~~~~~~~~~~~~~~~~ `used' is set to the length of the compiled pattern; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `fastmap_accurate' is zero; ~~~~~~~~~~~~~~~~~~~~~~~~~~~ `re_ngroups' is the number of groups/subexpressions (including shy ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ groups) in PATTERN; ~~~~~~~~~~~~~~~~~~~ `re_nsub' is the number of non-shy groups in PATTERN; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `not_bol' and `not_eol' are zero; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ The `fastmap' and `newline_anchor' fields are neither ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ examined nor set. */ ~~~~~~~~~~~~~~~~~~~~~ /* Return, freeing storage we allocated. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define FREE_STACK_RETURN(value) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ do \ ~~~~~~~~~~ { \ ~~~~~~~~~ xfree (compile_stack.stack); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return value; \ ~~~~~~~~~~~~~~~~~~~~ } while (0) ~~~~~~~~~~~ static reg_errcode_t ~~~~~~~~~~~~~~~~~~~~ regex_compile (re_char *pattern, int size, reg_syntax_t syntax, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ struct re_pattern_buffer *bufp) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ /* We fetch characters from PATTERN here. We declare these as int ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (or possibly long) so that chars above 127 can be used as ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ array indices. The macros that fetch a character from the pattern ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ make sure to coerce to unsigned char before assigning, so we won't ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ get bitten by negative numbers here. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* XEmacs change: used to be unsigned char. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ REGISTER EMACS_INT c, c1; ~~~~~~~~~~~~~~~~~~~~~~~~~ /* A random temporary spot in PATTERN. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_char *p1; ~~~~~~~~~~~~ /* Points to the end of the buffer, where we should append. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ REGISTER unsigned char *buf_end; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Keeps track of unclosed groups. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ compile_stack_type compile_stack; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Points to the current (ending) position in the pattern. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_char *p = pattern; ~~~~~~~~~~~~~~~~~~~~~ re_char *pend = pattern + size; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* How to translate the characters in the pattern. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RE_TRANSLATE_TYPE translate = bufp->translate; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Address of the count-byte of the most recently inserted `exactn' ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ command. This makes it possible to tell if a new exact-match ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ character can be added to that command or if the character requires ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ a new `exactn' command. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~ unsigned char *pending_exact = 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Address of start of the most recently finished expression. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ This tells, e.g., postfix * where to find the start of its ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ operand. Reset at the beginning of groups and alternatives. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ unsigned char *laststart = 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Address of beginning of regexp, or inside of last group. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ unsigned char *begalt; ~~~~~~~~~~~~~~~~~~~~~~ /* Place in the uncompiled pattern (i.e., the {) to ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ which to go back if the interval is invalid. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_char *beg_interval; ~~~~~~~~~~~~~~~~~~~~~~ /* Address of the place where a forward jump should go to the end of ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the containing expression. Each alternative of an `or' -- except the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ last -- ends with a forward jump of this sort. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ unsigned char *fixup_alt_jump = 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Counts open-groups as they are encountered. Remembered for the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ matching close-group on the compile stack, so the same register ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ number is put in the stop_memory as the start_memory. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regnum_t regnum = 0; ~~~~~~~~~~~~~~~~~~~~ #ifdef DEBUG ~~~~~~~~~~~~ if (debug_regexps & RE_DEBUG_COMPILATION) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ int debug_count; ~~~~~~~~~~~~~~~~ DEBUG_PRINT1 ("\nCompiling pattern: "); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (debug_count = 0; debug_count < size; debug_count++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ putchar (pattern[debug_count]); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ putchar ('\n'); ~~~~~~~~~~~~~~~ } ~ #endif /* DEBUG */ ~~~~~~~~~~~~~~~~~~ /* Initialize the compile stack. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ compile_stack.stack = TALLOC (INIT_COMPILE_STACK_SIZE, compile_stack_elt_t); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (compile_stack.stack == NULL) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return REG_ESPACE; ~~~~~~~~~~~~~~~~~~ compile_stack.size = INIT_COMPILE_STACK_SIZE; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ compile_stack.avail = 0; ~~~~~~~~~~~~~~~~~~~~~~~~ /* Initialize the pattern buffer. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->syntax = syntax; ~~~~~~~~~~~~~~~~~~~~~~ bufp->fastmap_accurate = 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->not_bol = bufp->not_eol = 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Set `used' to zero, so that if we return an error, the pattern ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ printer (for debugging) will think there's no pattern. We reset it ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ at the end. */ ~~~~~~~~~~~~~~~ bufp->used = 0; ~~~~~~~~~~~~~~~ /* Always count groups, whether or not bufp->no_sub is set. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->re_nsub = 0; ~~~~~~~~~~~~~~~~~~ bufp->re_ngroups = 0; ~~~~~~~~~~~~~~~~~~~~~ bufp->warned_about_incompatible_back_references = 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (bufp->external_to_internal_register == 0) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ bufp->external_to_internal_register_size = INIT_REG_TRANSLATE_SIZE; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RETALLOC (bufp->external_to_internal_register, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->external_to_internal_register_size, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int); ~~~~~ } ~ { ~ int i; ~~~~~~ bufp->external_to_internal_register[0] = 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (i = 1; i < bufp->external_to_internal_register_size; i++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->external_to_internal_register[i] = (int) 0xDEADBEEF; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ #if !defined (emacs) && !defined (SYNTAX_TABLE) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Initialize the syntax table. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ init_syntax_once (); ~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ if (bufp->allocated == 0) ~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ if (bufp->buffer) ~~~~~~~~~~~~~~~~~ { /* If zero allocated, but buffer is non-null, try to realloc ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ enough space. This loses if buffer's address is bogus, but ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ that is the user's responsibility. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RETALLOC (bufp->buffer, INIT_BUF_SIZE, unsigned char); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ else ~~~~ { /* Caller did not allocate a buffer. Do it for them. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->buffer = TALLOC (INIT_BUF_SIZE, unsigned char); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ if (!bufp->buffer) FREE_STACK_RETURN (REG_ESPACE); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->allocated = INIT_BUF_SIZE; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ begalt = buf_end = bufp->buffer; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Loop through the uncompiled pattern until we're at the end. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ while (p != pend) ~~~~~~~~~~~~~~~~~ { ~ PATFETCH (c); ~~~~~~~~~~~~~ switch (c) ~~~~~~~~~~ { ~ case '^': ~~~~~~~~~ { ~ if ( /* If at start of pattern, it's an operator. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ p == pattern + 1 ~~~~~~~~~~~~~~~~ /* If context independent, it's an operator. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || syntax & RE_CONTEXT_INDEP_ANCHORS ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Otherwise, depends on what's come before. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || at_begline_loc_p (pattern, p, syntax)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ BUF_PUSH (begline); ~~~~~~~~~~~~~~~~~~~ else ~~~~ goto normal_char; ~~~~~~~~~~~~~~~~~ } ~ break; ~~~~~~ case '$': ~~~~~~~~~ { ~ if ( /* If at end of pattern, it's an operator. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ p == pend ~~~~~~~~~ /* If context independent, it's an operator. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || syntax & RE_CONTEXT_INDEP_ANCHORS ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Otherwise, depends on what's next. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || at_endline_loc_p (p, pend, syntax)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ BUF_PUSH (endline); ~~~~~~~~~~~~~~~~~~~ else ~~~~ goto normal_char; ~~~~~~~~~~~~~~~~~ } ~ break; ~~~~~~ case '+': ~~~~~~~~~ case '?': ~~~~~~~~~ if ((syntax & RE_BK_PLUS_QM) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || (syntax & RE_LIMITED_OPS)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto normal_char; ~~~~~~~~~~~~~~~~~ handle_plus: ~~~~~~~~~~~~ case '*': ~~~~~~~~~ /* If there is no previous pattern... */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!laststart) ~~~~~~~~~~~~~~~ { ~ if (syntax & RE_CONTEXT_INVALID_OPS) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ FREE_STACK_RETURN (REG_BADRPT); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ else if (!(syntax & RE_CONTEXT_INDEP_OPS)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto normal_char; ~~~~~~~~~~~~~~~~~ } ~ { ~ /* true means zero/many matches are allowed. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_bool zero_times_ok = c != '+'; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_bool many_times_ok = c != '?'; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* true means match shortest string possible. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_bool minimal = false; ~~~~~~~~~~~~~~~~~~~~~~~~ /* If there is a sequence of repetition chars, collapse it ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ down to just one (the right one). We can't combine ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ interval operators with these because of, e.g., `a{2}*', ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ which should only match an even number of `a's. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ while (p != pend) ~~~~~~~~~~~~~~~~~ { ~ PATFETCH (c); ~~~~~~~~~~~~~ if (c == '*' || (!(syntax & RE_BK_PLUS_QM) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ && (c == '+' || c == '?'))) ~~~~~~~~~~~~~~~~~~~~~~~~~~~ ; ~ else if (syntax & RE_BK_PLUS_QM && c == '\\') ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ if (p == pend) FREE_STACK_RETURN (REG_EESCAPE); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PATFETCH (c1); ~~~~~~~~~~~~~~ if (!(c1 == '+' || c1 == '?')) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ PATUNFETCH; ~~~~~~~~~~~ PATUNFETCH; ~~~~~~~~~~~ break; ~~~~~~ } ~ c = c1; ~~~~~~~ } ~ else ~~~~ { ~ PATUNFETCH; ~~~~~~~~~~~ break; ~~~~~~ } ~ /* If we get here, we found another repeat character. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!(syntax & RE_NO_MINIMAL_MATCHING)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ /* "*?" and "+?" and "??" are okay (and mean match ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ minimally), but other sequences (such as "*??" and ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ "+++") are rejected (reserved for future use). */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (minimal || c != '?') ~~~~~~~~~~~~~~~~~~~~~~~~ FREE_STACK_RETURN (REG_BADRPT); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ minimal = true; ~~~~~~~~~~~~~~~ } ~ else ~~~~ { ~ zero_times_ok |= c != '+'; ~~~~~~~~~~~~~~~~~~~~~~~~~~ many_times_ok |= c != '?'; ~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ } ~ /* Star, etc. applied to an empty pattern is equivalent ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ to an empty pattern. */ ~~~~~~~~~~~~~~~~~~~~~~~~ if (!laststart) ~~~~~~~~~~~~~~~ break; ~~~~~~ /* Now we know whether zero matches is allowed ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ and whether two or more matches is allowed ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ and whether we want minimal or maximal matching. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (minimal) ~~~~~~~~~~~~ { ~ if (!many_times_ok) ~~~~~~~~~~~~~~~~~~~ { ~ /* "a??" becomes: ~~~~~~~~~~~~~~~~~ 0: /on_failure_jump to 6 ~~~~~~~~~~~~~~~~~~~~~~~~ 3: /jump to 9 ~~~~~~~~~~~~~ 6: /exactn/1/A ~~~~~~~~~~~~~~ 9: end of pattern. ~~~~~~~~~~~~~~~~~~ */ ~~ GET_BUFFER_SPACE (6); ~~~~~~~~~~~~~~~~~~~~~ INSERT_JUMP (jump, laststart, buf_end + 3); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ buf_end += 3; ~~~~~~~~~~~~~ INSERT_JUMP (on_failure_jump, laststart, laststart + 6); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ buf_end += 3; ~~~~~~~~~~~~~ } ~ else if (zero_times_ok) ~~~~~~~~~~~~~~~~~~~~~~~ { ~ /* "a*?" becomes: ~~~~~~~~~~~~~~~~~ 0: /jump to 6 ~~~~~~~~~~~~~ 3: /exactn/1/A ~~~~~~~~~~~~~~ 6: /on_failure_jump to 3 ~~~~~~~~~~~~~~~~~~~~~~~~ 9: end of pattern. ~~~~~~~~~~~~~~~~~~ */ ~~ GET_BUFFER_SPACE (6); ~~~~~~~~~~~~~~~~~~~~~ INSERT_JUMP (jump, laststart, buf_end + 3); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ buf_end += 3; ~~~~~~~~~~~~~ STORE_JUMP (on_failure_jump, buf_end, laststart + 3); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ buf_end += 3; ~~~~~~~~~~~~~ } ~ else ~~~~ { ~ /* "a+?" becomes: ~~~~~~~~~~~~~~~~~ 0: /exactn/1/A ~~~~~~~~~~~~~~ 3: /on_failure_jump to 0 ~~~~~~~~~~~~~~~~~~~~~~~~ 6: end of pattern. ~~~~~~~~~~~~~~~~~~ */ ~~ GET_BUFFER_SPACE (3); ~~~~~~~~~~~~~~~~~~~~~ STORE_JUMP (on_failure_jump, buf_end, laststart); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ buf_end += 3; ~~~~~~~~~~~~~ } ~ } ~ else ~~~~ { ~ /* Are we optimizing this jump? */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_bool keep_string_p = false; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (many_times_ok) ~~~~~~~~~~~~~~~~~~ { /* More than one repetition is allowed, so put in ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ at the end a backward relative jump from ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `buf_end' to before the next jump we're going ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ to put in below (which jumps from laststart to ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ after this jump). ~~~~~~~~~~~~~~~~~ But if we are at the `*' in the exact sequence `.*\n', ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ insert an unconditional jump backwards to the ., ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ instead of the beginning of the loop. This way we only ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ push a failure point once, instead of every time ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ through the loop. */ ~~~~~~~~~~~~~~~~~~~~~ assert (p - 1 > pattern); ~~~~~~~~~~~~~~~~~~~~~~~~~ /* Allocate the space for the jump. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ GET_BUFFER_SPACE (3); ~~~~~~~~~~~~~~~~~~~~~ /* We know we are not at the first character of the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ pattern, because laststart was nonzero. And we've ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ already incremented `p', by the way, to be the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ character after the `*'. Do we have to do something ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ analogous here for null bytes, because of ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RE_DOT_NOT_NULL? */ ~~~~~~~~~~~~~~~~~~~ if (*(p - 2) == '.' ~~~~~~~~~~~~~~~~~~~ && zero_times_ok ~~~~~~~~~~~~~~~~ && p < pend && *p == '\n' ~~~~~~~~~~~~~~~~~~~~~~~~~ && !(syntax & RE_DOT_NEWLINE)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { /* We have .*\n. */ ~~~~~~~~~~~~~~~~~~~~~~ STORE_JUMP (jump, buf_end, laststart); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ keep_string_p = true; ~~~~~~~~~~~~~~~~~~~~~ } ~ else ~~~~ /* Anything else. */ ~~~~~~~~~~~~~~~~~~~~~ STORE_JUMP (maybe_pop_jump, buf_end, laststart - 3); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* We've added more stuff to the buffer. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ buf_end += 3; ~~~~~~~~~~~~~ } ~ /* On failure, jump from laststart to buf_end + 3, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ which will be the end of the buffer after this jump ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ is inserted. */ ~~~~~~~~~~~~~~~~ GET_BUFFER_SPACE (3); ~~~~~~~~~~~~~~~~~~~~~ INSERT_JUMP (keep_string_p ? on_failure_keep_string_jump ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ : on_failure_jump, ~~~~~~~~~~~~~~~~~~ laststart, buf_end + 3); ~~~~~~~~~~~~~~~~~~~~~~~~ buf_end += 3; ~~~~~~~~~~~~~ if (!zero_times_ok) ~~~~~~~~~~~~~~~~~~~ { ~ /* At least one repetition is required, so insert a ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `dummy_failure_jump' before the initial ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `on_failure_jump' instruction of the loop. This ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ effects a skip over that instruction the first time ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ we hit that loop. */ ~~~~~~~~~~~~~~~~~~~~~ GET_BUFFER_SPACE (3); ~~~~~~~~~~~~~~~~~~~~~ INSERT_JUMP (dummy_failure_jump, laststart, laststart + 6); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ buf_end += 3; ~~~~~~~~~~~~~ } ~ } ~ pending_exact = 0; ~~~~~~~~~~~~~~~~~~ } ~ break; ~~~~~~ case '.': ~~~~~~~~~ laststart = buf_end; ~~~~~~~~~~~~~~~~~~~~ BUF_PUSH (anychar); ~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ #ifdef MULE ~~~~~~~~~~~ #define MAYBE_START_OVER_WITH_EXTENDED(ch) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (ch >= 0x80) do \ ~~~~~~~~~~~~~~~~~~~~~~ { \ ~~~~~~~ goto start_over_with_extended; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } while (0) ~~~~~~~~~~~ #else ~~~~~ #define MAYBE_START_OVER_WITH_EXTENDED(ch) (void)(ch) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ case '[': ~~~~~~~~~ { ~ /* XEmacs change: this whole section */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_bool had_char_class = false; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (p == pend) FREE_STACK_RETURN (REG_EBRACK); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Ensure that we have enough space to push a charset: the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ opcode, the length count, and the bitset; 34 bytes in all. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ GET_BUFFER_SPACE (34); ~~~~~~~~~~~~~~~~~~~~~~ laststart = buf_end; ~~~~~~~~~~~~~~~~~~~~ /* We test `*p == '^' twice, instead of using an if ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ statement, so we only need one BUF_PUSH. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ BUF_PUSH (*p == '^' ? charset_not : charset); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (*p == '^') ~~~~~~~~~~~~~~ p++; ~~~~ /* Remember the first position in the bracket expression. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ p1 = p; ~~~~~~~ /* Push the number of bytes in the bitmap. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ BUF_PUSH ((1 << BYTEWIDTH) / BYTEWIDTH); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Clear the whole map. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~ memset (buf_end, 0, (1 << BYTEWIDTH) / BYTEWIDTH); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* charset_not matches newline according to a syntax bit. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if ((re_opcode_t) buf_end[-2] == charset_not ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ && (syntax & RE_HAT_LISTS_NOT_NEWLINE)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ SET_LIST_BIT ('\n'); ~~~~~~~~~~~~~~~~~~~~ /* Read in characters and ranges, setting map bits. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (;;) ~~~~~~~~ { ~ if (p == pend) FREE_STACK_RETURN (REG_EBRACK); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PATFETCH (c); ~~~~~~~~~~~~~ /* Frumble-bumble, we may have found some extended chars. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Need to start over, process everything using the general ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ extended-char mechanism, and need to use charset_mule and ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ charset_mule_not instead of charset and charset_not. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ MAYBE_START_OVER_WITH_EXTENDED (c); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* \ might escape characters inside [...] and [^...]. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if ((syntax & RE_BACKSLASH_ESCAPE_IN_LISTS) && c == '\\') ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ if (p == pend) FREE_STACK_RETURN (REG_EESCAPE); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PATFETCH (c1); ~~~~~~~~~~~~~~ MAYBE_START_OVER_WITH_EXTENDED (c1); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ SET_LIST_BIT (c1); ~~~~~~~~~~~~~~~~~~ continue; ~~~~~~~~~ } ~ /* Could be the end of the bracket expression. If it's ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ not (i.e., when the bracket expression is `[]' so ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ far), the ']' character bit gets set way below. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (c == ']' && p != p1 + 1) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ /* Look ahead to see if it's a range when the last thing ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ was a character class. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~ if (had_char_class && c == '-' && *p != ']') ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ FREE_STACK_RETURN (REG_ERANGE); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Look ahead to see if it's a range when the last thing ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ was a character: if this is a hyphen not at the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ beginning or the end of a list, then it's the range ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ operator. */ ~~~~~~~~~~~~~ if (c == '-' ~~~~~~~~~~~~ && !(p - 2 >= pattern && p[-2] == '[') ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ && !(p - 3 >= pattern && p[-3] == '[' && p[-2] == '^') ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ && *p != ']') ~~~~~~~~~~~~~ { ~ reg_errcode_t ret; ~~~~~~~~~~~~~~~~~~ MAYBE_START_OVER_WITH_EXTENDED (*(unsigned char *)p); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ret = compile_range (&p, pend, translate, syntax, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ buf_end); ~~~~~~~~~ if (ret != REG_NOERROR) FREE_STACK_RETURN (ret); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ else if (p[0] == '-' && p[1] != ']') ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { /* This handles ranges made up of characters only. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ reg_errcode_t ret; ~~~~~~~~~~~~~~~~~~ /* Move past the `-'. */ ~~~~~~~~~~~~~~~~~~~~~~~~~ PATFETCH (c1); ~~~~~~~~~~~~~~ MAYBE_START_OVER_WITH_EXTENDED (*(unsigned char *)p); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ret = compile_range (&p, pend, translate, syntax, buf_end); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (ret != REG_NOERROR) FREE_STACK_RETURN (ret); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* See if we're at the beginning of a possible character ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ class. */ ~~~~~~~~~~ else if (syntax & RE_CHAR_CLASSES && c == '[' && *p == ':') ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ re_char *str = p + 1; ~~~~~~~~~~~~~~~~~~~~~ PATFETCH (c); ~~~~~~~~~~~~~ c1 = 0; ~~~~~~~ /* If pattern is `[[:'. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (p == pend) FREE_STACK_RETURN (REG_EBRACK); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (;;) ~~~~~~~~ { ~ PATFETCH (c); ~~~~~~~~~~~~~ if ((c == ':' && *p == ']') || p == pend) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ break; ~~~~~~ } ~ c1++; ~~~~~ } ~ /* If isn't a word bracketed by `[:' and `:]': ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ undo the ending character, the letters, and leave ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the leading `:' and `[' (but set bits for them). */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (c == ':' && *p == ']') ~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ re_wctype_t cc = re_wctype (str, c1); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int ch; ~~~~~~~ if (cc == RECC_ERROR) ~~~~~~~~~~~~~~~~~~~~~ FREE_STACK_RETURN (REG_ECTYPE); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Throw away the ] at the end of the character ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ class. */ ~~~~~~~~~~ PATFETCH (c); ~~~~~~~~~~~~~ if (p == pend) FREE_STACK_RETURN (REG_EBRACK); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef MULE ~~~~~~~~~~~ if (re_wctype_can_match_non_ascii (cc)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ goto start_over_with_extended; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ #endif /* MULE */ ~~~~~~~~~~~~~~~~~ for (ch = 0; ch < (1 << BYTEWIDTH); ++ch) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ if (re_iswctype (ch, cc ~~~~~~~~~~~~~~~~~~~~~~~ RE_ISWCTYPE_ARG (current_buffer))) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ SET_LIST_BIT (ch); ~~~~~~~~~~~~~~~~~~ } ~ } ~ had_char_class = true; ~~~~~~~~~~~~~~~~~~~~~~ } ~ else ~~~~ { ~ c1++; ~~~~~ while (c1--) ~~~~~~~~~~~~ PATUNFETCH; ~~~~~~~~~~~ SET_LIST_BIT ('['); ~~~~~~~~~~~~~~~~~~~ SET_LIST_BIT (':'); ~~~~~~~~~~~~~~~~~~~ had_char_class = false; ~~~~~~~~~~~~~~~~~~~~~~~ } ~ } ~ else ~~~~ { ~ had_char_class = false; ~~~~~~~~~~~~~~~~~~~~~~~ SET_LIST_BIT (c); ~~~~~~~~~~~~~~~~~ } ~ } ~ /* Discard any (non)matching list bytes that are all 0 at the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ end of the map. Decrease the map-length byte too. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ while ((int) buf_end[-1] > 0 && buf_end[buf_end[-1] - 1] == 0) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ buf_end[-1]--; ~~~~~~~~~~~~~~ buf_end += buf_end[-1]; ~~~~~~~~~~~~~~~~~~~~~~~ } ~ break; ~~~~~~ #ifdef MULE ~~~~~~~~~~~ start_over_with_extended: ~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ REGISTER Lisp_Object rtab = Qnil; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Bitbyte flags = 0; ~~~~~~~~~~~~~~~~~~ int bytes_needed = sizeof (flags); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_bool had_char_class = false; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* There are extended chars here, which means we need to use the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ unified range-table format. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (buf_end[-2] == charset) ~~~~~~~~~~~~~~~~~~~~~~~~~~~ buf_end[-2] = charset_mule; ~~~~~~~~~~~~~~~~~~~~~~~~~~~ else ~~~~ buf_end[-2] = charset_mule_not; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ buf_end--; ~~~~~~~~~~ p = p1; /* go back to the beginning of the charset, after ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ a possible ^. */ ~~~~~~~~~~~~~~~~ rtab = Vthe_lisp_rangetab; ~~~~~~~~~~~~~~~~~~~~~~~~~~ Fclear_range_table (rtab); ~~~~~~~~~~~~~~~~~~~~~~~~~~ /* charset_not matches newline according to a syntax bit. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if ((re_opcode_t) buf_end[-1] == charset_mule_not ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ && (syntax & RE_HAT_LISTS_NOT_NEWLINE)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ SET_RANGETAB_BIT ('\n'); ~~~~~~~~~~~~~~~~~~~~~~~~ /* Read in characters and ranges, setting map bits. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (;;) ~~~~~~~~ { ~ if (p == pend) FREE_STACK_RETURN (REG_EBRACK); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PATFETCH (c); ~~~~~~~~~~~~~ /* \ might escape characters inside [...] and [^...]. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if ((syntax & RE_BACKSLASH_ESCAPE_IN_LISTS) && c == '\\') ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ if (p == pend) FREE_STACK_RETURN (REG_EESCAPE); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PATFETCH (c1); ~~~~~~~~~~~~~~ SET_RANGETAB_BIT (c1); ~~~~~~~~~~~~~~~~~~~~~~ continue; ~~~~~~~~~ } ~ /* Could be the end of the bracket expression. If it's ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ not (i.e., when the bracket expression is `[]' so ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ far), the ']' character bit gets set way below. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (c == ']' && p != p1 + 1) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ /* Look ahead to see if it's a range when the last thing ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ was a character class. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~ if (had_char_class && c == '-' && *p != ']') ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ FREE_STACK_RETURN (REG_ERANGE); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Look ahead to see if it's a range when the last thing ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ was a character: if this is a hyphen not at the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ beginning or the end of a list, then it's the range ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ operator. */ ~~~~~~~~~~~~~ if (c == '-' ~~~~~~~~~~~~ && !(p - 2 >= pattern && p[-2] == '[') ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ && !(p - 3 >= pattern && p[-3] == '[' && p[-2] == '^') ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ && *p != ']') ~~~~~~~~~~~~~ { ~ reg_errcode_t ret; ~~~~~~~~~~~~~~~~~~ ret = compile_extended_range (&p, pend, translate, syntax, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ rtab); ~~~~~~ if (ret != REG_NOERROR) FREE_STACK_RETURN (ret); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ else if (p[0] == '-' && p[1] != ']') ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { /* This handles ranges made up of characters only. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ reg_errcode_t ret; ~~~~~~~~~~~~~~~~~~ /* Move past the `-'. */ ~~~~~~~~~~~~~~~~~~~~~~~~~ PATFETCH (c1); ~~~~~~~~~~~~~~ ret = compile_extended_range (&p, pend, translate, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ syntax, rtab); ~~~~~~~~~~~~~~ if (ret != REG_NOERROR) FREE_STACK_RETURN (ret); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* See if we're at the beginning of a possible character ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ class. */ ~~~~~~~~~~ else if (syntax & RE_CHAR_CLASSES && c == '[' && *p == ':') ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ re_char *str = p + 1; ~~~~~~~~~~~~~~~~~~~~~ PATFETCH (c); ~~~~~~~~~~~~~ c1 = 0; ~~~~~~~ /* If pattern is `[[:'. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (p == pend) FREE_STACK_RETURN (REG_EBRACK); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (;;) ~~~~~~~~ { ~ PATFETCH (c); ~~~~~~~~~~~~~ if ((c == ':' && *p == ']') || p == pend) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ break; ~~~~~~ } ~ c1++; ~~~~~ } ~ /* If isn't a word bracketed by `[:' and `:]': ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ undo the ending character, the letters, and leave ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the leading `:' and `[' (but set bits for them). */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (c == ':' && *p == ']') ~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ re_wctype_t cc = re_wctype (str, c1); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ reg_errcode_t ret = REG_NOERROR; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (cc == RECC_ERROR) ~~~~~~~~~~~~~~~~~~~~~ FREE_STACK_RETURN (REG_ECTYPE); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Throw away the ] at the end of the character ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ class. */ ~~~~~~~~~~ PATFETCH (c); ~~~~~~~~~~~~~ if (p == pend) FREE_STACK_RETURN (REG_EBRACK); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ret = compile_char_class (cc, rtab, &flags); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (ret != REG_NOERROR) FREE_STACK_RETURN (ret); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ had_char_class = true; ~~~~~~~~~~~~~~~~~~~~~~ } ~ else ~~~~ { ~ c1++; ~~~~~ while (c1--) ~~~~~~~~~~~~ PATUNFETCH; ~~~~~~~~~~~ SET_RANGETAB_BIT ('['); ~~~~~~~~~~~~~~~~~~~~~~~ SET_RANGETAB_BIT (':'); ~~~~~~~~~~~~~~~~~~~~~~~ had_char_class = false; ~~~~~~~~~~~~~~~~~~~~~~~ } ~ } ~ else ~~~~ { ~ had_char_class = false; ~~~~~~~~~~~~~~~~~~~~~~~ SET_RANGETAB_BIT (c); ~~~~~~~~~~~~~~~~~~~~~ } ~ } ~ bytes_needed += unified_range_table_bytes_needed (rtab); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ GET_BUFFER_SPACE (bytes_needed); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ *buf_end++ = flags; ~~~~~~~~~~~~~~~~~~~ unified_range_table_copy_data (rtab, buf_end); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ buf_end += unified_range_table_bytes_used (buf_end); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ } ~ #endif /* MULE */ ~~~~~~~~~~~~~~~~~ case '(': ~~~~~~~~~ if (syntax & RE_NO_BK_PARENS) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto handle_open; ~~~~~~~~~~~~~~~~~ else ~~~~ goto normal_char; ~~~~~~~~~~~~~~~~~ case ')': ~~~~~~~~~ if (syntax & RE_NO_BK_PARENS) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto handle_close; ~~~~~~~~~~~~~~~~~~ else ~~~~ goto normal_char; ~~~~~~~~~~~~~~~~~ case '\n': ~~~~~~~~~~ if (syntax & RE_NEWLINE_ALT) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto handle_alt; ~~~~~~~~~~~~~~~~ else ~~~~ goto normal_char; ~~~~~~~~~~~~~~~~~ case '|': ~~~~~~~~~ if (syntax & RE_NO_BK_VBAR) ~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto handle_alt; ~~~~~~~~~~~~~~~~ else ~~~~ goto normal_char; ~~~~~~~~~~~~~~~~~ case '{': ~~~~~~~~~ if (syntax & RE_INTERVALS && syntax & RE_NO_BK_BRACES) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto handle_interval; ~~~~~~~~~~~~~~~~~~~~~ else ~~~~ goto normal_char; ~~~~~~~~~~~~~~~~~ case '\\': ~~~~~~~~~~ if (p == pend) FREE_STACK_RETURN (REG_EESCAPE); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Do not translate the character after the \, so that we can ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ distinguish, e.g., \B from \b, even if we normally would ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ translate, e.g., B to b. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PATFETCH_RAW (c); ~~~~~~~~~~~~~~~~~ switch (c) ~~~~~~~~~~ { ~ case '(': ~~~~~~~~~ if (syntax & RE_NO_BK_PARENS) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto normal_backslash; ~~~~~~~~~~~~~~~~~~~~~~ handle_open: ~~~~~~~~~~~~ { ~ regnum_t r = 0; ~~~~~~~~~~~~~~~ re_bool shy = 0, named_nonshy = 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!(syntax & RE_NO_SHY_GROUPS) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ && p != pend && itext_ichar_eql (p, '?')) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ INC_IBYTEPTR (p); /* Gobble up the '?'. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PATFETCH (c); /* Fetch the next character, which may be a ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ digit. */ ~~~~~~~~~ switch (c) ~~~~~~~~~~ { ~ case ':': /* shy groups */ ~~~~~~~~~~~~~~~~~~~~~~~~~~ shy = 1; ~~~~~~~~ break; ~~~~~~ case '1': case '2': case '3': case '4': ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case '5': case '6': case '7': case '8': case '9': ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PATUNFETCH; ~~~~~~~~~~~ GET_UNSIGNED_NUMBER (r); ~~~~~~~~~~~~~~~~~~~~~~~~ if (itext_ichar_eql (p, ':')) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ named_nonshy = 1; ~~~~~~~~~~~~~~~~~ INC_IBYTEPTR (p); /* Gobble up the ':'. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ /* Otherwise, fall through and error. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* An explicitly specified regnum must start with ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ non-0. */ ~~~~~~~~~ case '0': ~~~~~~~~~ default: ~~~~~~~~ FREE_STACK_RETURN (REG_BADPAT); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ } ~ ++regnum; ~~~~~~~~~ bufp->re_ngroups++; ~~~~~~~~~~~~~~~~~~~ if (bufp->re_ngroups > MAX_REGNUM) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ FREE_STACK_RETURN (REG_ESUBREG); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ if (!shy) ~~~~~~~~~ { ~ if (named_nonshy) ~~~~~~~~~~~~~~~~~ { ~ if (r < bufp->external_to_internal_register_size) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ if (group_in_compile_stack ~~~~~~~~~~~~~~~~~~~~~~~~~~ (compile_stack, ~~~~~~~~~~~~~~~ bufp->external_to_internal_register[r])) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ /* GNU errors in this context, which is ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ inconsistent; it otherwise has no problem ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ with named non-shy groups overriding ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ previously-assigned group numbers. I choose ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ to error here for consistency with GNU for ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ those writing code that should target ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ both. */ ~~~~~~~~ FREE_STACK_RETURN (REG_ESUBREG); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ } ~ if (r > bufp->re_nsub) ~~~~~~~~~~~~~~~~~~~~~~ { ~ bufp->re_nsub = r; ~~~~~~~~~~~~~~~~~~ } ~ } ~ else ~~~~ { ~ r = ++(bufp->re_nsub); ~~~~~~~~~~~~~~~~~~~~~~ } ~ while (bufp->external_to_internal_register_size <= ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->re_nsub) ~~~~~~~~~~~~~~ { ~ int i; ~~~~~~ int old_size = ~~~~~~~~~~~~~~ bufp->external_to_internal_register_size; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->external_to_internal_register_size ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ += max (old_size + 5, bufp->re_nsub + 5); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RETALLOC (bufp->external_to_internal_register, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->external_to_internal_register_size, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int); ~~~~~ for (i = old_size; ~~~~~~~~~~~~~~~~~~ i < bufp->external_to_internal_register_size; i++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->external_to_internal_register[i] = ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (int) 0xDEADBEEF; ~~~~~~~~~~~~~~~~~ } ~ /* This is explicitly [r] rather than [bufp->re_nsub] for ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the case that the named nonshy group references an ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ unused register number less than bufp->re_nsub. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->external_to_internal_register[r] = ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->re_ngroups; ~~~~~~~~~~~~~~~~~ } ~ if (COMPILE_STACK_FULL) ~~~~~~~~~~~~~~~~~~~~~~~ { ~ RETALLOC (compile_stack.stack, compile_stack.size << 1, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ compile_stack_elt_t); ~~~~~~~~~~~~~~~~~~~~~ if (compile_stack.stack == NULL) return REG_ESPACE; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ compile_stack.size <<= 1; ~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* These are the values to restore when we hit end of this ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ group. They are all relative offsets, so that if the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ whole pattern moves because of realloc, they will still ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ be valid. */ ~~~~~~~~~~~~~ COMPILE_STACK_TOP.begalt_offset = begalt - bufp->buffer; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ COMPILE_STACK_TOP.fixup_alt_jump ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ = fixup_alt_jump ? fixup_alt_jump - bufp->buffer + 1 : 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ COMPILE_STACK_TOP.laststart_offset = buf_end - bufp->buffer; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ COMPILE_STACK_TOP.regnum = bufp->re_ngroups; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ COMPILE_STACK_TOP.inner_group_offset ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ = buf_end - bufp->buffer + 3; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* We will eventually replace the 0 with the number of ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ groups inner to this one, using inner_group_offset, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ above. */ ~~~~~~~~~ GET_BUFFER_SPACE (5); ~~~~~~~~~~~~~~~~~~~~~ store_op2 (start_memory, buf_end, bufp->re_ngroups, 0); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ buf_end += 5; ~~~~~~~~~~~~~ compile_stack.avail++; ~~~~~~~~~~~~~~~~~~~~~~ fixup_alt_jump = 0; ~~~~~~~~~~~~~~~~~~~ laststart = 0; ~~~~~~~~~~~~~~ begalt = buf_end; ~~~~~~~~~~~~~~~~~ pending_exact = 0; ~~~~~~~~~~~~~~~~~~ } ~ break; ~~~~~~ case ')': ~~~~~~~~~ if (syntax & RE_NO_BK_PARENS) goto normal_backslash; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (COMPILE_STACK_EMPTY) ~~~~~~~~~~~~~~~~~~~~~~~~ { ~ if (syntax & RE_UNMATCHED_RIGHT_PAREN_ORD) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto normal_backslash; ~~~~~~~~~~~~~~~~~~~~~~ else ~~~~ FREE_STACK_RETURN (REG_ERPAREN); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ handle_close: ~~~~~~~~~~~~~ if (fixup_alt_jump) ~~~~~~~~~~~~~~~~~~~ { /* Push a dummy failure point at the end of the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ alternative for a possible future ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `pop_failure_jump' to pop. See comments at ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `push_dummy_failure' in `re_match_2'. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ BUF_PUSH (push_dummy_failure); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* We allocated space for this jump when we assigned ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ to `fixup_alt_jump', in the `handle_alt' case below. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ STORE_JUMP (jump_past_alt, fixup_alt_jump, buf_end - 1); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* See similar code for backslashed left paren above. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (COMPILE_STACK_EMPTY) ~~~~~~~~~~~~~~~~~~~~~~~~ { ~ if (syntax & RE_UNMATCHED_RIGHT_PAREN_ORD) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto normal_char; ~~~~~~~~~~~~~~~~~ else ~~~~ FREE_STACK_RETURN (REG_ERPAREN); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* Since we just checked for an empty stack above, this ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ``can't happen''. */ ~~~~~~~~~~~~~~~~~~~~~ assert (compile_stack.avail != 0); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ /* We don't just want to restore into `regnum', because ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ later groups should continue to be numbered higher, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ as in `(ab)c(de)' -- the second group is #2. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regnum_t this_group_regnum; ~~~~~~~~~~~~~~~~~~~~~~~~~~~ unsigned char *inner_group_loc; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ compile_stack.avail--; ~~~~~~~~~~~~~~~~~~~~~~ begalt = bufp->buffer + COMPILE_STACK_TOP.begalt_offset; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fixup_alt_jump ~~~~~~~~~~~~~~ = COMPILE_STACK_TOP.fixup_alt_jump ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ? bufp->buffer + COMPILE_STACK_TOP.fixup_alt_jump - 1 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ : 0; ~~~~ laststart = bufp->buffer + COMPILE_STACK_TOP.laststart_offset; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ this_group_regnum = COMPILE_STACK_TOP.regnum; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ pending_exact = 0; ~~~~~~~~~~~~~~~~~~ /* We're at the end of the group, so now we know how many ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ groups were inside this one. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ inner_group_loc ~~~~~~~~~~~~~~~ = bufp->buffer + COMPILE_STACK_TOP.inner_group_offset; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ STORE_NUMBER (inner_group_loc, regnum - this_group_regnum); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ GET_BUFFER_SPACE (5); ~~~~~~~~~~~~~~~~~~~~~ store_op2 (stop_memory, buf_end, this_group_regnum, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regnum - this_group_regnum); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ buf_end += 5; ~~~~~~~~~~~~~ } ~ break; ~~~~~~ case '|': /* `\|'. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~ if (syntax & RE_LIMITED_OPS || syntax & RE_NO_BK_VBAR) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto normal_backslash; ~~~~~~~~~~~~~~~~~~~~~~ handle_alt: ~~~~~~~~~~~ if (syntax & RE_LIMITED_OPS) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto normal_char; ~~~~~~~~~~~~~~~~~ /* Insert before the previous alternative a jump which ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ jumps to this alternative if the former fails. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ GET_BUFFER_SPACE (3); ~~~~~~~~~~~~~~~~~~~~~ INSERT_JUMP (on_failure_jump, begalt, buf_end + 6); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ pending_exact = 0; ~~~~~~~~~~~~~~~~~~ buf_end += 3; ~~~~~~~~~~~~~ /* The alternative before this one has a jump after it ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ which gets executed if it gets matched. Adjust that ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ jump so it will jump to this alternative's analogous ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ jump (put in below, which in turn will jump to the next ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (if any) alternative's such jump, etc.). The last such ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ jump jumps to the correct final destination. A picture: ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ _____ _____ ~~~~~~~~~~~ | | | | ~~~~~~~~~~~ | v | v ~~~~~~~~~~~ a | b | c ~~~~~~~~~~~ If we are at `b', then fixup_alt_jump right now points to a ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ three-byte space after `a'. We'll put in the jump, set ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fixup_alt_jump to right after `b', and leave behind three ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bytes which we'll fill in when we get to after `c'. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (fixup_alt_jump) ~~~~~~~~~~~~~~~~~~~ STORE_JUMP (jump_past_alt, fixup_alt_jump, buf_end); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Mark and leave space for a jump after this alternative, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ to be filled in later either by next alternative or ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ when know we're at the end of a series of alternatives. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fixup_alt_jump = buf_end; ~~~~~~~~~~~~~~~~~~~~~~~~~ GET_BUFFER_SPACE (3); ~~~~~~~~~~~~~~~~~~~~~ buf_end += 3; ~~~~~~~~~~~~~ laststart = 0; ~~~~~~~~~~~~~~ begalt = buf_end; ~~~~~~~~~~~~~~~~~ break; ~~~~~~ case '{': ~~~~~~~~~ /* If \{ is a literal. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!(syntax & RE_INTERVALS) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* If we're at `\{' and it's not the open-interval ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ operator. */ ~~~~~~~~~~~~~ || ((syntax & RE_INTERVALS) && (syntax & RE_NO_BK_BRACES)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || (p - 2 == pattern && p == pend)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto normal_backslash; ~~~~~~~~~~~~~~~~~~~~~~ #define BAD_INTERVAL(errnum) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ do \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (syntax & RE_NO_BK_BRACES) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto unfetch_interval; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ else \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ FREE_STACK_RETURN (errnum); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } while (0) ~~~~~~~~~~~ handle_interval: ~~~~~~~~~~~~~~~~ { ~ /* If got here, then the syntax allows intervals. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* At least (most) this many matches must be made. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int lower_bound = 0, upper_bound = -1; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ beg_interval = p - 1; ~~~~~~~~~~~~~~~~~~~~~ if (p == pend || itext_ichar_eql (p, '+')) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ BAD_INTERVAL (REG_EBRACE); ~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ GET_UNSIGNED_NUMBER (lower_bound); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (p == pend) BAD_INTERVAL (REG_EBRACE); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PATFETCH (c); ~~~~~~~~~~~~~ if (c == ',') ~~~~~~~~~~~~~ { ~ if (p == pend || itext_ichar_eql (p, '+')) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ BAD_INTERVAL (REG_EBRACE); ~~~~~~~~~~~~~~~~~~~~~~~~~~ GET_UNSIGNED_NUMBER (upper_bound); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (upper_bound < 0) upper_bound = RE_DUP_MAX; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (p == pend) BAD_INTERVAL (REG_EBRACE); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PATFETCH (c); ~~~~~~~~~~~~~ } ~ else ~~~~ { ~ /* Interval such as `{1}' => match exactly once. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ upper_bound = lower_bound; ~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ if (lower_bound > upper_bound) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ BAD_INTERVAL (REG_EBRACE); ~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ if (upper_bound > RE_DUP_MAX) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ BAD_INTERVAL (REG_ESIZEBR); ~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ if (!(syntax & RE_NO_BK_BRACES)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ if (c != '\\') ~~~~~~~~~~~~~~ FREE_STACK_RETURN (REG_BADBR); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (p == pend) ~~~~~~~~~~~~~~ FREE_STACK_RETURN (REG_EESCAPE); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PATFETCH (c); ~~~~~~~~~~~~~ } ~ if (c != '}') ~~~~~~~~~~~~~ { ~ BAD_INTERVAL (REG_EBRACE); ~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* We just parsed a valid interval. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* It's invalid to have no preceding RE. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!laststart) ~~~~~~~~~~~~~~~ { ~ if (syntax & RE_CONTEXT_INVALID_OPS) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ FREE_STACK_RETURN (REG_BADRPT); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ else if (syntax & RE_CONTEXT_INDEP_OPS) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ laststart = buf_end; ~~~~~~~~~~~~~~~~~~~~ else ~~~~ goto unfetch_interval; ~~~~~~~~~~~~~~~~~~~~~~ } ~ /* If the upper bound is zero, don't want to succeed at ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ all; jump from `laststart' to `b + 3', which will be ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the end of the buffer after we insert the jump. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (upper_bound == 0) ~~~~~~~~~~~~~~~~~~~~~ { ~ GET_BUFFER_SPACE (3); ~~~~~~~~~~~~~~~~~~~~~ INSERT_JUMP (jump, laststart, buf_end + 3); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ buf_end += 3; ~~~~~~~~~~~~~ } ~ /* Otherwise, we have a nontrivial interval. When ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ we're all done, the pattern will look like: ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ set_number_at ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ set_number_at ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ succeed_n ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ~~~~~~~~~~~~~~ jump_n ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (The upper bound and `jump_n' are omitted if ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `upper_bound' is 1, though.) */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ else ~~~~ { /* If the upper bound is > 1, we need to insert ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ more at the end of the loop. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int nbytes = 10 + (upper_bound > 1) * 10; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ GET_BUFFER_SPACE (nbytes); ~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Initialize lower bound of the `succeed_n', even ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ though it will be set during matching by its ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ attendant `set_number_at' (inserted next), ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ because `re_compile_fastmap' needs to know. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Jump to the `jump_n' we might insert below. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ INSERT_JUMP2 (succeed_n, laststart, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ buf_end + 5 + (upper_bound > 1) * 5, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ lower_bound); ~~~~~~~~~~~~~ buf_end += 5; ~~~~~~~~~~~~~ /* Code to initialize the lower bound. Insert ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ before the `succeed_n'. The `5' is the last two ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bytes of this `set_number_at', plus 3 bytes of ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the following `succeed_n'. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ insert_op2 (set_number_at, laststart, 5, lower_bound, buf_end); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ buf_end += 5; ~~~~~~~~~~~~~ if (upper_bound > 1) ~~~~~~~~~~~~~~~~~~~~ { /* More than one repetition is allowed, so ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ append a backward jump to the `succeed_n' ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ that starts this interval. ~~~~~~~~~~~~~~~~~~~~~~~~~~ When we've reached this during matching, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ we'll have matched the interval once, so ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ jump back only `upper_bound - 1' times. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ STORE_JUMP2 (jump_n, buf_end, laststart + 5, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ upper_bound - 1); ~~~~~~~~~~~~~~~~~ buf_end += 5; ~~~~~~~~~~~~~ /* The location we want to set is the second ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ parameter of the `jump_n'; that is `b-2' as ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ an absolute address. `laststart' will be ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the `set_number_at' we're about to insert; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `laststart+3' the number to set, the source ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for the relative address. But we are ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ inserting into the middle of the pattern -- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ so everything is getting moved up by 5. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Conclusion: (b - 2) - (laststart + 3) + 5, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ i.e., b - laststart. ~~~~~~~~~~~~~~~~~~~~ We insert this at the beginning of the loop ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ so that if we fail during matching, we'll ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ reinitialize the bounds. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ insert_op2 (set_number_at, laststart, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ buf_end - laststart, ~~~~~~~~~~~~~~~~~~~~ upper_bound - 1, buf_end); ~~~~~~~~~~~~~~~~~~~~~~~~~~ buf_end += 5; ~~~~~~~~~~~~~ } ~ } ~ pending_exact = 0; ~~~~~~~~~~~~~~~~~~ beg_interval = NULL; ~~~~~~~~~~~~~~~~~~~~ } ~ break; ~~~~~~ #undef BAD_INTERVAL ~~~~~~~~~~~~~~~~~~~ unfetch_interval: ~~~~~~~~~~~~~~~~~ /* If an invalid interval, match the characters as literals. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ assert (beg_interval); ~~~~~~~~~~~~~~~~~~~~~~ p = beg_interval; ~~~~~~~~~~~~~~~~~ beg_interval = NULL; ~~~~~~~~~~~~~~~~~~~~ /* normal_char and normal_backslash need `c'. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PATFETCH (c); ~~~~~~~~~~~~~ if (!(syntax & RE_NO_BK_BRACES)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ if (p > pattern && p[-1] == '\\') ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto normal_backslash; ~~~~~~~~~~~~~~~~~~~~~~ } ~ goto normal_char; ~~~~~~~~~~~~~~~~~ #ifdef emacs ~~~~~~~~~~~~ /* There is no way to specify the before_dot and after_dot ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ operators. rms says this is ok. --karl */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case '=': ~~~~~~~~~ BUF_PUSH (at_dot); ~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case 's': ~~~~~~~~~ laststart = buf_end; ~~~~~~~~~~~~~~~~~~~~ PATFETCH (c); ~~~~~~~~~~~~~ /* XEmacs addition */ ~~~~~~~~~~~~~~~~~~~~~ if (c >= 0x80 || syntax_spec_code[c] == 0377) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ FREE_STACK_RETURN (REG_ESYNTAX); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ BUF_PUSH_2 (syntaxspec, syntax_spec_code[c]); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case 'S': ~~~~~~~~~ laststart = buf_end; ~~~~~~~~~~~~~~~~~~~~ PATFETCH (c); ~~~~~~~~~~~~~ /* XEmacs addition */ ~~~~~~~~~~~~~~~~~~~~~ if (c >= 0x80 || syntax_spec_code[c] == 0377) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ FREE_STACK_RETURN (REG_ESYNTAX); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ BUF_PUSH_2 (notsyntaxspec, syntax_spec_code[c]); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ #ifdef MULE ~~~~~~~~~~~ /* 97.2.17 jhod merged in to XEmacs from mule-2.3 */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case 'c': ~~~~~~~~~ laststart = buf_end; ~~~~~~~~~~~~~~~~~~~~ PATFETCH_RAW (c); ~~~~~~~~~~~~~~~~~ if (c < 32 || c > 127) ~~~~~~~~~~~~~~~~~~~~~~ FREE_STACK_RETURN (REG_ECATEGORY); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ BUF_PUSH_2 (categoryspec, c); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case 'C': ~~~~~~~~~ laststart = buf_end; ~~~~~~~~~~~~~~~~~~~~ PATFETCH_RAW (c); ~~~~~~~~~~~~~~~~~ if (c < 32 || c > 127) ~~~~~~~~~~~~~~~~~~~~~~ FREE_STACK_RETURN (REG_ECATEGORY); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ BUF_PUSH_2 (notcategoryspec, c); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ /* end of category patch */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif /* MULE */ ~~~~~~~~~~~~~~~~~ #endif /* emacs */ ~~~~~~~~~~~~~~~~~~ case 'w': ~~~~~~~~~ laststart = buf_end; ~~~~~~~~~~~~~~~~~~~~ BUF_PUSH (wordchar); ~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case 'W': ~~~~~~~~~ laststart = buf_end; ~~~~~~~~~~~~~~~~~~~~ BUF_PUSH (notwordchar); ~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case '<': ~~~~~~~~~ BUF_PUSH (wordbeg); ~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case '>': ~~~~~~~~~ BUF_PUSH (wordend); ~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case 'b': ~~~~~~~~~ BUF_PUSH (wordbound); ~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case 'B': ~~~~~~~~~ BUF_PUSH (notwordbound); ~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case '`': ~~~~~~~~~ BUF_PUSH (begbuf); ~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case '\'': ~~~~~~~~~~ BUF_PUSH (endbuf); ~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case '1': case '2': case '3': case '4': case '5': ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case '6': case '7': case '8': case '9': ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ regnum_t reg = -1, regint; ~~~~~~~~~~~~~~~~~~~~~~~~~~ if (syntax & RE_NO_BK_REFS) ~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto normal_char; ~~~~~~~~~~~~~~~~~ PATUNFETCH; ~~~~~~~~~~~ GET_UNSIGNED_NUMBER (reg); ~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Progressively divide down the backreference until we find ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ one that corresponds to an existing register. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ while (reg > 10 && ~~~~~~~~~~~~~~~~~~ (syntax & RE_NO_MULTI_DIGIT_BK_REFS ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || reg > bufp->re_nsub ~~~~~~~~~~~~~~~~~~~~~~ || (bufp->external_to_internal_register[reg] ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ == (int) 0xDEADBEEF))) ~~~~~~~~~~~~~~~~~~~~~~ { ~ PATUNFETCH; ~~~~~~~~~~~ reg /= 10; ~~~~~~~~~~ } ~ if (reg > bufp->re_nsub ~~~~~~~~~~~~~~~~~~~~~~~ || (bufp->external_to_internal_register[reg] ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ == (int) 0xDEADBEEF)) ~~~~~~~~~~~~~~~~~~~~~ { ~ /* \N with one digit with a non-existing group has always ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ been a syntax error. ~~~~~~~~~~~~~~~~~~~~ GNU as of Fr 27 Mär 2020 16:24:07 GMT do not accept ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ multidigit backreferences; if they did there would be ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ an argument for this not being an error for those ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ backreferences that are less than some known named ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ backreference. As it is currently we should error, this ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ will give those writing code for XEmacs better ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ feedback. */ ~~~~~~~~~~~~ FREE_STACK_RETURN (REG_ESUBREG); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ regint = bufp->external_to_internal_register[reg]; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Can't back reference to a subexpression if inside of it. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (group_in_compile_stack (compile_stack, regint)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ /* Check REG, not REGINT. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ while (reg > 10) ~~~~~~~~~~~~~~~~ { ~ PATUNFETCH; ~~~~~~~~~~~ reg = reg / 10; ~~~~~~~~~~~~~~~ } ~ goto normal_char; ~~~~~~~~~~~~~~~~~ } ~ #ifdef emacs ~~~~~~~~~~~~ if (reg > 9 && ~~~~~~~~~~~~~~ bufp->warned_about_incompatible_back_references == 0) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ bufp->warned_about_incompatible_back_references = 1; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ warn_when_safe (intern ("regex"), Qinfo, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ "Back reference \\%d now has new " ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ "semantics in %s", reg, pattern); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ #endif ~~~~~~ laststart = buf_end; ~~~~~~~~~~~~~~~~~~~~ GET_BUFFER_SPACE (3); ~~~~~~~~~~~~~~~~~~~~~ store_op1 (duplicate, buf_end, regint); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ buf_end += 3; ~~~~~~~~~~~~~ } ~ break; ~~~~~~ case '+': ~~~~~~~~~ case '?': ~~~~~~~~~ if (syntax & RE_BK_PLUS_QM) ~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto handle_plus; ~~~~~~~~~~~~~~~~~ else ~~~~ goto normal_backslash; ~~~~~~~~~~~~~~~~~~~~~~ default: ~~~~~~~~ normal_backslash: ~~~~~~~~~~~~~~~~~ /* You might think it would be useful for \ to mean ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ not to translate; but if we don't translate it, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ it will never match anything. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ c = RE_TRANSLATE (c); ~~~~~~~~~~~~~~~~~~~~~ goto normal_char; ~~~~~~~~~~~~~~~~~ } ~ break; ~~~~~~ default: ~~~~~~~~ /* Expects the character in `c'. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* `p' points to the location after where `c' came from. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ normal_char: ~~~~~~~~~~~~ { ~ /* The following conditional synced to GNU Emacs 22.1. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* If no exactn currently being built. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!pending_exact ~~~~~~~~~~~~~~~~~~ /* If last exactn not at current position. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || pending_exact + *pending_exact + 1 != buf_end ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* We have only one byte following the exactn for the count. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || *pending_exact >= (1 << BYTEWIDTH) - MAX_ICHAR_LEN ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* If followed by a repetition operator. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ If the lookahead fails because of end of pattern, any ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ trailing backslash will get caught later. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || (p != pend && (*p == '*' || *p == '^')) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || ((syntax & RE_BK_PLUS_QM) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ? p + 1 < pend && *p == '\\' && (p[1] == '+' || p[1] == '?') ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ : p != pend && (*p == '+' || *p == '?')) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || ((syntax & RE_INTERVALS) ~~~~~~~~~~~~~~~~~~~~~~~~~~~ && ((syntax & RE_NO_BK_BRACES) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ? p != pend && *p == '{' ~~~~~~~~~~~~~~~~~~~~~~~~ : p + 1 < pend && (p[0] == '\\' && p[1] == '{')))) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ /* Start building a new exactn. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ laststart = buf_end; ~~~~~~~~~~~~~~~~~~~~ BUF_PUSH_2 (exactn, 0); ~~~~~~~~~~~~~~~~~~~~~~~ pending_exact = buf_end - 1; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ #ifndef MULE ~~~~~~~~~~~~ BUF_PUSH (c); ~~~~~~~~~~~~~ (*pending_exact)++; ~~~~~~~~~~~~~~~~~~~ #else ~~~~~ { ~ Bytecount bt_count; ~~~~~~~~~~~~~~~~~~~ Ibyte tmp_buf[MAX_ICHAR_LEN]; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int i; ~~~~~~ bt_count = set_itext_ichar (tmp_buf, c); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (i = 0; i < bt_count; i++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ BUF_PUSH (tmp_buf[i]); ~~~~~~~~~~~~~~~~~~~~~~ (*pending_exact)++; ~~~~~~~~~~~~~~~~~~~ } ~ } ~ #endif ~~~~~~ break; ~~~~~~ } ~ } /* switch (c) */ ~~~~~~~~~~~~~~~~~~ } /* while p != pend */ ~~~~~~~~~~~~~~~~~~~~~~~ /* Through the pattern now. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (fixup_alt_jump) ~~~~~~~~~~~~~~~~~~~ STORE_JUMP (jump_past_alt, fixup_alt_jump, buf_end); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!COMPILE_STACK_EMPTY) ~~~~~~~~~~~~~~~~~~~~~~~~~ FREE_STACK_RETURN (REG_EPAREN); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* If we don't want backtracking, force success ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the first time we reach the end of the compiled pattern. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (syntax & RE_NO_POSIX_BACKTRACKING) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ BUF_PUSH (succeed); ~~~~~~~~~~~~~~~~~~~ xfree (compile_stack.stack); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* We have succeeded; set the length of the buffer. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->used = buf_end - bufp->buffer; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef DEBUG ~~~~~~~~~~~~ if (debug_regexps & RE_DEBUG_COMPILATION) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ DEBUG_PRINT1 ("\nCompiled pattern: \n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ print_compiled_pattern (bufp); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ #endif /* DEBUG */ ~~~~~~~~~~~~~~~~~~ #ifndef MATCH_MAY_ALLOCATE ~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Initialize the failure stack to the largest possible stack. This ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ isn't necessary unless we're trying to avoid calling alloca in ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the search and match routines. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ int num_regs = bufp->re_ngroups + 1; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Since DOUBLE_FAIL_STACK refuses to double only if the current size ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ is strictly greater than re_max_failures, the largest possible stack ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ is 2 * re_max_failures failure points. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (fail_stack.size < (2 * re_max_failures * MAX_FAILURE_ITEMS)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ fail_stack.size = (2 * re_max_failures * MAX_FAILURE_ITEMS); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (! fail_stack.stack) ~~~~~~~~~~~~~~~~~~~~~~~ fail_stack.stack ~~~~~~~~~~~~~~~~ = (fail_stack_elt_t *) xmalloc (fail_stack.size ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ * sizeof (fail_stack_elt_t)); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ else ~~~~ fail_stack.stack ~~~~~~~~~~~~~~~~ = (fail_stack_elt_t *) xrealloc (fail_stack.stack, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (fail_stack.size ~~~~~~~~~~~~~~~~ * sizeof (fail_stack_elt_t))); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ regex_grow_registers (num_regs); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ #endif /* not MATCH_MAY_ALLOCATE */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return REG_NOERROR; ~~~~~~~~~~~~~~~~~~~ } /* regex_compile */ ~~~~~~~~~~~~~~~~~~~~~ ~ /* Subroutines for `regex_compile'. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Store OP at LOC followed by two-byte integer parameter ARG. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ static void ~~~~~~~~~~~ store_op1 (re_opcode_t op, unsigned char *loc, int arg) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ *loc = (unsigned char) op; ~~~~~~~~~~~~~~~~~~~~~~~~~~ STORE_NUMBER (loc + 1, arg); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* Like `store_op1', but for two two-byte parameters ARG1 and ARG2. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ static void ~~~~~~~~~~~ store_op2 (re_opcode_t op, unsigned char *loc, int arg1, int arg2) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ *loc = (unsigned char) op; ~~~~~~~~~~~~~~~~~~~~~~~~~~ STORE_NUMBER (loc + 1, arg1); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ STORE_NUMBER (loc + 3, arg2); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* Copy the bytes from LOC to END to open up three bytes of space at LOC ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for OP followed by two-byte integer parameter ARG. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ static void ~~~~~~~~~~~ insert_op1 (re_opcode_t op, unsigned char *loc, int arg, unsigned char *end) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ REGISTER unsigned char *pfrom = end; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ REGISTER unsigned char *pto = end + 3; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ while (pfrom != loc) ~~~~~~~~~~~~~~~~~~~~ *--pto = *--pfrom; ~~~~~~~~~~~~~~~~~~ store_op1 (op, loc, arg); ~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* Like `insert_op1', but for two two-byte parameters ARG1 and ARG2. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ static void ~~~~~~~~~~~ insert_op2 (re_opcode_t op, unsigned char *loc, int arg1, int arg2, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ unsigned char *end) ~~~~~~~~~~~~~~~~~~~ { ~ REGISTER unsigned char *pfrom = end; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ REGISTER unsigned char *pto = end + 5; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ while (pfrom != loc) ~~~~~~~~~~~~~~~~~~~~ *--pto = *--pfrom; ~~~~~~~~~~~~~~~~~~ store_op2 (op, loc, arg1, arg2); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* P points to just after a ^ in PATTERN. Return true if that ^ comes ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ after an alternative or a begin-subexpression. We assume there is at ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ least one character before the ^. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ static re_bool ~~~~~~~~~~~~~~ at_begline_loc_p (re_char *pattern, re_char *p, reg_syntax_t syntax) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ re_char *prev = p - 2; ~~~~~~~~~~~~~~~~~~~~~~ re_bool prev_prev_backslash = prev > pattern && prev[-1] == '\\'; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return ~~~~~~ /* After a subexpression? */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (*prev == '(' && (syntax & RE_NO_BK_PARENS || prev_prev_backslash)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* After an alternative? */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || (*prev == '|' && (syntax & RE_NO_BK_VBAR || prev_prev_backslash)); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* The dual of at_begline_loc_p. This one is for $. We assume there is ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ at least one character after the $, i.e., `P < PEND'. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ static re_bool ~~~~~~~~~~~~~~ at_endline_loc_p (re_char *p, re_char *pend, int syntax) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ re_char *next = p; ~~~~~~~~~~~~~~~~~~ re_bool next_backslash = *next == '\\'; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_char *next_next = p + 1 < pend ? p + 1 : 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return ~~~~~~ /* Before a subexpression? */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (syntax & RE_NO_BK_PARENS ? *next == ')' ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ : next_backslash && next_next && *next_next == ')') ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Before an alternative? */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || (syntax & RE_NO_BK_VBAR ? *next == '|' ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ : next_backslash && next_next && *next_next == '|'); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* Returns true if REGNUM is in one of COMPILE_STACK's elements and ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ false if it's not. */ ~~~~~~~~~~~~~~~~~~~~~~ static re_bool ~~~~~~~~~~~~~~ group_in_compile_stack (compile_stack_type compile_stack, regnum_t regnum) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ int this_element; ~~~~~~~~~~~~~~~~~ for (this_element = compile_stack.avail - 1; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ this_element >= 0; ~~~~~~~~~~~~~~~~~~ this_element--) ~~~~~~~~~~~~~~~ if (compile_stack.stack[this_element].regnum == regnum) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return true; ~~~~~~~~~~~~ return false; ~~~~~~~~~~~~~ } ~ /* Read the ending character of a range (in a bracket expression) from the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ uncompiled pattern *P_PTR (which ends at PEND). We assume the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ starting character is in `P[-2]'. (`P[-1]' is the character `-'.) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Then we set the translation of all bits between the starting and ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ending characters (inclusive) in the compiled pattern B. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Return an error code. ~~~~~~~~~~~~~~~~~~~~~ We use these short variable names so we can use the same macros as ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `regex_compile' itself. ~~~~~~~~~~~~~~~~~~~~~~~ Under Mule, this is only called when both chars of the range are ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ASCII. */ ~~~~~~~~~ static reg_errcode_t ~~~~~~~~~~~~~~~~~~~~ compile_range (re_char **p_ptr, re_char *pend, RE_TRANSLATE_TYPE translate, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ reg_syntax_t syntax, unsigned char *buf_end) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ Ichar this_char; ~~~~~~~~~~~~~~~~ re_char *p = *p_ptr; ~~~~~~~~~~~~~~~~~~~~ int range_start, range_end; ~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (p == pend) ~~~~~~~~~~~~~~ return REG_ERANGE; ~~~~~~~~~~~~~~~~~~ /* Even though the pattern is a signed `char *', we need to fetch ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ with unsigned char *'s; if the high bit of the pattern character ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ is set, the range endpoints will be negative if we fetch using a ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ signed char *. ~~~~~~~~~~~~~~ We also want to fetch the endpoints without translating them; the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ appropriate translation is done in the bit-setting loop below. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* The SVR4 compiler on the 3B2 had trouble with unsigned const char *. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ range_start = ((const unsigned char *) p)[-2]; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ range_end = ((const unsigned char *) p)[0]; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Have to increment the pointer into the pattern string, so the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ caller isn't still at the ending character. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (*p_ptr)++; ~~~~~~~~~~~ /* If the start is after the end, the range is empty. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (range_start > range_end) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return syntax & RE_NO_EMPTY_RANGES ? REG_ERANGE : REG_NOERROR; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Here we see why `this_char' has to be larger than an `unsigned ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ char' -- the range is inclusive, so if `range_end' == 0xff ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (assuming 8-bit characters), we would otherwise go into an infinite ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ loop, since all characters <= 0xff. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (this_char = range_start; this_char <= range_end; this_char++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ SET_LIST_BIT (RE_TRANSLATE (this_char)); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ return REG_NOERROR; ~~~~~~~~~~~~~~~~~~~ } ~ #ifdef MULE ~~~~~~~~~~~ static reg_errcode_t ~~~~~~~~~~~~~~~~~~~~ compile_extended_range (re_char **p_ptr, re_char *pend, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RE_TRANSLATE_TYPE translate, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ reg_syntax_t syntax, Lisp_Object rtab) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ Ichar this_char, range_start, range_end; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ const Ibyte *p; ~~~~~~~~~~~~~~~ if (*p_ptr == pend) ~~~~~~~~~~~~~~~~~~~ return REG_ERANGE; ~~~~~~~~~~~~~~~~~~ p = (const Ibyte *) *p_ptr; ~~~~~~~~~~~~~~~~~~~~~~~~~~~ range_end = itext_ichar (p); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ p--; /* back to '-' */ ~~~~~~~~~~~~~~~~~~~~~~ DEC_IBYTEPTR (p); /* back to start of range */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* We also want to fetch the endpoints without translating them; the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ appropriate translation is done in the bit-setting loop below. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ range_start = itext_ichar (p); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ INC_IBYTEPTR (*p_ptr); ~~~~~~~~~~~~~~~~~~~~~~ /* If the start is after the end, the range is empty. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (range_start > range_end) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return syntax & RE_NO_EMPTY_RANGES ? REG_ERANGE : REG_NOERROR; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifndef UNICODE_INTERNAL ~~~~~~~~~~~~~~~~~~~~~~~~ /* Can't have ranges spanning different charsets, except maybe for ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ranges entirely within the first 256 chars. (The intent of this is that ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the effect of such a range would be unpredictable, since there is no ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ well-defined ordering over charsets and the particular assignment of ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ charset ID's is arbitrary.) This does not apply to Unicode, with ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ well-defined character values. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if ((range_start >= 0x100 || range_end >= 0x100) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ && !EQ (old_mule_ichar_charset (range_start), ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ old_mule_ichar_charset (range_end))) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return REG_ERANGESPAN; ~~~~~~~~~~~~~~~~~~~~~~ #endif /* not UNICODE_INTERNAL */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (TRANSLATE_P (translate)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ /* #### This might be way inefficient if the range encompasses 10,000 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ chars or something. To be efficient, you'd have to do something like ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ this: ~~~~~ range_table a ~~~~~~~~~~~~~ range_table b; ~~~~~~~~~~~~~~ map_char_table (translation table, [range_start, range_end]) of ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ lambda (ch, translation): ~~~~~~~~~~~~~~~~~~~~~~~~~ put (ch, Qt) in a ~~~~~~~~~~~~~~~~~ put (translation, Qt) in b ~~~~~~~~~~~~~~~~~~~~~~~~~~ invert the range in a and truncate to [range_start, range_end] ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ put the union of a, b in rtab ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ This is to say, we want to map every character that has a translation ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ to its translation, and other characters to themselves. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ This assumes, as is reasonable in practice, that a translation ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ table maps individual characters to their translation, and does ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ not generally map multiple characters to the same translation. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ ~~ for (this_char = range_start; this_char <= range_end; this_char++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ SET_RANGETAB_BIT (RE_TRANSLATE (this_char)); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ } ~ else ~~~~ put_range_table (rtab, range_start, range_end, Qt); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return REG_NOERROR; ~~~~~~~~~~~~~~~~~~~ } ~ #endif /* MULE */ ~~~~~~~~~~~~~~~~~ #ifdef emacs ~~~~~~~~~~~~ reg_errcode_t ~~~~~~~~~~~~~ compile_char_class (re_wctype_t cc, Lisp_Object rtab, Bitbyte *flags_out) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ *flags_out |= re_wctype_to_bit (cc); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ switch (cc) ~~~~~~~~~~~ { ~ case RECC_ASCII: ~~~~~~~~~~~~~~~~ put_range_table (rtab, 0, 0x7f, Qt); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case RECC_XDIGIT: ~~~~~~~~~~~~~~~~~ put_range_table (rtab, 'a', 'f', Qt); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ put_range_table (rtab, 'A', 'f', Qt); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* fallthrough */ ~~~~~~~~~~~~~~~~~ case RECC_DIGIT: ~~~~~~~~~~~~~~~~ put_range_table (rtab, '0', '9', Qt); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case RECC_BLANK: ~~~~~~~~~~~~~~~~ put_range_table (rtab, ' ', ' ', Qt); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ put_range_table (rtab, '\t', '\t', Qt); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case RECC_PRINT: ~~~~~~~~~~~~~~~~ put_range_table (rtab, ' ', 0x7e, Qt); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ put_range_table (rtab, 0x80, CHAR_CODE_LIMIT, Qt); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case RECC_GRAPH: ~~~~~~~~~~~~~~~~ put_range_table (rtab, '!', 0x7e, Qt); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ put_range_table (rtab, 0x80, CHAR_CODE_LIMIT, Qt); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case RECC_NONASCII: ~~~~~~~~~~~~~~~~~~~ case RECC_MULTIBYTE: ~~~~~~~~~~~~~~~~~~~~ put_range_table (rtab, 0x80, CHAR_CODE_LIMIT, Qt); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case RECC_CNTRL: ~~~~~~~~~~~~~~~~ put_range_table (rtab, 0x00, 0x1f, Qt); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case RECC_UNIBYTE: ~~~~~~~~~~~~~~~~~~ /* Never true in XEmacs. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ /* The following all have their own bits in the class_bits argument to ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ charset_mule and charset_mule_not, they don't use the range table ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ information. */ ~~~~~~~~~~~~~~~ case RECC_ALPHA: ~~~~~~~~~~~~~~~~ case RECC_WORD: ~~~~~~~~~~~~~~~ case RECC_ALNUM: /* Equivalent to RECC_WORD */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_LOWER: ~~~~~~~~~~~~~~~~ case RECC_PUNCT: ~~~~~~~~~~~~~~~~ case RECC_SPACE: ~~~~~~~~~~~~~~~~ case RECC_UPPER: ~~~~~~~~~~~~~~~~ break; ~~~~~~ } ~ return REG_NOERROR; ~~~~~~~~~~~~~~~~~~~ } ~ #endif /* MULE */ ~~~~~~~~~~~~~~~~~ ~ /* re_compile_fastmap computes a ``fastmap'' for the compiled pattern in ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ BUFP. A fastmap records which of the (1 << BYTEWIDTH) possible ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ characters can start a string that matches the pattern. This fastmap ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ is used by re_search to skip quickly over impossible starting points. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ The caller must supply the address of a (1 << BYTEWIDTH)-byte data ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ area as BUFP->fastmap. ~~~~~~~~~~~~~~~~~~~~~~ We set the `fastmap', `fastmap_accurate', and `can_be_null' fields in ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the pattern buffer. ~~~~~~~~~~~~~~~~~~~ Returns 0 if we succeed, -2 if an internal error. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int ~~~ re_compile_fastmap (struct re_pattern_buffer *bufp ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RE_LISP_SHORT_CONTEXT_ARGS_DECL) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ int j, k; ~~~~~~~~~ #ifdef MATCH_MAY_ALLOCATE ~~~~~~~~~~~~~~~~~~~~~~~~~ fail_stack_type fail_stack; ~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ /* We don't push any register information onto the failure stack. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* &&#### this should be changed for 8-bit-fixed, for efficiency. see ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ comment marked with &&#### in re_search_2. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ REGISTER char *fastmap = bufp->fastmap; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ unsigned char *pattern = bufp->buffer; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ long size = bufp->used; ~~~~~~~~~~~~~~~~~~~~~~~ re_char *p = pattern; ~~~~~~~~~~~~~~~~~~~~~ REGISTER re_char *pend = pattern + size; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef REGEX_REL_ALLOC ~~~~~~~~~~~~~~~~~~~~~~ /* This holds the pointer to the failure stack, when ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ it is allocated relocatably. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fail_stack_elt_t *failure_stack_ptr; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ /* Assume that each path through the pattern can be null until ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ proven otherwise. We set this false at the bottom of switch ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ statement, to which we get only if a particular path doesn't ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ match the empty string. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_bool path_can_be_null = true; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* We aren't doing a `succeed_n' to begin with. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_bool succeed_n_p = false; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef ERROR_CHECK_MALLOC ~~~~~~~~~~~~~~~~~~~~~~~~~ /* The pattern comes from string data, not buffer data. We don't access ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ any buffer data, so we don't have to worry about malloc() (but the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ disallowed flag may have been set by a caller). */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int depth = bind_regex_malloc_disallowed (0); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ assert (fastmap != NULL && p != NULL); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ INIT_FAIL_STACK (); ~~~~~~~~~~~~~~~~~~~ memset (fastmap, 0, 1 << BYTEWIDTH); /* Assume nothing's valid. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->fastmap_accurate = 1; /* It will be when we're done. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->can_be_null = 0; ~~~~~~~~~~~~~~~~~~~~~~ while (1) ~~~~~~~~~ { ~ if (p == pend || *p == succeed) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ /* We have reached the (effective) end of pattern. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!FAIL_STACK_EMPTY ()) ~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ bufp->can_be_null |= path_can_be_null; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Reset for next path. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~ path_can_be_null = true; ~~~~~~~~~~~~~~~~~~~~~~~~ p = (unsigned char *) fail_stack.stack[--fail_stack.avail].pointer; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ continue; ~~~~~~~~~ } ~ else ~~~~ break; ~~~~~~ } ~ /* We should never be about to go beyond the end of the pattern. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ assert (p < pend); ~~~~~~~~~~~~~~~~~~ switch ((re_opcode_t) *p++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ /* I guess the idea here is to simply not bother with a fastmap ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if a backreference is used, since it's too hard to figure out ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the fastmap for the corresponding group. Setting ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `can_be_null' stops `re_search_2' from using the fastmap, so ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ that is all we do. */ ~~~~~~~~~~~~~~~~~~~~~~ case duplicate: ~~~~~~~~~~~~~~~ bufp->can_be_null = 1; ~~~~~~~~~~~~~~~~~~~~~~ goto done; ~~~~~~~~~~ /* Following are the cases which match a character. These end ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ with `break'. */ ~~~~~~~~~~~~~~~~~ case exactn: ~~~~~~~~~~~~ fastmap[p[1]] = 1; ~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case charset: ~~~~~~~~~~~~~ /* XEmacs: Under Mule, these bit vectors will ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ only contain values for characters below 0x80. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH))) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[j] = 1; ~~~~~~~~~~~~~~~ break; ~~~~~~ case charset_not: ~~~~~~~~~~~~~~~~~ /* Chars beyond end of map must be allowed. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef MULE ~~~~~~~~~~~ for (j = *p * BYTEWIDTH; j < 0x80; j++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[j] = 1; ~~~~~~~~~~~~~~~ /* And all extended characters must be allowed, too. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (j = 0x80; j < 0x100; j++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (ibyte_first_byte_p (j)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[j] = 1; ~~~~~~~~~~~~~~~ #else /* not MULE */ ~~~~~~~~~~~~~~~~~~~~ for (j = *p * BYTEWIDTH; j < (1 << BYTEWIDTH); j++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[j] = 1; ~~~~~~~~~~~~~~~ #endif /* MULE */ ~~~~~~~~~~~~~~~~~ for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!(p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH)))) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[j] = 1; ~~~~~~~~~~~~~~~ break; ~~~~~~ #ifdef MULE ~~~~~~~~~~~ case charset_mule: ~~~~~~~~~~~~~~~~~~ { ~ int nentries; ~~~~~~~~~~~~~ Bitbyte flags = *p++; ~~~~~~~~~~~~~~~~~~~~~ if (flags) ~~~~~~~~~~ { ~ /* We need to consult the syntax table, fastmap won't ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ work. */ ~~~~~~~~ bufp->can_be_null = 1; ~~~~~~~~~~~~~~~~~~~~~~ goto done; ~~~~~~~~~~ } ~ nentries = unified_range_table_nentries ((void *) p); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (j = 0; j < nentries; j++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ EMACS_INT first, last; ~~~~~~~~~~~~~~~~~~~~~~ Lisp_Object dummy_val; ~~~~~~~~~~~~~~~~~~~~~~ int jj; ~~~~~~~ Ibyte strr[MAX_ICHAR_LEN]; ~~~~~~~~~~~~~~~~~~~~~~~~~~ unified_range_table_get_range ((void *) p, j, &first, &last, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ &dummy_val); ~~~~~~~~~~~~ #ifndef UNICODE_INTERNAL ~~~~~~~~~~~~~~~~~~~~~~~~ for (jj = first; jj <= last && jj < 0x80; jj++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[jj] = 1; ~~~~~~~~~~~~~~~~ /* Ranges below 0x100 can span charsets, but there ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ are only two (Control-1 and Latin-1), and ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ either first or last has to be in them. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ set_itext_ichar (strr, first); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[*strr] = 1; ~~~~~~~~~~~~~~~~~~~ if (last < 0x100) ~~~~~~~~~~~~~~~~~ { ~ set_itext_ichar (strr, last); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[*strr] = 1; ~~~~~~~~~~~~~~~~~~~ } ~ else if (CHAR_CODE_LIMIT == last) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ /* This is RECC_MULTIBYTE or RECC_NONASCII; true for all ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ non-ASCII characters. */ ~~~~~~~~~~~~~~~~~~~~~~~~ jj = 0x80; ~~~~~~~~~~ while (jj < 0xA0) ~~~~~~~~~~~~~~~~~ { ~ fastmap[jj++] = 1; ~~~~~~~~~~~~~~~~~~ } ~ } ~ #else ~~~~~ /* Ranges can span charsets. We depend on the fact that ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ lead bytes are monotonically non-decreasing as ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ character values increase. @@#### This is a fairly ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ reasonable assumption in general (but DOES NOT WORK in ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ old Mule due to the ordering of private dimension-1 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ chars before official dimension-2 chars), and introduces ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ a dependency on the particular representation. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ Ibyte strrlast[MAX_ICHAR_LEN]; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ set_itext_ichar (strr, first); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ set_itext_ichar (strrlast, min (last, CHAR_CODE_LIMIT - 1)); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (jj = *strr; jj <= *strrlast; jj++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[jj] = 1; ~~~~~~~~~~~~~~~~ } ~ #endif /* not UNICODE_INTERNAL */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* If it's not a possible first byte, it can't be in the fastmap. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ In UTF-8, lead bytes are not contiguous with ASCII, so a ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ range spanning the ASCII/non-ASCII boundary will put ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ extraneous bytes in the range [0x80 - 0xBF] in the fastmap. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (j = 0x80; j < 0x100; j++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!ibyte_first_byte_p (j)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[j] = 0; ~~~~~~~~~~~~~~~ } ~ break; ~~~~~~ case charset_mule_not: ~~~~~~~~~~~~~~~~~~~~~~ { ~ int nentries; ~~~~~~~~~~~~~ int smallest_prev = 0; ~~~~~~~~~~~~~~~~~~~~~~ Bitbyte flags = *p++; ~~~~~~~~~~~~~~~~~~~~~ if (flags) ~~~~~~~~~~ { ~ /* We need to consult the syntax table, fastmap won't ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ work. */ ~~~~~~~~ bufp->can_be_null = 1; ~~~~~~~~~~~~~~~~~~~~~~ goto done; ~~~~~~~~~~ } ~ nentries = unified_range_table_nentries ((void *) p); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifndef UNICODE_INTERNAL ~~~~~~~~~~~~~~~~~~~~~~~~ for (j = 0; j < nentries; j++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ EMACS_INT first, last; ~~~~~~~~~~~~~~~~~~~~~~ Lisp_Object dummy_val; ~~~~~~~~~~~~~~~~~~~~~~ int jj; ~~~~~~~ unified_range_table_get_range ((void *) p, j, &first, &last, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ &dummy_val); ~~~~~~~~~~~~ for (jj = smallest_prev; jj < first && jj < 0x80; jj++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[jj] = 1; ~~~~~~~~~~~~~~~~ smallest_prev = last + 1; ~~~~~~~~~~~~~~~~~~~~~~~~~ if (smallest_prev >= 0x80) ~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ } ~ /* Also set lead bytes after the end */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (j = smallest_prev; j < 0x80; j++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[j] = 1; ~~~~~~~~~~~~~~~ /* Calculating which lead bytes are actually allowed ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ here is rather difficult, so we just punt and allow ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ all of them. ~~~~~~~~~~~~ */ ~~ for (j = 0x80; j < 0x100; j++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (ibyte_first_byte_p (j)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[j] = 1; ~~~~~~~~~~~~~~~ #else ~~~~~ for (j = 0; j < nentries; j++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ EMACS_INT first, last; ~~~~~~~~~~~~~~~~~~~~~~ /* This denotes a range of lead bytes that are not ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ in the fastmap. */ ~~~~~~~~~~~~~~~~~~ int firstlead, lastlead; ~~~~~~~~~~~~~~~~~~~~~~~~ Lisp_Object dummy_val; ~~~~~~~~~~~~~~~~~~~~~~ int jj; ~~~~~~~ unified_range_table_get_range ((void *) p, j, &first, &last, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ &dummy_val); ~~~~~~~~~~~~ /* With Unicode-internal, lead bytes that are entirely ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ within the range and not including the beginning or end ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ are definitely not in the fastmap. Leading bytes that ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ include the beginning or ending characters will be in ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the fastmap unless the beginning or ending characters ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ are the first or last character, respectively, that uses ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ this lead byte. ~~~~~~~~~~~~~~~ @@#### WARNING! In order to determine whether we are the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ first or last character using a lead byte we use and ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ embed in the code some knowledge of how UTF-8 works -- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ at least, the fact that the the first character using a ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ particular lead byte has the minimum-numbered trailing ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ byte in all its trailing bytes, and the last character ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ using a particular lead byte has the maximum-numbered ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ trailing byte in all its trailing bytes. We abstract ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ away the actual minimum/maximum trailing byte numbers, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ at least. We could perhaps do this more portably by ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ just looking at the representation of the character one ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ higher or lower and seeing if the lead byte changes, but ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ you'd run into the problem of invalid characters, e.g. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if you're at the edge of the range of surrogates or are ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the top-most allowed character. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ ~~ if (first < 0x80) ~~~~~~~~~~~~~~~~~ firstlead = first; ~~~~~~~~~~~~~~~~~~ else ~~~~ { ~ Ibyte strr[MAX_ICHAR_LEN]; ~~~~~~~~~~~~~~~~~~~~~~~~~~ Bytecount slen = set_itext_ichar (strr, first); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int kk; ~~~~~~~ /* Determine if we're the first character using our ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ leading byte. */ ~~~~~~~~~~~~~~~~ for (kk = 1; kk < slen; kk++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (strr[kk] != FIRST_TRAILING_BYTE) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ /* If not, this leading byte might occur, so ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ make sure it gets added to the fastmap. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ firstlead = *strr + 1; ~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ } ~ /* Otherwise, we're the first character using our ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ leading byte, and we don't need to add the leading ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ byte to the fastmap. (If our range doesn't ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ completely cover the leading byte, it will get added ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ anyway by the code handling the other end of the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ range.) */ ~~~~~~~~~~ firstlead = *strr; ~~~~~~~~~~~~~~~~~~ } ~ if (last < 0x80) ~~~~~~~~~~~~~~~~ lastlead = last; ~~~~~~~~~~~~~~~~ else ~~~~ { ~ Ibyte strr[MAX_ICHAR_LEN]; ~~~~~~~~~~~~~~~~~~~~~~~~~~ Bytecount slen ~~~~~~~~~~~~~~ = set_itext_ichar (strr, ~~~~~~~~~~~~~~~~~~~~~~~~ min (last, ~~~~~~~~~~ CHAR_CODE_LIMIT - 1)); ~~~~~~~~~~~~~~~~~~~~~~ int kk; ~~~~~~~ /* Same as above but for the last character using ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ our leading byte. */ ~~~~~~~~~~~~~~~~~~~~ for (kk = 1; kk < slen; kk++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (strr[kk] != LAST_TRAILING_BYTE) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ lastlead = *strr - 1; ~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ } ~ lastlead = *strr; ~~~~~~~~~~~~~~~~~ } ~ /* Now, FIRSTLEAD and LASTLEAD are set to the beginning and ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ end, inclusive, of a range of lead bytes that cannot be ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ in the fastmap. Essentially, we want to set all the other ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bytes to be in the fastmap. Here we handle those after ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the previous range and before this one. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (jj = smallest_prev; jj < firstlead; jj++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[jj] = 1; ~~~~~~~~~~~~~~~~ smallest_prev = lastlead + 1; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* Also set lead bytes after the end of the final range. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (j = smallest_prev; j < 0x100; j++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[j] = 1; ~~~~~~~~~~~~~~~ /* If it's not a possible first byte, it can't be in the fastmap. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ In UTF-8, lead bytes are not contiguous with ASCII, so a ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ range spanning the ASCII/non-ASCII boundary will put ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ extraneous bytes in the range [0x80 - 0xBF] in the fastmap. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (j = 0x80; j < 0x100; j++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!ibyte_first_byte_p (j)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[j] = 0; ~~~~~~~~~~~~~~~ #endif /* UNICODE_INTERNAL */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ break; ~~~~~~ #endif /* MULE */ ~~~~~~~~~~~~~~~~~ case anychar: ~~~~~~~~~~~~~ { ~ int fastmap_newline = fastmap['\n']; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* `.' matches anything ... */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef MULE ~~~~~~~~~~~ /* "anything" only includes bytes that can be the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ first byte of a character. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (j = 0; j < 0x100; j++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (ibyte_first_byte_p (j)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[j] = 1; ~~~~~~~~~~~~~~~ #else ~~~~~ for (j = 0; j < (1 << BYTEWIDTH); j++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[j] = 1; ~~~~~~~~~~~~~~~ #endif ~~~~~~ /* ... except perhaps newline. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!(bufp->syntax & RE_DOT_NEWLINE)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap['\n'] = fastmap_newline; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Return if we have already set `can_be_null'; if we have, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ then the fastmap is irrelevant. Something's wrong here. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ else if (bufp->can_be_null) ~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto done; ~~~~~~~~~~ /* Otherwise, have to check alternative paths. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ } ~ #ifndef emacs ~~~~~~~~~~~~~ case wordchar: ~~~~~~~~~~~~~~ for (j = 0; j < (1 << BYTEWIDTH); j++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (SYNTAX (ignored, j) == Sword) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[j] = 1; ~~~~~~~~~~~~~~~ break; ~~~~~~ case notwordchar: ~~~~~~~~~~~~~~~~~ for (j = 0; j < (1 << BYTEWIDTH); j++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (SYNTAX (ignored, j) != Sword) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[j] = 1; ~~~~~~~~~~~~~~~ break; ~~~~~~ #else /* emacs */ ~~~~~~~~~~~~~~~~~ case wordchar: ~~~~~~~~~~~~~~ case notwordchar: ~~~~~~~~~~~~~~~~~ case wordbound: ~~~~~~~~~~~~~~~ case notwordbound: ~~~~~~~~~~~~~~~~~~ case wordbeg: ~~~~~~~~~~~~~ case wordend: ~~~~~~~~~~~~~ case notsyntaxspec: ~~~~~~~~~~~~~~~~~~~ case syntaxspec: ~~~~~~~~~~~~~~~~ /* This match depends on text properties. These end with ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ aborting optimizations. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->can_be_null = 1; ~~~~~~~~~~~~~~~~~~~~~~ goto done; ~~~~~~~~~~ #if 0 /* all of the following code is unused now that the `syntax-table' ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ property exists -- it's trickier to do this than just look in ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the buffer. &&#### but we could just use the syntax-cache stuff ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ instead; why don't we? --ben */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case wordchar: ~~~~~~~~~~~~~~ k = (int) Sword; ~~~~~~~~~~~~~~~~ goto matchsyntax; ~~~~~~~~~~~~~~~~~ case notwordchar: ~~~~~~~~~~~~~~~~~ k = (int) Sword; ~~~~~~~~~~~~~~~~ goto matchnotsyntax; ~~~~~~~~~~~~~~~~~~~~ case syntaxspec: ~~~~~~~~~~~~~~~~ k = *p++; ~~~~~~~~~ matchsyntax: ~~~~~~~~~~~~ #ifdef MULE ~~~~~~~~~~~ for (j = 0; j < 0x80; j++) ~~~~~~~~~~~~~~~~~~~~~~~~~~ if (SYNTAX ~~~~~~~~~~ (XCHAR_TABLE (BUFFER_MIRROR_SYNTAX_TABLE (lispbuf)), j) == ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (enum syntaxcode) k) ~~~~~~~~~~~~~~~~~~~~ fastmap[j] = 1; ~~~~~~~~~~~~~~~ /* @@#### To be correct, we need to set the fastmap for any ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ lead byte any of whose characters can have this syntax code. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ This is hard to calculate so we just punt for now. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (j = 0x80; j < 0x100; j++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (ibyte_first_byte_p (j)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[j] = 1; ~~~~~~~~~~~~~~~ #else /* not MULE */ ~~~~~~~~~~~~~~~~~~~~ for (j = 0; j < (1 << BYTEWIDTH); j++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (SYNTAX ~~~~~~~~~~ (XCHAR_TABLE (BUFFER_MIRROR_SYNTAX_TABLE (lispbuf)), j) == ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (enum syntaxcode) k) ~~~~~~~~~~~~~~~~~~~~ fastmap[j] = 1; ~~~~~~~~~~~~~~~ #endif /* MULE */ ~~~~~~~~~~~~~~~~~ break; ~~~~~~ case notsyntaxspec: ~~~~~~~~~~~~~~~~~~~ k = *p++; ~~~~~~~~~ matchnotsyntax: ~~~~~~~~~~~~~~~ #ifdef MULE ~~~~~~~~~~~ for (j = 0; j < 0x80; j++) ~~~~~~~~~~~~~~~~~~~~~~~~~~ if (SYNTAX ~~~~~~~~~~ (XCHAR_TABLE ~~~~~~~~~~~~ (BUFFER_MIRROR_SYNTAX_TABLE (lispbuf)), j) != ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (enum syntaxcode) k) ~~~~~~~~~~~~~~~~~~~~ fastmap[j] = 1; ~~~~~~~~~~~~~~~ /* @@#### To be correct, we need to set the fastmap for any ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ lead byte all of whose characters do not have this syntax code. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ This is hard to calculate so we just punt for now. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (j = 0x80; j < 0x100; j++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (ibyte_first_byte_p (j)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[j] = 1; ~~~~~~~~~~~~~~~ #else /* not MULE */ ~~~~~~~~~~~~~~~~~~~~ for (j = 0; j < (1 << BYTEWIDTH); j++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (SYNTAX ~~~~~~~~~~ (XCHAR_TABLE ~~~~~~~~~~~~ (BUFFER_MIRROR_SYNTAX_TABLE (lispbuf)), j) != ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (enum syntaxcode) k) ~~~~~~~~~~~~~~~~~~~~ fastmap[j] = 1; ~~~~~~~~~~~~~~~ #endif /* MULE */ ~~~~~~~~~~~~~~~~~ break; ~~~~~~ #endif /* 0 */ ~~~~~~~~~~~~~~ #ifdef MULE ~~~~~~~~~~~ /* 97/2/17 jhod category patch */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case categoryspec: ~~~~~~~~~~~~~~~~~~ case notcategoryspec: ~~~~~~~~~~~~~~~~~~~~~ bufp->can_be_null = 1; ~~~~~~~~~~~~~~~~~~~~~~ UNBIND_REGEX_MALLOC_CHECK (); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return 0; ~~~~~~~~~ /* end if category patch */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif /* MULE */ ~~~~~~~~~~~~~~~~~ /* All cases after this match the empty string. These end with ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `continue'. */ ~~~~~~~~~~~~~~~ case before_dot: ~~~~~~~~~~~~~~~~ case at_dot: ~~~~~~~~~~~~ case after_dot: ~~~~~~~~~~~~~~~ continue; ~~~~~~~~~ #endif /* emacs */ ~~~~~~~~~~~~~~~~~~ case no_op: ~~~~~~~~~~~ case begline: ~~~~~~~~~~~~~ case endline: ~~~~~~~~~~~~~ case begbuf: ~~~~~~~~~~~~ case endbuf: ~~~~~~~~~~~~ #ifndef emacs ~~~~~~~~~~~~~ case wordbound: ~~~~~~~~~~~~~~~ case notwordbound: ~~~~~~~~~~~~~~~~~~ case wordbeg: ~~~~~~~~~~~~~ case wordend: ~~~~~~~~~~~~~ #endif ~~~~~~ case push_dummy_failure: ~~~~~~~~~~~~~~~~~~~~~~~~ continue; ~~~~~~~~~ case jump_n: ~~~~~~~~~~~~ case pop_failure_jump: ~~~~~~~~~~~~~~~~~~~~~~ case maybe_pop_jump: ~~~~~~~~~~~~~~~~~~~~ case jump: ~~~~~~~~~~ case jump_past_alt: ~~~~~~~~~~~~~~~~~~~ case dummy_failure_jump: ~~~~~~~~~~~~~~~~~~~~~~~~ EXTRACT_NUMBER_AND_INCR (j, p); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ p += j; ~~~~~~~ if (j > 0) ~~~~~~~~~~ continue; ~~~~~~~~~ /* Jump backward implies we just went through the body of a ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ loop and matched nothing. Opcode jumped to should be ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `on_failure_jump' or `succeed_n'. Just treat it like an ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ordinary jump. For a * loop, it has pushed its failure ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ point already; if so, discard that as redundant. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if ((re_opcode_t) *p != on_failure_jump ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ && (re_opcode_t) *p != succeed_n) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ continue; ~~~~~~~~~ p++; ~~~~ EXTRACT_NUMBER_AND_INCR (j, p); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ p += j; ~~~~~~~ /* If what's on the stack is where we are now, pop it. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!FAIL_STACK_EMPTY () ~~~~~~~~~~~~~~~~~~~~~~~~ && fail_stack.stack[fail_stack.avail - 1].pointer == p) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fail_stack.avail--; ~~~~~~~~~~~~~~~~~~~ continue; ~~~~~~~~~ case on_failure_jump: ~~~~~~~~~~~~~~~~~~~~~ case on_failure_keep_string_jump: ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ handle_on_failure_jump: ~~~~~~~~~~~~~~~~~~~~~~~ EXTRACT_NUMBER_AND_INCR (j, p); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* For some patterns, e.g., `(a?)?', `p+j' here points to the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ end of the pattern. We don't want to push such a point, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ since when we restore it above, entering the switch will ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ increment `p' past the end of the pattern. We don't need ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ to push such a point since we obviously won't find any more ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap entries beyond `pend'. Such a pattern can match ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the null string, though. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (p + j < pend) ~~~~~~~~~~~~~~~~~ { ~ if (!PUSH_PATTERN_OP (p + j, fail_stack)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ RESET_FAIL_STACK (); ~~~~~~~~~~~~~~~~~~~~ UNBIND_REGEX_MALLOC_CHECK (); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return -2; ~~~~~~~~~~ } ~ } ~ else ~~~~ bufp->can_be_null = 1; ~~~~~~~~~~~~~~~~~~~~~~ if (succeed_n_p) ~~~~~~~~~~~~~~~~ { ~ EXTRACT_NUMBER_AND_INCR (k, p); /* Skip the n. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ succeed_n_p = false; ~~~~~~~~~~~~~~~~~~~~ } ~ continue; ~~~~~~~~~ case succeed_n: ~~~~~~~~~~~~~~~ /* Get to the number of times to succeed. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ p += 2; ~~~~~~~ /* Increment p past the n for when k != 0. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ EXTRACT_NONNEGATIVE_AND_INCR (k, p); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (k == 0) ~~~~~~~~~~~ { ~ p -= 4; ~~~~~~~ succeed_n_p = true; /* Spaghetti code alert. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto handle_on_failure_jump; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ continue; ~~~~~~~~~ case set_number_at: ~~~~~~~~~~~~~~~~~~~ p += 4; ~~~~~~~ continue; ~~~~~~~~~ case start_memory: ~~~~~~~~~~~~~~~~~~ case stop_memory: ~~~~~~~~~~~~~~~~~ p += 4; ~~~~~~~ continue; ~~~~~~~~~ default: ~~~~~~~~ ABORT (); /* We have listed all the cases. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } /* switch *p++ */ ~~~~~~~~~~~~~~~~~~~ /* Getting here means we have found the possible starting ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ characters for one path of the pattern -- and that the empty ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ string does not match. We need not follow this path further. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Instead, look at the next alternative (remembered on the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ stack), or quit if no more. The test at the top of the loop ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ does these things. */ ~~~~~~~~~~~~~~~~~~~~~~ path_can_be_null = false; ~~~~~~~~~~~~~~~~~~~~~~~~~ p = pend; ~~~~~~~~~ } /* while p */ ~~~~~~~~~~~~~~~ /* Set `can_be_null' for the last path (also the first path, if the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ pattern is empty). */ ~~~~~~~~~~~~~~~~~~~~~~ bufp->can_be_null |= path_can_be_null; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ done: ~~~~~ RESET_FAIL_STACK (); ~~~~~~~~~~~~~~~~~~~~ UNBIND_REGEX_MALLOC_CHECK (); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return 0; ~~~~~~~~~ } /* re_compile_fastmap */ ~~~~~~~~~~~~~~~~~~~~~~~~~~ ~ /* Set REGS to hold NUM_REGS registers, storing them in STARTS and ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ENDS. Subsequent matches using PATTERN_BUFFER and REGS will use ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ this memory for recording register information. STARTS and ENDS ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ must be allocated using the malloc library routine, and must each ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ be at least NUM_REGS * sizeof (regoff_t) bytes long. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ If NUM_REGS == 0, then subsequent matches should allocate their own ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ register data. ~~~~~~~~~~~~~~ Unless this function is called, the first search or match using ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PATTERN_BUFFER will allocate its own register data, without ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ freeing the old data. */ ~~~~~~~~~~~~~~~~~~~~~~~~~ void ~~~~ re_set_registers (struct re_pattern_buffer *bufp, struct re_registers *regs, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int num_regs, regoff_t *starts, regoff_t *ends) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ if (num_regs) ~~~~~~~~~~~~~ { ~ bufp->regs_allocated = REGS_REALLOCATE; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regs->num_regs = num_regs; ~~~~~~~~~~~~~~~~~~~~~~~~~~ regs->start = starts; ~~~~~~~~~~~~~~~~~~~~~ regs->end = ends; ~~~~~~~~~~~~~~~~~ } ~ else ~~~~ { ~ bufp->regs_allocated = REGS_UNALLOCATED; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regs->num_regs = 0; ~~~~~~~~~~~~~~~~~~~ regs->start = regs->end = (regoff_t *) 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ } ~ ~ /* Searching routines. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Like re_search_2, below, but only one string is specified, and ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ doesn't let you say where to stop matching. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int ~~~ re_search (struct re_pattern_buffer *bufp, const char *string, int size, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int startpos, int range, struct re_registers *regs ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RE_LISP_CONTEXT_ARGS_DECL) ~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ return re_search_2 (bufp, NULL, 0, string, size, startpos, range, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regs, size RE_LISP_CONTEXT_ARGS); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* Using the compiled pattern in BUFP->buffer, first tries to match the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ virtual concatenation of STRING1 and STRING2, starting first at index ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ STARTPOS, then at STARTPOS + 1, and so on. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ STRING1 and STRING2 have length SIZE1 and SIZE2, respectively. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RANGE is how far to scan while trying to match. RANGE = 0 means try ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ only at STARTPOS; in general, the last start tried is STARTPOS + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RANGE. ~~~~~~ All sizes and positions refer to bytes (not chars); under Mule, the code ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ knows about the format of the text and will only check at positions ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ where a character starts. ~~~~~~~~~~~~~~~~~~~~~~~~~ With MULE, RANGE is a byte position, not a char position. The last ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ start tried is the character starting <= STARTPOS + RANGE. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ In REGS, return the indices of the virtual concatenation of STRING1 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ and STRING2 that matched the entire BUFP->buffer and its contained ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ subexpressions. ~~~~~~~~~~~~~~~ Do not consider matching one past the index STOP in the virtual ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ concatenation of STRING1 and STRING2. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ We return either the position in the strings at which the match was ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ found, -1 if no match, or -2 if error (such as failure ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ stack overflow). */ ~~~~~~~~~~~~~~~~~~~~ int ~~~ re_search_2 (struct re_pattern_buffer *bufp, const char *str1, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int size1, const char *str2, int size2, int startpos, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int range, struct re_registers *regs, int stop ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RE_LISP_CONTEXT_ARGS_DECL) ~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ int val; ~~~~~~~~ re_char *string1 = (re_char *) str1; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_char *string2 = (re_char *) str2; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ REGISTER char *fastmap = bufp->fastmap; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ REGISTER RE_TRANSLATE_TYPE translate = bufp->translate; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int total_size = size1 + size2; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int endpos = startpos + range; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef REGEX_BEGLINE_CHECK ~~~~~~~~~~~~~~~~~~~~~~~~~~ int anchored_at_begline = 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ re_char *d; ~~~~~~~~~~~ #ifdef emacs ~~~~~~~~~~~~ Internal_Format fmt = buffer_or_other_internal_format (lispobj); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef REL_ALLOC ~~~~~~~~~~~~~~~~ const Ibyte *orig_buftext = ~~~~~~~~~~~~~~~~~~~~~~~~~~~ BUFFERP (lispobj) ? ~~~~~~~~~~~~~~~~~~~ BYTE_BUF_BYTE_ADDRESS (XBUFFER (lispobj), ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ BYTE_BUF_BEG (XBUFFER (lispobj))) : ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 0; ~~ #endif ~~~~~~ #ifdef ERROR_CHECK_MALLOC ~~~~~~~~~~~~~~~~~~~~~~~~~ int depth; ~~~~~~~~~~ #endif ~~~~~~ #endif /* emacs */ ~~~~~~~~~~~~~~~~~~ int forward_search_p; ~~~~~~~~~~~~~~~~~~~~~ /* Check for out-of-range STARTPOS. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (startpos < 0 || startpos > total_size) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return -1; ~~~~~~~~~~ /* Fix up RANGE if it might eventually take us outside ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the virtual concatenation of STRING1 and STRING2. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (endpos < 0) ~~~~~~~~~~~~~~~ range = 0 - startpos; ~~~~~~~~~~~~~~~~~~~~~ else if (endpos > total_size) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ range = total_size - startpos; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ forward_search_p = range > 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (void) (forward_search_p); /* This is only used with assertions, silence the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ compiler warning when they're turned off. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* If the search isn't to be a backwards one, don't waste time in a ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ search for a pattern that must be anchored. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (bufp->used > 0 && (re_opcode_t) bufp->buffer[0] == begbuf && range > 0) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ if (startpos > 0) ~~~~~~~~~~~~~~~~~ return -1; ~~~~~~~~~~ else ~~~~ { ~ d = ((const unsigned char *) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (startpos >= size1 ? string2 - size1 : string1) + startpos); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ range = itext_ichar_len_fmt (d, fmt); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ } ~ #ifdef emacs ~~~~~~~~~~~~ /* In a forward search for something that starts with \=. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ don't keep searching past point. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (bufp->used > 0 && (re_opcode_t) bufp->buffer[0] == at_dot && range > 0) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ if (!BUFFERP (lispobj)) ~~~~~~~~~~~~~~~~~~~~~~~ return -1; ~~~~~~~~~~ range = (BYTE_BUF_PT (XBUFFER (lispobj)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - BYTE_BUF_BEGV (XBUFFER (lispobj)) - startpos); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (range < 0) ~~~~~~~~~~~~~~ return -1; ~~~~~~~~~~ } ~ #endif /* emacs */ ~~~~~~~~~~~~~~~~~~ #ifdef ERROR_CHECK_MALLOC ~~~~~~~~~~~~~~~~~~~~~~~~~ /* Do this after the above return()s. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ depth = bind_regex_malloc_disallowed (1); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ /* Update the fastmap now if not correct already. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ BEGIN_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~~~ if (fastmap && !bufp->fastmap_accurate) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (re_compile_fastmap (bufp RE_LISP_SHORT_CONTEXT_ARGS) == -2) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ END_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~ UNBIND_REGEX_MALLOC_CHECK (); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return -2; ~~~~~~~~~~ } ~ END_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~ RE_SEARCH_RELOCATE_MOVEABLE_DATA_POINTERS (); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef REGEX_BEGLINE_CHECK ~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ long i = 0; ~~~~~~~~~~~ while (i < bufp->used) ~~~~~~~~~~~~~~~~~~~~~~ { ~ if (bufp->buffer[i] == start_memory || ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->buffer[i] == stop_memory) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ i += 4; ~~~~~~~ else ~~~~ break; ~~~~~~ } ~ anchored_at_begline = i < bufp->used && bufp->buffer[i] == begline; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ #endif ~~~~~~ #ifdef emacs ~~~~~~~~~~~~ BEGIN_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~~~ /* Update the mirror syntax table if it's used and dirty. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ SYNTAX_CODE (BUFFER_MIRROR_SYNTAX_TABLE (lispbuf), 'a'); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ scache = setup_syntax_cache (scache, lispobj, lispbuf, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ offset_to_bytexpos (lispobj, startpos), ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 1); ~~~ END_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~ RE_SEARCH_RELOCATE_MOVEABLE_DATA_POINTERS (); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ /* Loop through the string, looking for a place to start matching. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (;;) ~~~~~~~~ { ~ #ifdef REGEX_BEGLINE_CHECK ~~~~~~~~~~~~~~~~~~~~~~~~~~ /* If the regex is anchored at the beginning of a line (i.e. with a ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ^), then we can speed things up by skipping to the next ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ beginning-of-line. However, to determine "beginning of line" we ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ need to look at the previous char, so can't do this check if at ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ beginning of either string. (Well, we could if at the beginning of ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the second string, but it would require additional code, and this ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ is just an optimization.) */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (anchored_at_begline && startpos > 0 && startpos != size1) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ if (range > 0) ~~~~~~~~~~~~~~ { ~ /* whose stupid idea was it anyway to make this ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ function take two strings to match?? */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int lim = 0; ~~~~~~~~~~~~ re_char *orig_d; ~~~~~~~~~~~~~~~~ re_char *stop_d; ~~~~~~~~~~~~~~~~ /* Compute limit as below in fastmap code, so we are guaranteed ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ to remain within a single string. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (startpos < size1 && startpos + range >= size1) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ lim = range - (size1 - startpos); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ d = ((const unsigned char *) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (startpos >= size1 ? string2 - size1 : string1) + startpos); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ orig_d = d; ~~~~~~~~~~~ stop_d = d + range - lim; ~~~~~~~~~~~~~~~~~~~~~~~~~ /* We want to find the next location (including the current ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ one) where the previous char is a newline, so back up one ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ and search forward for a newline. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEC_IBYTEPTR_FMT (d, fmt); /* Ok, since startpos != size1. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Written out as an if-else to avoid testing `translate' ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ inside the loop. */ ~~~~~~~~~~~~~~~~~~~~ if (TRANSLATE_P (translate)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ while (d < stop_d && ~~~~~~~~~~~~~~~~~~~~ RE_TRANSLATE_1 (itext_ichar_fmt (d, fmt, lispobj)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ != '\n') ~~~~~~~~ INC_IBYTEPTR_FMT (d, fmt); ~~~~~~~~~~~~~~~~~~~~~~~~~~ else ~~~~ while (d < stop_d && ~~~~~~~~~~~~~~~~~~~~ itext_ichar_ascii_fmt (d, fmt, lispobj) != '\n') ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ INC_IBYTEPTR_FMT (d, fmt); ~~~~~~~~~~~~~~~~~~~~~~~~~~ /* If we were stopped by a newline, skip forward over it. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Otherwise we will get in an infloop when our start position ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ was at begline. */ ~~~~~~~~~~~~~~~~~~ if (d < stop_d) ~~~~~~~~~~~~~~~ INC_IBYTEPTR_FMT (d, fmt); ~~~~~~~~~~~~~~~~~~~~~~~~~~ range -= d - orig_d; ~~~~~~~~~~~~~~~~~~~~ startpos += d - orig_d; ~~~~~~~~~~~~~~~~~~~~~~~ assert (!forward_search_p || range >= 0); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ else if (range < 0) ~~~~~~~~~~~~~~~~~~~ { ~ /* We're lazy, like in the fastmap code below */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Ichar c; ~~~~~~~~ d = ((const unsigned char *) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (startpos >= size1 ? string2 - size1 : string1) + startpos); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEC_IBYTEPTR_FMT (d, fmt); ~~~~~~~~~~~~~~~~~~~~~~~~~~ c = itext_ichar_fmt (d, fmt, lispobj); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ c = RE_TRANSLATE (c); ~~~~~~~~~~~~~~~~~~~~~ if (c != '\n') ~~~~~~~~~~~~~~ goto advance; ~~~~~~~~~~~~~ } ~ } ~ #endif /* REGEX_BEGLINE_CHECK */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* If a fastmap is supplied, skip quickly over characters that ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ cannot be the start of a match. If the pattern can match the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ null string, however, we don't need to skip characters; we want ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the first null string. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~ if (fastmap && startpos < total_size && !bufp->can_be_null) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ /* For the moment, fastmap always works as if buffer ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ is in default format, so convert chars in the search strings ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ into default format as we go along, if necessary. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ &&#### fastmap needs rethinking for 8-bit-fixed so ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ it's faster. We need it to reflect the raw ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 8-bit-fixed values. That isn't so hard if we assume ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ that the top 96 bytes represent a single 1-byte ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ charset. For 16-bit/32-bit stuff it's probably not ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ worth it to make the fastmap represent the raw, due to ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ its nature -- we'd have to use the LSB for the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap, and that causes lots of problems with Mule ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ chars, where it essentially wipes out the usefulness ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ of the fastmap entirely. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (range > 0) /* Searching forwards. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ int lim = 0; ~~~~~~~~~~~~ int irange = range; ~~~~~~~~~~~~~~~~~~~ if (startpos < size1 && startpos + range >= size1) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ lim = range - (size1 - startpos); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ d = ((const unsigned char *) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (startpos >= size1 ? string2 - size1 : string1) + startpos); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Written out as an if-else to avoid testing `translate' ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ inside the loop. */ ~~~~~~~~~~~~~~~~~~~~ if (TRANSLATE_P (translate)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ while (range > lim) ~~~~~~~~~~~~~~~~~~~ { ~ re_char *old_d = d; ~~~~~~~~~~~~~~~~~~~ #ifdef MULE ~~~~~~~~~~~ Ibyte tempch[MAX_ICHAR_LEN]; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Ichar buf_ch = ~~~~~~~~~~~~~~ RE_TRANSLATE_1 (itext_ichar_fmt (d, fmt, lispobj)); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ set_itext_ichar (tempch, buf_ch); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (fastmap[*tempch]) ~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ #else ~~~~~ if (fastmap[(unsigned char) RE_TRANSLATE_1 (*d)]) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ #endif /* MULE */ ~~~~~~~~~~~~~~~~~ INC_IBYTEPTR_FMT (d, fmt); ~~~~~~~~~~~~~~~~~~~~~~~~~~ range -= (d - old_d); ~~~~~~~~~~~~~~~~~~~~~ assert (!forward_search_p || range >= 0); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ } ~ #ifdef MULE ~~~~~~~~~~~ else if (fmt != FORMAT_DEFAULT) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ while (range > lim) ~~~~~~~~~~~~~~~~~~~ { ~ re_char *old_d = d; ~~~~~~~~~~~~~~~~~~~ Ibyte tempch[MAX_ICHAR_LEN]; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Ichar buf_ch = itext_ichar_fmt (d, fmt, lispobj); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ set_itext_ichar (tempch, buf_ch); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (fastmap[*tempch]) ~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ INC_IBYTEPTR_FMT (d, fmt); ~~~~~~~~~~~~~~~~~~~~~~~~~~ range -= (d - old_d); ~~~~~~~~~~~~~~~~~~~~~ assert (!forward_search_p || range >= 0); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ } ~ #endif /* MULE */ ~~~~~~~~~~~~~~~~~ else ~~~~ { ~ while (range > lim && !fastmap[*d]) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ re_char *old_d = d; ~~~~~~~~~~~~~~~~~~~ INC_IBYTEPTR (d); ~~~~~~~~~~~~~~~~~ range -= (d - old_d); ~~~~~~~~~~~~~~~~~~~~~ assert (!forward_search_p || range >= 0); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ } ~ startpos += irange - range; ~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ else /* Searching backwards. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ /* #### It's not clear why we don't just write a loop, like ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for the moving-forward case. Perhaps the writer got lazy, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ since backward searches aren't so common. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ d = ((const unsigned char *) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (startpos >= size1 ? string2 - size1 : string1) + startpos); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef MULE ~~~~~~~~~~~ { ~ Ibyte tempch[MAX_ICHAR_LEN]; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Ichar buf_ch = ~~~~~~~~~~~~~~ RE_TRANSLATE (itext_ichar_fmt (d, fmt, lispobj)); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ set_itext_ichar (tempch, buf_ch); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!fastmap[*tempch]) ~~~~~~~~~~~~~~~~~~~~~~ goto advance; ~~~~~~~~~~~~~ } ~ #else ~~~~~ if (!fastmap[(unsigned char) RE_TRANSLATE (*d)]) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto advance; ~~~~~~~~~~~~~ #endif /* MULE */ ~~~~~~~~~~~~~~~~~ } ~ } ~ /* If can't match the null string, and that's all we have left, fail. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (range >= 0 && startpos == total_size && fastmap ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ && !bufp->can_be_null) ~~~~~~~~~~~~~~~~~~~~~~ { ~ UNBIND_REGEX_MALLOC_CHECK (); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return -1; ~~~~~~~~~~ } ~ #ifdef emacs /* XEmacs added, w/removal of immediate_quit */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!no_quit_in_re_search) ~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ BEGIN_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~~~ QUIT; ~~~~~ END_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~ RE_SEARCH_RELOCATE_MOVEABLE_DATA_POINTERS (); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ #endif ~~~~~~ BEGIN_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~~~ val = re_match_2_internal (bufp, string1, size1, string2, size2, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ startpos, regs, stop ~~~~~~~~~~~~~~~~~~~~ RE_LISP_CONTEXT_ARGS); ~~~~~~~~~~~~~~~~~~~~~~ #ifndef REGEX_MALLOC ~~~~~~~~~~~~~~~~~~~~ ALLOCA_GARBAGE_COLLECT (); ~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ END_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~ RE_SEARCH_RELOCATE_MOVEABLE_DATA_POINTERS (); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (val >= 0) ~~~~~~~~~~~~~ { ~ UNBIND_REGEX_MALLOC_CHECK (); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return startpos; ~~~~~~~~~~~~~~~~ } ~ if (val == -2) ~~~~~~~~~~~~~~ { ~ UNBIND_REGEX_MALLOC_CHECK (); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return -2; ~~~~~~~~~~ } ~ RE_SEARCH_RELOCATE_MOVEABLE_DATA_POINTERS (); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ advance: ~~~~~~~~ if (!range) ~~~~~~~~~~~ break; ~~~~~~ else if (range > 0) ~~~~~~~~~~~~~~~~~~~ { ~ Bytecount d_size; ~~~~~~~~~~~~~~~~~ d = ((const unsigned char *) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (startpos >= size1 ? string2 - size1 : string1) + startpos); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ d_size = itext_ichar_len_fmt (d, fmt); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ range -= d_size; ~~~~~~~~~~~~~~~~ assert (!forward_search_p || range >= 0); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ startpos += d_size; ~~~~~~~~~~~~~~~~~~~ } ~ else ~~~~ { ~ Bytecount d_size; ~~~~~~~~~~~~~~~~~ /* Note startpos > size1 not >=. If we are on the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ string1/string2 boundary, we want to backup into string1. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ d = ((const unsigned char *) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (startpos > size1 ? string2 - size1 : string1) + startpos); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEC_IBYTEPTR_FMT (d, fmt); ~~~~~~~~~~~~~~~~~~~~~~~~~~ d_size = itext_ichar_len_fmt (d, fmt); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ range += d_size; ~~~~~~~~~~~~~~~~ assert (!forward_search_p || range >= 0); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ startpos -= d_size; ~~~~~~~~~~~~~~~~~~~ } ~ } ~ UNBIND_REGEX_MALLOC_CHECK (); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return -1; ~~~~~~~~~~ } /* re_search_2 */ ~~~~~~~~~~~~~~~~~~~ ~ /* Declarations and macros for re_match_2. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* This converts PTR, a pointer into one of the search strings `string1' ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ and `string2' into an offset from the beginning of that string. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define POINTER_TO_OFFSET(ptr) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (FIRST_STRING_P (ptr) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~ ? ((regoff_t) ((ptr) - string1)) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ : ((regoff_t) ((ptr) - string2 + size1))) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Macros for dealing with the split strings in re_match_2. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define MATCHING_IN_FIRST_STRING (dend == end_match_1) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Call before fetching a character with *d. This switches over to ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ string2 if necessary. */ ~~~~~~~~~~~~~~~~~~~~~~~~~ #define REGEX_PREFETCH() \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ while (d == dend) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { \ ~~~~~~~~~~~ /* End of string2 => fail. */ \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (dend == end_match_2) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto fail; \ ~~~~~~~~~~~~~~~~~~ /* End of string1 => advance to string2. */ \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ d = string2; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~ dend = end_match_2; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* Test if at very beginning or at very end of the virtual concatenation ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ of `string1' and `string2'. If only one string, it's `string2'. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define AT_STRINGS_BEG(d) ((d) == (size1 ? string1 : string2) || !size2) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define AT_STRINGS_END(d) ((d) == end2) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* XEmacs change: ~~~~~~~~~~~~~~~~~ If the given position straddles the string gap, return the equivalent ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ position that is before or after the gap, respectively; otherwise, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return the same position. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define POS_BEFORE_GAP_UNSAFE(d) ((d) == string2 ? end1 : (d)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define POS_AFTER_GAP_UNSAFE(d) ((d) == end1 ? string2 : (d)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Test if CH is a word-constituent character. (XEmacs change) */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define WORDCHAR_P(ch) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (SYNTAX (BUFFER_MIRROR_SYNTAX_TABLE (lispbuf), ch) == Sword) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Free everything we malloc. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef MATCH_MAY_ALLOCATE ~~~~~~~~~~~~~~~~~~~~~~~~~ #define FREE_VAR(var,type) if (var) REGEX_FREE (var, type); var = NULL ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define FREE_VARIABLES() \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ do { \ ~~~~~~~~~~~~~~ UNBIND_REGEX_MALLOC_CHECK (); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ REGEX_FREE_STACK (fail_stack.stack); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ FREE_VAR (regstart, re_char **); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ FREE_VAR (regend, re_char **); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ FREE_VAR (old_regstart, re_char **); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ FREE_VAR (old_regend, re_char **); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ FREE_VAR (best_regstart, re_char **); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ FREE_VAR (best_regend, re_char **); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ FREE_VAR (reg_info, register_info_type *); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ FREE_VAR (reg_dummy, re_char **); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ FREE_VAR (reg_info_dummy, register_info_type *); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } while (0) ~~~~~~~~~~~ #else /* not MATCH_MAY_ALLOCATE */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define FREE_VARIABLES() \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ do { \ ~~~~~~~~~~~ UNBIND_REGEX_MALLOC_CHECK (); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } while (0) ~~~~~~~~~~~ #endif /* MATCH_MAY_ALLOCATE */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* These values must meet several constraints. They must not be valid ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ register values, which means we can use numbers larger than MAX_REGNUM. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ They must differ by 1, because of NUM_FAILURE_ITEMS above. And the value ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for the lowest register must be larger than the value for the highest ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ register, so we do not try to actually save any registers when none are ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ active. */ ~~~~~~~~~~~ #define NO_HIGHEST_ACTIVE_REG (MAX_REGNUM + 1) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define NO_LOWEST_ACTIVE_REG (NO_HIGHEST_ACTIVE_REG + 1) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ~ /* Matching routines. */ ~~~~~~~~~~~~~~~~~~~~~~~~~ #ifndef emacs /* XEmacs never uses this. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* re_match is like re_match_2 except it takes only a single string. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int ~~~ re_match (struct re_pattern_buffer *bufp, const char *string, int size, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int pos, struct re_registers *regs ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RE_LISP_CONTEXT_ARGS_DECL) ~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ int result = re_match_2_internal (bufp, NULL, 0, (re_char *) string, size, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ pos, regs, size ~~~~~~~~~~~~~~~ RE_LISP_CONTEXT_ARGS); ~~~~~~~~~~~~~~~~~~~~~~ ALLOCA_GARBAGE_COLLECT (); ~~~~~~~~~~~~~~~~~~~~~~~~~~ return result; ~~~~~~~~~~~~~~ } ~ #endif /* not emacs */ ~~~~~~~~~~~~~~~~~~~~~~ /* re_match_2 matches the compiled pattern in BUFP against the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (virtual) concatenation of STRING1 and STRING2 (of length SIZE1 and ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ SIZE2, respectively). We start matching at POS, and stop matching ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ at STOP. ~~~~~~~~ If REGS is non-null and the `no_sub' field of BUFP is nonzero, we ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ store offsets for the substring each group matched in REGS. See the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ documentation for exactly how many groups we fill. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ We return -1 if no match, -2 if an internal error (such as the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ failure stack overflowing). Otherwise, we return the length of the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ matched substring. */ ~~~~~~~~~~~~~~~~~~~~~~ int ~~~ re_match_2 (struct re_pattern_buffer *bufp, const char *string1, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int size1, const char *string2, int size2, int pos, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ struct re_registers *regs, int stop ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RE_LISP_CONTEXT_ARGS_DECL) ~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ int result; ~~~~~~~~~~~ #ifdef emacs ~~~~~~~~~~~~ /* Update the mirror syntax table if it's dirty now, this would otherwise ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ cause a malloc() in charset_mule in re_match_2_internal() when checking ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ characters' syntax. */ ~~~~~~~~~~~~~~~~~~~~~~ SYNTAX_CODE (BUFFER_MIRROR_SYNTAX_TABLE (lispbuf), 'a'); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ scache = setup_syntax_cache (scache, lispobj, lispbuf, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ offset_to_bytexpos (lispobj, pos), ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 1); ~~~ #endif ~~~~~~ result = re_match_2_internal (bufp, (re_char *) string1, size1, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (re_char *) string2, size2, ~~~~~~~~~~~~~~~~~~~~~~~~~~~ pos, regs, stop ~~~~~~~~~~~~~~~ RE_LISP_CONTEXT_ARGS); ~~~~~~~~~~~~~~~~~~~~~~ ALLOCA_GARBAGE_COLLECT (); ~~~~~~~~~~~~~~~~~~~~~~~~~~ return result; ~~~~~~~~~~~~~~ } ~ /* This is a separate function so that we can force an alloca cleanup ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ afterwards. */ ~~~~~~~~~~~~~~~ static int ~~~~~~~~~~ re_match_2_internal (struct re_pattern_buffer *bufp, re_char *string1, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int size1, re_char *string2, int size2, int pos, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ struct re_registers *regs, int stop ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RE_LISP_CONTEXT_ARGS_MULE_DECL) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ /* General temporaries. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~ int mcnt; ~~~~~~~~~ re_char *p1; ~~~~~~~~~~~~ int should_succeed; /* XEmacs change */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Just past the end of the corresponding string. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_char *end1, *end2; ~~~~~~~~~~~~~~~~~~~~~ /* Pointers into string1 and string2, just past the last characters in ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ each to consider matching. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_char *end_match_1, *end_match_2; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Where we are in the data, and the end of the current string. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_char *d, *dend; ~~~~~~~~~~~~~~~~~~ /* Where we are in the pattern, and the end of the pattern. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ unsigned char *p; ~~~~~~~~~~~~~~~~~ re_char *pstart; ~~~~~~~~~~~~~~~~ REGISTER re_char *pend; ~~~~~~~~~~~~~~~~~~~~~~~ /* Mark the opcode just after a start_memory, so we can test for an ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ empty subpattern when we get to the stop_memory. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_char *just_past_start_mem = 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* We use this to map every character in the string. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RE_TRANSLATE_TYPE translate = bufp->translate; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Failure point stack. Each place that can handle a failure further ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ down the line pushes a failure point on this stack. It consists of ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ restart, regend, and reg_info for all registers corresponding to ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the subexpressions we're currently inside, plus the number of such ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ registers, and, finally, two char *'s. The first char * is where ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ to resume scanning the pattern; the second one is where to resume ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ scanning the strings. If the latter is zero, the failure point is ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ a ``dummy''; if a failure happens and the failure point is a dummy, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ it gets discarded and the next one is tried. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef MATCH_MAY_ALLOCATE /* otherwise, this is global. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fail_stack_type fail_stack; ~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ #ifdef DEBUG ~~~~~~~~~~~~ static int failure_id; ~~~~~~~~~~~~~~~~~~~~~~ int nfailure_points_pushed = 0, nfailure_points_popped = 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ #ifdef REGEX_REL_ALLOC ~~~~~~~~~~~~~~~~~~~~~~ /* This holds the pointer to the failure stack, when ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ it is allocated relocatably. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fail_stack_elt_t *failure_stack_ptr; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ /* We fill all the registers internally, independent of what we ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return, for use in backreferences. The number here includes ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ an element for register zero. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regnum_t num_regs = bufp->re_ngroups + 1; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* The currently active registers. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regnum_t lowest_active_reg = NO_LOWEST_ACTIVE_REG; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regnum_t highest_active_reg = NO_HIGHEST_ACTIVE_REG; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Information on the contents of registers. These are pointers into ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the input strings; they record just what was matched (on this ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ attempt) by a subexpression part of the pattern, that is, the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regnum-th regstart pointer points to where in the pattern we began ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ matching and the regnum-th regend points to right after where we ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ stopped matching the regnum-th subexpression. (The zeroth register ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ keeps track of what the whole pattern matches.) */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_char **regstart, **regend; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ /* If a group that's operated upon by a repetition operator fails to ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ match anything, then the register for its start will need to be ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ restored because it will have been set to wherever in the string we ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ are when we last see its open-group operator. Similarly for a ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ register's end. */ ~~~~~~~~~~~~~~~~~~~ #ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_char **old_regstart, **old_regend; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ /* The is_active field of reg_info helps us keep track of which (possibly ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ nested) subexpressions we are currently in. The matched_something ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ field of reg_info[reg_num] helps us tell whether or not we have ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ matched any of the pattern so far this time through the reg_num-th ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ subexpression. These two fields get reset each time through any ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ loop their register is in. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef MATCH_MAY_ALLOCATE /* otherwise, this is global. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ register_info_type *reg_info; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ /* The following record the register info as found in the above ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ variables when we find a match better than any we've seen before. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ This happens as we backtrack through the failure points, which in ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ turn happens only if we have not yet matched the entire string. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int best_regs_set = false; ~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_char **best_regstart, **best_regend; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ /* Logically, this is `best_regend[0]'. But we don't want to have to ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ allocate space for that if we're not allocating space for anything ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ else (see below). Also, we never need info about register 0 for ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ any of the other register vectors, and it seems rather a kludge to ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ treat `best_regend' differently than the rest. So we keep track of ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the end of the best match so far in a separate variable. We ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ initialize this to NULL so that when we backtrack the first time ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ and need to test it, it's not garbage. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_char *match_end = NULL; ~~~~~~~~~~~~~~~~~~~~~~~~~~ /* This helps SET_REGS_MATCHED avoid doing redundant work. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int set_regs_matched_done = 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Used when we pop values we don't care about. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_char **reg_dummy; ~~~~~~~~~~~~~~~~~~~~ register_info_type *reg_info_dummy; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ #ifdef DEBUG ~~~~~~~~~~~~ /* Counts the total number of registers pushed. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int num_regs_pushed = 0; ~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ /* 1 if this match ends in the same string (string1 or string2) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ as the best previous match. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_bool same_str_p; ~~~~~~~~~~~~~~~~~~~ /* 1 if this match is the best seen so far. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_bool best_match_p; ~~~~~~~~~~~~~~~~~~~~~ #ifdef emacs ~~~~~~~~~~~~ Internal_Format fmt = buffer_or_other_internal_format (lispobj); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef REL_ALLOC ~~~~~~~~~~~~~~~~ const Ibyte *orig_buftext = ~~~~~~~~~~~~~~~~~~~~~~~~~~~ BUFFERP (lispobj) ? ~~~~~~~~~~~~~~~~~~~ BYTE_BUF_BYTE_ADDRESS (XBUFFER (lispobj), ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ BYTE_BUF_BEG (XBUFFER (lispobj))) : ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 0; ~~ #endif ~~~~~~ #ifdef ERROR_CHECK_MALLOC ~~~~~~~~~~~~~~~~~~~~~~~~~ int depth = bind_regex_malloc_disallowed (1); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ #endif /* emacs */ ~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT1 ("\n\nEntering re_match_2.\n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ BEGIN_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~~~ INIT_FAIL_STACK (); ~~~~~~~~~~~~~~~~~~~ p = (unsigned char *) ALLOCA (bufp->used); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ END_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~ /* re_match_2_internal() modifies the compiled pattern (see the succeed_n, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ jump_n, set_number_at opcodes), make it re-entrant by working on a ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ copy. This should also give better locality of reference. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ memcpy (p, bufp->buffer, bufp->used); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ pstart = (re_char *) p; ~~~~~~~~~~~~~~~~~~~~~~~ pend = pstart + bufp->used; ~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef MATCH_MAY_ALLOCATE ~~~~~~~~~~~~~~~~~~~~~~~~~ /* Do not bother to initialize all the register variables if there are ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ no groups in the pattern, as it takes a fair amount of time. If ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ there are groups, we include space for register 0 (the whole ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ pattern), even though we never use it, since it simplifies the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ array indexing. We should fix this. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (bufp->re_ngroups) ~~~~~~~~~~~~~~~~~~~~~ { ~ BEGIN_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~~~ regstart = REGEX_TALLOC (num_regs, re_char *); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regend = REGEX_TALLOC (num_regs, re_char *); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ old_regstart = REGEX_TALLOC (num_regs, re_char *); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ old_regend = REGEX_TALLOC (num_regs, re_char *); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ best_regstart = REGEX_TALLOC (num_regs, re_char *); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ best_regend = REGEX_TALLOC (num_regs, re_char *); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ reg_info = REGEX_TALLOC (num_regs, register_info_type); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ reg_dummy = REGEX_TALLOC (num_regs, re_char *); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ reg_info_dummy = REGEX_TALLOC (num_regs, register_info_type); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ END_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~ if (!(regstart && regend && old_regstart && old_regend && reg_info ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ && best_regstart && best_regend && reg_dummy && reg_info_dummy)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ FREE_VARIABLES (); ~~~~~~~~~~~~~~~~~~ return -2; ~~~~~~~~~~ } ~ } ~ else ~~~~ { ~ /* We must initialize all our variables to NULL, so that ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `FREE_VARIABLES' doesn't try to free them. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regstart = regend = old_regstart = old_regend = best_regstart ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ = best_regend = reg_dummy = NULL; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ reg_info = reg_info_dummy = (register_info_type *) NULL; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ #endif /* MATCH_MAY_ALLOCATE */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #if defined (emacs) && defined (REL_ALLOC) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ /* If the allocations above (or the call to setup_syntax_cache() in ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_match_2) caused a rel-alloc relocation, then fix up the data ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ pointers */ ~~~~~~~~~~~ Bytecount offset = offset_post_relocation (lispobj, orig_buftext); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (offset) ~~~~~~~~~~~ { ~ string1 += offset; ~~~~~~~~~~~~~~~~~~ string2 += offset; ~~~~~~~~~~~~~~~~~~ } ~ } ~ #endif /* defined (emacs) && defined (REL_ALLOC) */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* The starting position is bogus. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (pos < 0 || pos > size1 + size2) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ FREE_VARIABLES (); ~~~~~~~~~~~~~~~~~~ return -1; ~~~~~~~~~~ } ~ /* Initialize subexpression text positions to our sentinel to mark ones that ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ no start_memory/stop_memory has been seen for. Also initialize the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ register information struct. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (mcnt = 1; mcnt < num_regs; mcnt++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ regstart[mcnt] = regend[mcnt] = old_regstart[mcnt] = old_regend[mcnt] ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ = best_regstart[mcnt] = best_regend[mcnt] = REG_UNSET_VALUE; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ REG_MATCH_NULL_STRING_P (reg_info[mcnt]) = MATCH_NULL_UNSET_VALUE; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ IS_ACTIVE (reg_info[mcnt]) = 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ MATCHED_SOMETHING (reg_info[mcnt]) = 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ EVER_MATCHED_SOMETHING (reg_info[mcnt]) = 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* We move `string1' into `string2' if the latter's empty -- but not if ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `string1' is null. */ ~~~~~~~~~~~~~~~~~~~~~~ if (size2 == 0 && string1 != NULL) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ string2 = string1; ~~~~~~~~~~~~~~~~~~ size2 = size1; ~~~~~~~~~~~~~~ string1 = 0; ~~~~~~~~~~~~ size1 = 0; ~~~~~~~~~~ } ~ end1 = string1 + size1; ~~~~~~~~~~~~~~~~~~~~~~~ end2 = string2 + size2; ~~~~~~~~~~~~~~~~~~~~~~~ /* Compute where to stop matching, within the two strings. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (stop <= size1) ~~~~~~~~~~~~~~~~~~ { ~ end_match_1 = string1 + stop; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ end_match_2 = string2; ~~~~~~~~~~~~~~~~~~~~~~ } ~ else ~~~~ { ~ end_match_1 = end1; ~~~~~~~~~~~~~~~~~~~ end_match_2 = string2 + stop - size1; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* `p' scans through the pattern as `d' scans through the data. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `dend' is the end of the input string that `d' points within. `d' ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ is advanced into the following input string whenever necessary, but ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ this happens before fetching; therefore, at the beginning of the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ loop, `d' can be pointing at the end of a string, but it cannot ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ equal `string2'. */ ~~~~~~~~~~~~~~~~~~~~ if (size1 > 0 && pos <= size1) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ d = string1 + pos; ~~~~~~~~~~~~~~~~~~ dend = end_match_1; ~~~~~~~~~~~~~~~~~~~ } ~ else ~~~~ { ~ d = string2 + pos - size1; ~~~~~~~~~~~~~~~~~~~~~~~~~~ dend = end_match_2; ~~~~~~~~~~~~~~~~~~~ } ~ DEBUG_MATCH_PRINT1 ("The compiled pattern is: \n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT_COMPILED_PATTERN (bufp, p, pend); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT1 ("The string to match is: `"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT_DOUBLE_STRING (d, string1, size1, string2, size2); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT1 ("'\n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* This loops over pattern commands. It exits by returning from the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ function if the match is complete, or it drops through if the match ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fails at this starting point in the input data. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (;;) ~~~~~~~~ { ~ DEBUG_MATCH_PRINT2 ("\n0x%zx: ", (Bytecount) p); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef emacs /* XEmacs added, w/removal of immediate_quit */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!no_quit_in_re_search) ~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ BEGIN_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~~~ QUIT; ~~~~~ END_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~ RE_MATCH_RELOCATE_MOVEABLE_DATA_POINTERS (); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ #endif ~~~~~~ if (p == pend) ~~~~~~~~~~~~~~ { /* End of pattern means we might have succeeded. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT1 ("end of pattern ... "); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* If we haven't matched the entire string, and we want the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ longest match, try backtracking. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (d != end_match_2) ~~~~~~~~~~~~~~~~~~~~~ { ~ same_str_p = (FIRST_STRING_P (match_end) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ == MATCHING_IN_FIRST_STRING); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* AIX compiler got confused when this was combined ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ with the previous declaration. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (same_str_p) ~~~~~~~~~~~~~~~ best_match_p = d > match_end; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ else ~~~~ best_match_p = !MATCHING_IN_FIRST_STRING; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT1 ("backtracking.\n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!FAIL_STACK_EMPTY ()) ~~~~~~~~~~~~~~~~~~~~~~~~~ { /* More failure points to try. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* If exceeds best match so far, save it. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!best_regs_set || best_match_p) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ best_regs_set = true; ~~~~~~~~~~~~~~~~~~~~~ match_end = d; ~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT1 ("\nSAVING match as best so far.\n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (mcnt = 1; mcnt < num_regs; mcnt++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ best_regstart[mcnt] = regstart[mcnt]; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ best_regend[mcnt] = regend[mcnt]; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ } ~ goto fail; ~~~~~~~~~~ } ~ /* If no failure points, don't restore garbage. And if ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ last match is real best match, don't restore second ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ best one. */ ~~~~~~~~~~~~ else if (best_regs_set && !best_match_p) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ restore_best_regs: ~~~~~~~~~~~~~~~~~~ /* Restore best match. It may happen that `dend == ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ end_match_1' while the restored d is in string2. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ For example, the pattern `x.*y.*z' against the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ strings `x-' and `y-z-', if the two strings are ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ not consecutive in memory. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT1 ("Restoring best registers.\n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ d = match_end; ~~~~~~~~~~~~~~ dend = ((d >= string1 && d <= end1) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ? end_match_1 : end_match_2); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (mcnt = 1; mcnt < num_regs; mcnt++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ regstart[mcnt] = best_regstart[mcnt]; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regend[mcnt] = best_regend[mcnt]; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ } ~ } /* d != end_match_2 */ ~~~~~~~~~~~~~~~~~~~~~~~~ succeed_label: ~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT1 ("Accepting match.\n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* If caller wants register contents data back, do it. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ int num_nonshy_regs = bufp->re_nsub + 1; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (regs && !bufp->no_sub) ~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ /* Have the register data arrays been allocated? */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (bufp->regs_allocated == REGS_UNALLOCATED) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { /* No. So allocate them with malloc. We need one ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ extra element beyond `num_regs' for the `-1' marker ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ GNU code uses. */ ~~~~~~~~~~~~~~~~~~ regs->num_regs = MAX (RE_NREGS, num_nonshy_regs + 1); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ BEGIN_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~~~ regs->start = TALLOC (regs->num_regs, regoff_t); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regs->end = TALLOC (regs->num_regs, regoff_t); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ END_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~ RE_MATCH_RELOCATE_MOVEABLE_DATA_POINTERS (); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (regs->start == NULL || regs->end == NULL) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ FREE_VARIABLES (); ~~~~~~~~~~~~~~~~~~ return -2; ~~~~~~~~~~ } ~ bufp->regs_allocated = REGS_REALLOCATE; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ else if (bufp->regs_allocated == REGS_REALLOCATE) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { /* Yes. If we need more elements than were already ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ allocated, reallocate them. If we need fewer, just ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ leave it alone. */ ~~~~~~~~~~~~~~~~~~~ if (regs->num_regs < num_nonshy_regs + 1) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ regs->num_regs = num_nonshy_regs + 1; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ BEGIN_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~~~ RETALLOC (regs->start, regs->num_regs, regoff_t); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RETALLOC (regs->end, regs->num_regs, regoff_t); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ END_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~ RE_MATCH_RELOCATE_MOVEABLE_DATA_POINTERS (); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (regs->start == NULL || regs->end == NULL) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ FREE_VARIABLES (); ~~~~~~~~~~~~~~~~~~ return -2; ~~~~~~~~~~ } ~ } ~ } ~ else ~~~~ { ~ /* The braces fend off a "empty body in an else-statement" ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ warning under GCC when assert expands to nothing. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ assert (bufp->regs_allocated == REGS_FIXED); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* Convert the pointer data in `regstart' and `regend' to ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ indices. Register zero has to be set differently, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ since we haven't kept track of any info for it. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (regs->num_regs > 0) ~~~~~~~~~~~~~~~~~~~~~~~ { ~ regs->start[0] = pos; ~~~~~~~~~~~~~~~~~~~~~ regs->end[0] = (MATCHING_IN_FIRST_STRING ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ? ((regoff_t) (d - string1)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ : ((regoff_t) (d - string2 + size1))); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* Map over the NUM_NONSHY_REGS non-shy internal registers. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Copy each into the corresponding external register. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ MCNT indexes external registers. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (mcnt = 1; mcnt < MIN (num_nonshy_regs, regs->num_regs); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ mcnt++) ~~~~~~~ { ~ int internal_reg = bufp->external_to_internal_register[mcnt]; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if ((int)0xDEADBEEF == internal_reg ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || REG_UNSET (regstart[internal_reg]) || ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ REG_UNSET (regend[internal_reg])) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regs->start[mcnt] = regs->end[mcnt] = -1; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ else ~~~~ { ~ regs->start[mcnt] = ~~~~~~~~~~~~~~~~~~~ (regoff_t) POINTER_TO_OFFSET (regstart[internal_reg]); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regs->end[mcnt] = ~~~~~~~~~~~~~~~~~ (regoff_t) POINTER_TO_OFFSET (regend[internal_reg]); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ } ~ } /* regs && !bufp->no_sub */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* If we have regs and the regs structure has more elements than ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ were in the pattern, set the extra elements starting with ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ NUM_NONSHY_REGS to -1. If we (re)allocated the registers, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ this is the case, because we always allocate enough to have ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ at least one -1 at the end. ~~~~~~~~~~~~~~~~~~~~~~~~~~~ We do this even when no_sub is set because some applications ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (XEmacs) reuse register structures which may contain stale ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ information, and permit attempts to access those registers. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ It would be possible to require the caller to do this, but we'd ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ have to change the API for this function to reflect that, and ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ audit all callers. Note: as of 2003-04-17 callers in XEmacs ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ do clear the registers, but it's safer to leave this code in ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ because of reallocation. ~~~~~~~~~~~~~~~~~~~~~~~~ */ ~~ if (regs && regs->num_regs > 0) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (mcnt = num_nonshy_regs; mcnt < regs->num_regs; mcnt++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regs->start[mcnt] = regs->end[mcnt] = -1; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ DEBUG_MATCH_PRINT4 ("%u failure points pushed, %u popped (%u remain).\n", ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ nfailure_points_pushed, nfailure_points_popped, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ nfailure_points_pushed - nfailure_points_popped); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT2 ("%u registers pushed.\n", num_regs_pushed); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ mcnt = d - pos - (MATCHING_IN_FIRST_STRING ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ? string1 ~~~~~~~~~ : string2 - size1); ~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT2 ("Returning %d from re_match_2.\n", mcnt); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ FREE_VARIABLES (); ~~~~~~~~~~~~~~~~~~ return mcnt; ~~~~~~~~~~~~ } ~ /* Otherwise match next pattern command. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ switch ((re_opcode_t) *p++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ /* Ignore these. Used to ignore the n of succeed_n's which ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ currently have n == 0. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~ case no_op: ~~~~~~~~~~~ DEBUG_MATCH_PRINT1 ("EXECUTING no_op.\n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case succeed: ~~~~~~~~~~~~~ DEBUG_MATCH_PRINT1 ("EXECUTING succeed.\n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto succeed_label; ~~~~~~~~~~~~~~~~~~~ /* Match exactly a string of length n in the pattern. The ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ following byte in the pattern defines n, and the n bytes after ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ that make up the string to match. (Under Mule, this will be in ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the default internal format.) */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case exactn: ~~~~~~~~~~~~ mcnt = *p++; ~~~~~~~~~~~~ DEBUG_MATCH_PRINT2 ("EXECUTING exactn %d.\n", mcnt); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* This is written out as an if-else so we don't waste time ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ testing `translate' inside the loop. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (TRANSLATE_P (translate)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ do ~~ { ~ #ifdef MULE ~~~~~~~~~~~ Bytecount pat_len; ~~~~~~~~~~~~~~~~~~ REGEX_PREFETCH (); ~~~~~~~~~~~~~~~~~~ if (RE_TRANSLATE_1 (itext_ichar_fmt (d, fmt, lispobj)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ != itext_ichar (p)) ~~~~~~~~~~~~~~~~~~~ goto fail; ~~~~~~~~~~ pat_len = itext_ichar_len (p); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ p += pat_len; ~~~~~~~~~~~~~ INC_IBYTEPTR_FMT (d, fmt); ~~~~~~~~~~~~~~~~~~~~~~~~~~ mcnt -= pat_len; ~~~~~~~~~~~~~~~~ #else /* not MULE */ ~~~~~~~~~~~~~~~~~~~~ REGEX_PREFETCH (); ~~~~~~~~~~~~~~~~~~ if ((unsigned char) RE_TRANSLATE_1 (*d++) != *p++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto fail; ~~~~~~~~~~ mcnt--; ~~~~~~~ #endif ~~~~~~ } ~ while (mcnt > 0); ~~~~~~~~~~~~~~~~~ } ~ else ~~~~ { ~ #ifdef MULE ~~~~~~~~~~~ /* If buffer format is default, then we can shortcut and just ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ compare the text directly, byte by byte. Otherwise, we ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ need to go character by character. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (fmt != FORMAT_DEFAULT) ~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ do ~~ { ~ Bytecount pat_len; ~~~~~~~~~~~~~~~~~~ REGEX_PREFETCH (); ~~~~~~~~~~~~~~~~~~ if (itext_ichar_fmt (d, fmt, lispobj) != ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ itext_ichar (p)) ~~~~~~~~~~~~~~~~ goto fail; ~~~~~~~~~~ pat_len = itext_ichar_len (p); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ p += pat_len; ~~~~~~~~~~~~~ INC_IBYTEPTR_FMT (d, fmt); ~~~~~~~~~~~~~~~~~~~~~~~~~~ mcnt -= pat_len; ~~~~~~~~~~~~~~~~ } ~ while (mcnt > 0); ~~~~~~~~~~~~~~~~~ } ~ else ~~~~ #endif ~~~~~~ { ~ do ~~ { ~ REGEX_PREFETCH (); ~~~~~~~~~~~~~~~~~~ if (*d++ != *p++) goto fail; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ mcnt--; ~~~~~~~ } ~ while (mcnt > 0); ~~~~~~~~~~~~~~~~~ } ~ } ~ SET_REGS_MATCHED (); ~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ /* Match any character except possibly a newline or a null. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case anychar: ~~~~~~~~~~~~~ DEBUG_MATCH_PRINT1 ("EXECUTING anychar.\n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ REGEX_PREFETCH (); ~~~~~~~~~~~~~~~~~~ if ((!(bufp->syntax & RE_DOT_NEWLINE) && ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RE_TRANSLATE (itext_ichar_fmt (d, fmt, lispobj)) == '\n') ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || (bufp->syntax & RE_DOT_NOT_NULL && ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RE_TRANSLATE (itext_ichar_fmt (d, fmt, lispobj)) == ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ '\000')) ~~~~~~~~ goto fail; ~~~~~~~~~~ SET_REGS_MATCHED (); ~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT2 (" Matched `%c'.\n", *d); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ INC_IBYTEPTR_FMT (d, fmt); /* XEmacs change */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case charset: ~~~~~~~~~~~~~ case charset_not: ~~~~~~~~~~~~~~~~~ { ~ REGISTER Ichar c; ~~~~~~~~~~~~~~~~~ re_bool not_p = (re_opcode_t) *(p - 1) == charset_not; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT2 ("EXECUTING charset%s.\n", not_p ? "_not" : ""); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ REGEX_PREFETCH (); ~~~~~~~~~~~~~~~~~~ c = itext_ichar_fmt (d, fmt, lispobj); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ c = RE_TRANSLATE (c); /* The character to match. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Cast to `unsigned int' instead of `unsigned char' in case the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bit list is a full 32 bytes long. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if ((unsigned int)c < (unsigned int) (*p * BYTEWIDTH) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ && p[1 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH))) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ not_p = !not_p; ~~~~~~~~~~~~~~~ p += 1 + *p; ~~~~~~~~~~~~ if (!not_p) goto fail; ~~~~~~~~~~~~~~~~~~~~~~ SET_REGS_MATCHED (); ~~~~~~~~~~~~~~~~~~~~ INC_IBYTEPTR_FMT (d, fmt); /* XEmacs change */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ } ~ #ifdef MULE ~~~~~~~~~~~ case charset_mule: ~~~~~~~~~~~~~~~~~~ case charset_mule_not: ~~~~~~~~~~~~~~~~~~~~~~ { ~ REGISTER Ichar c; ~~~~~~~~~~~~~~~~~ re_bool not_p = (re_opcode_t) *(p - 1) == charset_mule_not; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Bitbyte class_bits = *p++; ~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT2 ("EXECUTING charset_mule%s.\n", ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ not_p ? "_not" : ""); ~~~~~~~~~~~~~~~~~~~~~ REGEX_PREFETCH (); ~~~~~~~~~~~~~~~~~~ c = itext_ichar_fmt (d, fmt, lispobj); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ c = RE_TRANSLATE (c); /* The character to match. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if ((class_bits && ~~~~~~~~~~~~~~~~~~ ((class_bits & BIT_WORD && ISWORD (c)) /* = ALNUM */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || (class_bits & BIT_ALPHA && ISALPHA (c)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || (class_bits & BIT_SPACE && ISSPACE (c)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || (class_bits & BIT_PUNCT && ISPUNCT (c)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || (TRANSLATE_P (translate) ? ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (class_bits & (BIT_UPPER | BIT_LOWER) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ && !NOCASEP (lispbuf, c)) ~~~~~~~~~~~~~~~~~~~~~~~~~ : ((class_bits & BIT_UPPER && ISUPPER (c)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || (class_bits & BIT_LOWER && ISLOWER (c)))))) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || EQ (Qt, unified_range_table_lookup ((void *) p, c, Qnil))) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ not_p = !not_p; ~~~~~~~~~~~~~~~ } ~ p += unified_range_table_bytes_used ((void *) p); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!not_p) goto fail; ~~~~~~~~~~~~~~~~~~~~~~ SET_REGS_MATCHED (); ~~~~~~~~~~~~~~~~~~~~ INC_IBYTEPTR_FMT (d, fmt); ~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ } ~ #endif /* MULE */ ~~~~~~~~~~~~~~~~~ /* The beginning of a group is represented by start_memory. The ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ arguments are the register number in the next two bytes, and the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ number of groups inner to this one in the two bytes thereafter. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ The text matched within the group is recorded (in the internal ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ registers data structure) under the register number. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case start_memory: ~~~~~~~~~~~~~~~~~~ { ~ regnum_t regno; ~~~~~~~~~~~~~~~ /* Find out if this group can match the empty string. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ p1 = p; /* To send to group_match_null_string_p. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ EXTRACT_NONNEGATIVE_AND_INCR (regno, p); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT3 ("EXECUTING start_memory %d (%d):\n", ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regno, extract_number (p)); ~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (REG_MATCH_NULL_STRING_P (reg_info[regno]) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ == MATCH_NULL_UNSET_VALUE) ~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ REG_MATCH_NULL_STRING_P (reg_info[regno]) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ = group_match_null_string_p (&p1, pend, reg_info); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ DEBUG_MATCH_PRINT2 (" group CAN%s match null string\n", ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ REG_MATCH_NULL_STRING_P (reg_info[regno]) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ? "NOT" : ""); ~~~~~~~~~~~~~~ /* Save the position in the string where we were the last time ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ we were at this open-group operator in case the group is ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ operated upon by a repetition operator, e.g., with `(a*)*b' ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ against `ab'; then we want to ignore where we are now in the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ string in case this attempt to match fails. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ old_regstart[regno] = REG_MATCH_NULL_STRING_P (reg_info[regno]) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ? REG_UNSET (regstart[regno]) ? d : regstart[regno] ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ : regstart[regno]; ~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT2 (" old_regstart: %zd\n", ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ POINTER_TO_OFFSET (old_regstart[regno])); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regstart[regno] = d; ~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT2 (" regstart: %zd\n", ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ POINTER_TO_OFFSET (regstart[regno])); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ IS_ACTIVE (reg_info[regno]) = 1; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ MATCHED_SOMETHING (reg_info[regno]) = 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Clear this whenever we change the register activity status. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ set_regs_matched_done = 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~ /* This is the new highest active register. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ highest_active_reg = regno; ~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* If nothing was active before, this is the new lowest active ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ register. */ ~~~~~~~~~~~~~ if (lowest_active_reg == NO_LOWEST_ACTIVE_REG) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ lowest_active_reg = regno; ~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Move past the inner group count. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ p += 2; ~~~~~~~ just_past_start_mem = p; ~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ } ~ /* The stop_memory opcode represents the end of a group. Its ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ arguments are the same as start_memory's: the register ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ number, and the number of inner groups. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case stop_memory: ~~~~~~~~~~~~~~~~~ { ~ regnum_t regno, inner_groups; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ EXTRACT_NONNEGATIVE_AND_INCR (regno, p); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ EXTRACT_NONNEGATIVE_AND_INCR (inner_groups, p); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT3 ("EXECUTING stop_memory %d (%d):\n", ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regno, inner_groups); ~~~~~~~~~~~~~~~~~~~~~ /* We need to save the string position the last time we were at ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ this close-group operator in case the group is operated ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ upon by a repetition operator, e.g., with `((a*)*(b*)*)*' ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ against `aba'; then we want to ignore where we are now in ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the string in case this attempt to match fails. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ old_regend[regno] = REG_MATCH_NULL_STRING_P (reg_info[regno]) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ? REG_UNSET (regend[regno]) ? d : regend[regno] ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ : regend[regno]; ~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT2 (" old_regend: %zd\n", ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ POINTER_TO_OFFSET (old_regend[regno])); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regend[regno] = d; ~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT2 (" regend: %zd\n", ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ POINTER_TO_OFFSET (regend[regno])); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* This register isn't active anymore. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ IS_ACTIVE (reg_info[regno]) = 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Clear this whenever we change the register activity status. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ set_regs_matched_done = 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~ /* If this was the only register active, nothing is active ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ anymore. */ ~~~~~~~~~~~~ if (lowest_active_reg == highest_active_reg) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ lowest_active_reg = NO_LOWEST_ACTIVE_REG; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ highest_active_reg = NO_HIGHEST_ACTIVE_REG; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ else ~~~~ { ~ /* We must scan for the new highest active register, since it ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ isn't necessarily one less than now: consider ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (a(b)c(d(e)f)g). When group 3 ends, after the f), the new ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ highest active register is 1. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regnum_t r = regno - 1; ~~~~~~~~~~~~~~~~~~~~~~~ while (r > 0 && !IS_ACTIVE (reg_info[r])) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ r--; ~~~~ /* If we end up at register zero, that means that we saved ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the registers as the result of an `on_failure_jump', not ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ a `start_memory', and we jumped to past the innermost ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `stop_memory'. For example, in ((.)*) we save registers ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 1 and 2 as a result of the *, but when we pop back to the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ second ), we are at the stop_memory 1. Thus, nothing is ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ active. */ ~~~~~~~~~~~ if (r == 0) ~~~~~~~~~~~ { ~ lowest_active_reg = NO_LOWEST_ACTIVE_REG; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ highest_active_reg = NO_HIGHEST_ACTIVE_REG; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ else ~~~~ { ~ highest_active_reg = r; ~~~~~~~~~~~~~~~~~~~~~~~ /* 98/9/21 jhod: We've also gotta set lowest_active_reg, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ don't we? */ ~~~~~~~~~~~~ r = 1; ~~~~~~ while (r < highest_active_reg && !IS_ACTIVE(reg_info[r])) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ r++; ~~~~ lowest_active_reg = r; ~~~~~~~~~~~~~~~~~~~~~~ } ~ } ~ /* If just failed to match something this time around with a ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ group that's operated on by a repetition operator, try to ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ force exit from the ``loop'', and restore the register ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ information for this group that we had before trying this ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ last match. */ ~~~~~~~~~~~~~~~ if ((!MATCHED_SOMETHING (reg_info[regno]) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || just_past_start_mem == p - 4) && p < pend) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ re_bool is_a_jump_n = false; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ p1 = p; ~~~~~~~ mcnt = 0; ~~~~~~~~~ switch ((re_opcode_t) *p1++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ case jump_n: ~~~~~~~~~~~~ is_a_jump_n = true; ~~~~~~~~~~~~~~~~~~~ case pop_failure_jump: ~~~~~~~~~~~~~~~~~~~~~~ case maybe_pop_jump: ~~~~~~~~~~~~~~~~~~~~ case jump: ~~~~~~~~~~ case dummy_failure_jump: ~~~~~~~~~~~~~~~~~~~~~~~~ EXTRACT_NUMBER_AND_INCR (mcnt, p1); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (is_a_jump_n) ~~~~~~~~~~~~~~~~ p1 += 2; ~~~~~~~~ break; ~~~~~~ default: ~~~~~~~~ /* do nothing */ ; ~~~~~~~~~~~~~~~~~~ } ~ p1 += mcnt; ~~~~~~~~~~~ /* If the next operation is a jump backwards in the pattern ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ to an on_failure_jump right before the start_memory ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ corresponding to this stop_memory, exit from the loop ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ by forcing a failure after pushing on the stack the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ on_failure_jump's jump in the pattern, and d. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (mcnt < 0 && (re_opcode_t) *p1 == on_failure_jump ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ && (re_opcode_t) p1[3] == start_memory && ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regno == extract_nonnegative (p1 + 4)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ /* If this group ever matched anything, then restore ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ what its registers were before trying this last ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ failed match, e.g., with `(a*)*b' against `ab' for ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regstart[1], and, e.g., with `((a*)*(b*)*)*' against ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `aba' for regend[3]. ~~~~~~~~~~~~~~~~~~~~ Also restore the registers for inner groups for, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ e.g., `((a*)(b*))*' against `aba' (register 3 would ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ otherwise get trashed). */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (EVER_MATCHED_SOMETHING (reg_info[regno])) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ int r; ~~~~~~ EVER_MATCHED_SOMETHING (reg_info[regno]) = 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Restore this and inner groups' (if any) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ registers. */ ~~~~~~~~~~~~~~ for (r = regno; r < regno + inner_groups; r++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ regstart[r] = old_regstart[r]; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* xx why this test? */ ~~~~~~~~~~~~~~~~~~~~~~~~ if (old_regend[r] >= regstart[r]) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regend[r] = old_regend[r]; ~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ } ~ p1++; ~~~~~ EXTRACT_NUMBER_AND_INCR (mcnt, p1); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PUSH_FAILURE_POINT (p1 + mcnt, d, -2); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regex.c:774:55: note: in definition of macro 'DEBUG_FAIL_PRINT2' if (debug_regexps & RE_DEBUG_FAILURE_POINT) printf (x1, x2) ^~ regex.c:6370:7: note: in expansion of macro 'PUSH_FAILURE_POINT' PUSH_FAILURE_POINT (p1 + mcnt, d, -2); ^~~~~~~~~~~~~~~~~~ regex.c:6534:31: warning: format '%zx' expects argument of type 'size_t', but argument 3 has type 'long int' [-Wformat=] DEBUG_MATCH_PRINT3 (" %d (to 0x%zx):\n", mcnt, ^ (Bytecount) (p + mcnt)); ~~~~~~~~~~~~~~~~~~~~~~ regex.c:791:50: note: in definition of macro 'DEBUG_MATCH_PRINT3' if (debug_regexps & RE_DEBUG_MATCHING) printf (x1, x2, x3) ^~ regex.c:1731:26: warning: format '%zd' expects argument of type 'signed size_t', but argument 2 has type 'long int' [-Wformat=] DEBUG_FAIL_PRINT2 (" Before push, next avail: %zd\n", \ ^ (Bytecount) (fail_stack).avail); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~ regex.c:774:55: note: in definition of macro 'DEBUG_FAIL_PRINT2' if (debug_regexps & RE_DEBUG_FAILURE_POINT) printf (x1, x2) ^~ regex.c:6537:11: note: in expansion of macro 'PUSH_FAILURE_POINT' PUSH_FAILURE_POINT (p + mcnt, (unsigned char *) 0, -2); ^~~~~~~~~~~~~~~~~~ regex.c:1733:26: warning: format '%zd' expects argument of type 'signed size_t', but argument 2 has type 'long int' [-Wformat=] DEBUG_FAIL_PRINT2 (" size: %zd\n", \ ^ (Bytecount) (fail_stack).size); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~ regex.c:774:55: note: in definition of macro 'DEBUG_FAIL_PRINT2' if (debug_regexps & RE_DEBUG_FAILURE_POINT) printf (x1, x2) ^~ regex.c:6537:11: note: in expansion of macro 'PUSH_FAILURE_POINT' PUSH_FAILURE_POINT (p + mcnt, (unsigned char *) 0, -2); ^~~~~~~~~~~~~~~~~~ regex.c:1737:26: warning: format '%zd' expects argument of type 'signed size_t', but argument 2 has type 'long int' [-Wformat=] DEBUG_FAIL_PRINT2 (" available: %zd\n", \ ^ regex.c:774:55: note: in definition of macro 'DEBUG_FAIL_PRINT2' if (debug_regexps & RE_DEBUG_FAILURE_POINT) printf (x1, x2) ^~ regex.c:6537:11: note: in expansion of macro 'PUSH_FAILURE_POINT' PUSH_FAILURE_POINT (p + mcnt, (unsigned char *) 0, -2); ^~~~~~~~~~~~~~~~~~ regex.c:1756:23: warning: format '%zd' expects argument of type 'signed size_t', but argument 2 has type 'long int' [-Wformat=] DEBUG_FAIL_PRINT2 ("\n Doubled stack; size now: %zd\n", \ ^ (Bytecount) (fail_stack).size); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~ regex.c:774:55: note: in definition of macro 'DEBUG_FAIL_PRINT2' if (debug_regexps & RE_DEBUG_FAILURE_POINT) printf (x1, x2) ^~ regex.c:6537:11: note: in expansion of macro 'PUSH_FAILURE_POINT' PUSH_FAILURE_POINT (p + mcnt, (unsigned char *) 0, -2); ^~~~~~~~~~~~~~~~~~ regex.c:1758:23: warning: format '%zd' expects argument of type 'signed size_t', but argument 2 has type 'long int' [-Wformat=] DEBUG_FAIL_PRINT2 (" slots available: %zd\n", \ ^ regex.c:774:55: note: in definition of macro 'DEBUG_FAIL_PRINT2' if (debug_regexps & RE_DEBUG_FAILURE_POINT) printf (x1, x2) ^~ regex.c:6537:11: note: in expansion of macro 'PUSH_FAILURE_POINT' PUSH_FAILURE_POINT (p + mcnt, (unsigned char *) 0, -2); ^~~~~~~~~~~~~~~~~~ regex.c:1777:23: warning: format '%zx' expects argument of type 'size_t', but argument 2 has type 'long int' [-Wformat=] DEBUG_FAIL_PRINT2 (" start: 0x%zx\n", \ ^ (Bytecount) regstart[this_reg]); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regex.c:774:55: note: in definition of macro 'DEBUG_FAIL_PRINT2' if (debug_regexps & RE_DEBUG_FAILURE_POINT) printf (x1, x2) ^~ regex.c:6537:11: note: in expansion of macro 'PUSH_FAILURE_POINT' PUSH_FAILURE_POINT (p + mcnt, (unsigned char *) 0, -2); ^~~~~~~~~~~~~~~~~~ regex.c:1779:23: warning: format '%zx' expects argument of type 'size_t', but argument 2 has type 'long int' [-Wformat=] DEBUG_FAIL_PRINT2 (" end: 0x%zx\n", \ ^ (Bytecount) regend[this_reg]); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regex.c:774:55: note: in definition of macro 'DEBUG_FAIL_PRINT2' if (debug_regexps & RE_DEBUG_FAILURE_POINT) printf (x1, x2) ^~ regex.c:6537:11: note: in expansion of macro 'PUSH_FAILURE_POINT' PUSH_FAILURE_POINT (p + mcnt, (unsigned char *) 0, -2); ^~~~~~~~~~~~~~~~~~ regex.c:1781:23: warning: format '%zx' expects argument of type 'size_t', but argument 2 has type 'long int' [-Wformat=] DEBUG_FAIL_PRINT2 (" info: 0x%zx\n ", \ ^ * (long *) (®_info[this_reg])); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regex.c:774:55: note: in definition of macro 'DEBUG_FAIL_PRINT2' if (debug_regexps & RE_DEBUG_FAILURE_POINT) printf (x1, x2) ^~ regex.c:6537:11: note: in expansion of macro 'PUSH_FAILURE_POINT' PUSH_FAILURE_POINT (p + mcnt, (unsigned char *) 0, -2); ^~~~~~~~~~~~~~~~~~ regex.c:1814:26: warning: format '%zx' expects argument of type 'size_t', but argument 2 has type 'long int' [-Wformat=] DEBUG_FAIL_PRINT2 (" Pushing pattern 0x%zx: \n", \ ^ (Bytecount) pattern_place); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_FAIL_PRINT_COMPILED_PATTERN (bufp, pattern_place, pend); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_FAIL_PRINT2 (" Pushing string 0x%zx: `", \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (Bytecount) string_place); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_FAIL_PRINT_DOUBLE_STRING (string_place, string1, size1, \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ string2, size2); \ ~~~~~~~~~~~~~~~~~~~~ DEBUG_FAIL_PRINT1 ("'\n"); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_FAIL_PRINT2 (" Pushing failure id: %u\n", failure_id); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } \ ~~~~~~~~~~~ } while (0) ~~~~~~~~~~~ /* This is the number of items that are pushed and popped on the stack ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for each register. */ ~~~~~~~~~~~~~~~~~~~~~~ #define NUM_REG_ITEMS 3 ~~~~~~~~~~~~~~~~~~~~~~~~ /* Individual items aside from the registers. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef DEBUG ~~~~~~~~~~~~ #define NUM_NONREG_ITEMS 5 /* Includes failure point id. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #else ~~~~~ #define NUM_NONREG_ITEMS 4 ~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ /* We push at most this many items on the stack. */ --- search.o --- --- regex.o --- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* We used to use (num_regs - 1), which is the number of registers ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ this regexp will save; but that was changed to 5 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ to avoid stack overflow for a regexp with lots of parens. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define MAX_FAILURE_ITEMS (5 * NUM_REG_ITEMS + NUM_NONREG_ITEMS) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* We actually push this many items. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define NUM_FAILURE_ITEMS \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ((highest_active_reg - lowest_active_reg + 1) * NUM_REG_ITEMS \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + NUM_NONREG_ITEMS) ~~~~~~~~~~~~~~~~~~~ /* How many items can still be added to the stack without overflowing it. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define REMAINING_AVAIL_SLOTS ((fail_stack).size - (fail_stack).avail) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Pops what PUSH_FAIL_STACK pushes. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ We restore into the following parameters, all of which should be lvalues: ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ STR -- the saved data position. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PAT -- the saved pattern position. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ LOW_REG, HIGH_REG -- the highest and lowest active registers. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ REGSTART, REGEND -- arrays of string positions. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ REG_INFO -- array of information about each subexpression. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Also assumes the variables `fail_stack' and (if debugging), `bufp', ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `pend', `string1', `size1', `string2', and `size2'. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define POP_FAILURE_POINT(str, pat, low_reg, high_reg, \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regstart, regend, reg_info) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ do { \ ~~~~~~~~~~~~~~ DEBUG_STATEMENT (int ffailure_id;) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int this_reg; \ ~~~~~~~~~~~~~~~~~~~~~~ const unsigned char *string_temp; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ \ ~ /* Remove failure points and point to how many regs pushed. */ \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ assert (fail_stack.avail >= NUM_NONREG_ITEMS); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ \ ~ if (DEBUG_RUNTIME_FLAGS & RE_DEBUG_FAILURE_POINT) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { \ ~~~~~~~~~~~ DEBUG_FAIL_PRINT1 ("POP_FAILURE_POINT:\n"); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_FAIL_PRINT2 (" Before pop, next avail: %zd\n", \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (Bytecount) fail_stack.avail); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_FAIL_PRINT2 (" size: %zd\n", \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (Bytecount) fail_stack.size); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } \ ~~~~~~~~~~~ \ ~ DEBUG_STATEMENT (ffailure_id = POP_FAILURE_INT()); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ \ ~ /* If the saved string location is NULL, it came from an \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ on_failure_keep_string_jump opcode, and we want to throw away the \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ saved NULL, thus retaining our current position in the string. */ \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ string_temp = POP_FAILURE_RELOCATABLE (); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (string_temp != NULL) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ str = string_temp; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~ \ ~ pat = (unsigned char *) POP_FAILURE_POINTER (); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ \ ~ /* Restore register info. */ \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ high_reg = POP_FAILURE_INT (); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ low_reg = POP_FAILURE_INT (); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ \ ~ if (DEBUG_RUNTIME_FLAGS & RE_DEBUG_FAILURE_POINT) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { \ ~~~~~~~~~~~ DEBUG_FAIL_PRINT2 (" Popping failure id: %d\n", ffailure_id); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_FAIL_PRINT2 (" Popping string 0x%zx: `", (Bytecount) str); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_FAIL_PRINT_DOUBLE_STRING (str, string1, size1, \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ string2, size2); \ ~~~~~~~~~~~~~~~~~~~~ DEBUG_FAIL_PRINT1 ("'\n"); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_FAIL_PRINT2 (" Popping pattern 0x%zx: ", (Bytecount) pat); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_FAIL_PRINT_COMPILED_PATTERN (bufp, pat, pend); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_FAIL_PRINT2 (" Popping high active reg: %d\n", high_reg); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_FAIL_PRINT2 (" Popping low active reg: %d\n", low_reg); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } \ ~~~~~~~~~~~ \ ~ for (this_reg = high_reg; this_reg >= low_reg; this_reg--) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { \ ~~~~~~~~~~~ reg_info[this_reg].word = POP_FAILURE_ELT (); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regend[this_reg] = POP_FAILURE_RELOCATABLE (); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regstart[this_reg] = POP_FAILURE_RELOCATABLE (); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ \ ~ if (DEBUG_RUNTIME_FLAGS & RE_DEBUG_FAILURE_POINT) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { \ ~~~~~~~~~~ DEBUG_FAIL_PRINT2 (" Popping reg: %d\n", this_reg); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_FAIL_PRINT2 (" info: 0x%zx\n", \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ * (Bytecount *) ®_info[this_reg]); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_FAIL_PRINT2 (" end: 0x%zx\n", \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (Bytecount) regend[this_reg]); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_FAIL_PRINT2 (" start: 0x%zx\n", \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (Bytecount) regstart[this_reg]); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } \ ~~~~~~~~~~ } \ ~~~~~~~~~~~ \ ~ set_regs_matched_done = 0; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_STATEMENT (nfailure_points_popped++); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } while (0) /* POP_FAILURE_POINT */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ~ /* Structure for per-register (a.k.a. per-group) information. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Other register information, such as the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ starting and ending positions (which are addresses), and the list of ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ inner groups (which is a bits list) are maintained in separate ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ variables. ~~~~~~~~~~ We are making a (strictly speaking) nonportable assumption here: that ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the compiler will pack our bit fields into something that fits into ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the type of `word', i.e., is something that fits into one item on the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ failure stack. */ ~~~~~~~~~~~~~~~~~~ typedef union ~~~~~~~~~~~~~ { ~ fail_stack_elt_t word; ~~~~~~~~~~~~~~~~~~~~~~ struct ~~~~~~ { ~ /* This field is one if this group can match the empty string, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ zero if not. If not yet determined, `MATCH_NULL_UNSET_VALUE'. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define MATCH_NULL_UNSET_VALUE 3 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ unsigned int match_null_string_p : 2; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ unsigned int is_active : 1; ~~~~~~~~~~~~~~~~~~~~~~~~~~~ unsigned int matched_something : 1; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ unsigned int ever_matched_something : 1; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } bits; ~~~~~~~ } register_info_type; ~~~~~~~~~~~~~~~~~~~~~ #define REG_MATCH_NULL_STRING_P(R) ((R).bits.match_null_string_p) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define IS_ACTIVE(R) ((R).bits.is_active) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define MATCHED_SOMETHING(R) ((R).bits.matched_something) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define EVER_MATCHED_SOMETHING(R) ((R).bits.ever_matched_something) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Call this when have matched a real character; it sets `matched' flags ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for the subexpressions which we are currently inside. Also records ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ that those subexprs have matched. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define SET_REGS_MATCHED() \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ do \ ~~~~~~~~~~~~ { \ ~~~~~~~~~~~ if (!set_regs_matched_done) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { \ ~~~~~~~~~~ int r; \ ~~~~~~~~~~~~~~ set_regs_matched_done = 1; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (r = lowest_active_reg; r <= highest_active_reg; r++) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { \ ~~~~~~~~~~ MATCHED_SOMETHING (reg_info[r]) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ = EVER_MATCHED_SOMETHING (reg_info[r]) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ = 1; \ ~~~~~~~~~~~~ } \ ~~~~~~~~~~ } \ ~~~~~~~~~~ } \ ~~~~~~~~~~~ while (0) ~~~~~~~~~ ~ /* Subroutine declarations and macros for regex_compile. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Fetch the next character in the uncompiled pattern---translating it ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if necessary. */ ~~~~~~~~~~~~~~~~~ #define PATFETCH(c) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~ do { \ ~~~~~~~~~~~~~~ PATFETCH_RAW (c); \ ~~~~~~~~~~~~~~~~~~~~~~~~~ c = RE_TRANSLATE (c); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } while (0) ~~~~~~~~~~~ /* Fetch the next character in the uncompiled pattern, with no ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ translation. */ ~~~~~~~~~~~~~~~~ #define PATFETCH_RAW(c) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ do {if (p == pend) return REG_EEND; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ assert (p < pend); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~ c = itext_ichar (p); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ INC_IBYTEPTR (p); \ ~~~~~~~~~~~~~~~~~~~~~~~~~ } while (0) ~~~~~~~~~~~ /* Go backwards one character in the pattern. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define PATUNFETCH DEC_IBYTEPTR (p) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* If `translate' is non-null, return translate[D], else just D. We ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ cast the subscript to translate because some data is declared as ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `char *', to avoid warnings when a string constant is passed. But ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ when we use a character as a subscript we must make it unsigned. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define RE_TRANSLATE(d) \ ~~~~~~~~~~~~~~~~~~~~~~~~~ (TRANSLATE_P (translate) ? RE_TRANSLATE_1 (d) : (d)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Macros for outputting the compiled pattern into `buffer'. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* If the buffer isn't allocated when it comes in, use this. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define INIT_BUF_SIZE 32 ~~~~~~~~~~~~~~~~~~~~~~~~~ /* Make sure we have at least N more bytes of space in buffer. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define GET_BUFFER_SPACE(n) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ while (buf_end - bufp->buffer + (n) > (ptrdiff_t) bufp->allocated) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ EXTEND_BUFFER () ~~~~~~~~~~~~~~~~ /* Make sure we have one more byte of buffer space and then add C to it. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define BUF_PUSH(c) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~ do { \ ~~~~~~~~~~~~~~ GET_BUFFER_SPACE (1); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ *buf_end++ = (unsigned char) (c); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } while (0) ~~~~~~~~~~~ /* Ensure we have two more bytes of buffer space and then append C1 and C2. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define BUF_PUSH_2(c1, c2) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ do { \ ~~~~~~~~~~~~~~ GET_BUFFER_SPACE (2); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ *buf_end++ = (unsigned char) (c1); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ *buf_end++ = (unsigned char) (c2); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } while (0) ~~~~~~~~~~~ /* As with BUF_PUSH_2, except for three bytes. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define BUF_PUSH_3(c1, c2, c3) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ do { \ ~~~~~~~~~~~~~~ GET_BUFFER_SPACE (3); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ *buf_end++ = (unsigned char) (c1); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ *buf_end++ = (unsigned char) (c2); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ *buf_end++ = (unsigned char) (c3); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } while (0) ~~~~~~~~~~~ /* Store a jump with opcode OP at LOC to location TO. We store a ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ relative address offset by the three bytes the jump itself occupies. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define STORE_JUMP(op, loc, to) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ store_op1 (op, loc, (to) - (loc) - 3) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Likewise, for a two-argument jump. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define STORE_JUMP2(op, loc, to, arg) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ store_op2 (op, loc, (to) - (loc) - 3, arg) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Like `STORE_JUMP', but for inserting. Assume `buf_end' is the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ buffer end. */ ~~~~~~~~~~~~~~~ #define INSERT_JUMP(op, loc, to) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ insert_op1 (op, loc, (to) - (loc) - 3, buf_end) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Like `STORE_JUMP2', but for inserting. Assume `buf_end' is the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ buffer end. */ ~~~~~~~~~~~~~~~ #define INSERT_JUMP2(op, loc, to, arg) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ insert_op2 (op, loc, (to) - (loc) - 3, arg, buf_end) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Extend the buffer by twice its current size via realloc and ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ reset the pointers that pointed into the old block to point to the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ correct places in the new one. If extending the buffer results in it ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ being larger than RE_MAX_BUF_SIZE, then flag memory exhausted. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define EXTEND_BUFFER() \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ do { \ ~~~~~~~~~~~~~~~ re_char *old_buffer = bufp->buffer; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (bufp->allocated == RE_MAX_BUF_SIZE) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return REG_ESIZE; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->allocated <<= 1; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (bufp->allocated > RE_MAX_BUF_SIZE) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->allocated = RE_MAX_BUF_SIZE; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->buffer = \ ~~~~~~~~~~~~~~~~~~~~~~~ (unsigned char *) xrealloc (bufp->buffer, bufp->allocated); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (bufp->buffer == NULL) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return REG_ESPACE; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~ /* If the buffer moved, move all the pointers into it. */ \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (old_buffer != bufp->buffer) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { \ ~~~~~~~~~~~~ buf_end = (buf_end - old_buffer) + bufp->buffer; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ begalt = (begalt - old_buffer) + bufp->buffer; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (fixup_alt_jump) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~ fixup_alt_jump = (fixup_alt_jump - old_buffer) + bufp->buffer; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (laststart) \ ~~~~~~~~~~~~~~~~~~~~~~~ laststart = (laststart - old_buffer) + bufp->buffer; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (pending_exact) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~ pending_exact = (pending_exact - old_buffer) + bufp->buffer; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } \ ~~~~~~~~~~~~ } while (0) ~~~~~~~~~~~ #define INIT_REG_TRANSLATE_SIZE 5 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Macros for the compile stack. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Since offsets can go either forwards or backwards, this type needs to be ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ able to hold values from -(RE_MAX_BUF_SIZE - 1) to RE_MAX_BUF_SIZE - 1. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ typedef int pattern_offset_t; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ typedef struct ~~~~~~~~~~~~~~ { ~ pattern_offset_t begalt_offset; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ pattern_offset_t fixup_alt_jump; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ pattern_offset_t inner_group_offset; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ pattern_offset_t laststart_offset; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regnum_t regnum; ~~~~~~~~~~~~~~~~ } compile_stack_elt_t; ~~~~~~~~~~~~~~~~~~~~~~ typedef struct ~~~~~~~~~~~~~~ { ~ compile_stack_elt_t *stack; ~~~~~~~~~~~~~~~~~~~~~~~~~~~ int size; ~~~~~~~~~ int avail; /* Offset of next open position. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } compile_stack_type; ~~~~~~~~~~~~~~~~~~~~~ #define INIT_COMPILE_STACK_SIZE 32 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define COMPILE_STACK_EMPTY (compile_stack.avail == 0) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define COMPILE_STACK_FULL (compile_stack.avail == compile_stack.size) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* The next available element. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define COMPILE_STACK_TOP (compile_stack.stack[compile_stack.avail]) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Set the bit for character C in a bit vector. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define SET_LIST_BIT(c) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (buf_end[((unsigned char) (c)) / BYTEWIDTH] \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ |= 1 << (((unsigned char) c) % BYTEWIDTH)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef MULE ~~~~~~~~~~~ /* Set the "bit" for character C in a range table. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define SET_RANGETAB_BIT(c) put_range_table (rtab, c, c, Qt) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ #ifdef emacs ~~~~~~~~~~~~ /* Parse the longest number we can, but don't produce a bignum, that can't ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ correspond to anything we're interested in and would needlessly complicate ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ code. Also avoid the silent overflow issues of the non-emacs code below. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ If the string at P is not exhausted, leave P pointing at the next ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (probable-)non-digit byte encountered. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define GET_UNSIGNED_NUMBER(num) do \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Ibyte *_gus_numend = NULL; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Lisp_Object _gus_numno; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* most-positive-fixnum on 32 bit XEmacs is 10 decimal digits, \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ nine will keep us in fixnum territory no matter our \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ architecture */ \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Bytecount limit = min (pend - p, 9); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ \ ~ /* Require that any digits are ASCII. We already require that \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the user type ASCII in order to type {,(,|, etc, and there is \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the potential for security holes in the future if we allow \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ non-ASCII digits to specify groups in regexps and other \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ code that parses regexps is not aware of this. */ \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ _gus_numno = parse_integer (p, &_gus_numend, limit, 10, 1, \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Vdigit_fixnum_ascii); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (FIXNUMP (_gus_numno) && XREALFIXNUM (_gus_numno) >= 0) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ num = XREALFIXNUM (_gus_numno); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ p = _gus_numend; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } while (0) ~~~~~~~~~~~ #else ~~~~~ /* Get the next unsigned number in the uncompiled pattern. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define GET_UNSIGNED_NUMBER(num) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { if (p != pend) \ ~~~~~~~~~~~~~~~~~~~~~~~~ { \ ~~~~~~~~~~~ int _gun_do_unfetch = 1; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PATFETCH (c); \ ~~~~~~~~~~~~~~~~~~~~~~ while (ISDIGIT (c)) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~ { \ ~~~~~~~~~~~ if (num < 0) \ ~~~~~~~~~~~~~~~~~~~~ num = 0; \ ~~~~~~~~~~~~~~~~ num = num * 10 + c - '0'; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (p == pend) \ ~~~~~~~~~~~~~~~~~~~~~~ { \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ _gun_do_unfetch = 0; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; \ ~~~~~~~~~~~~~~~ } \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PATFETCH (c); \ ~~~~~~~~~~~~~~~~~~~~ } \ ~~~~~~~~~~~ if (_gun_do_unfetch) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Make sure P points to the next non-digit character. */ \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PATUNFETCH; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ #endif ~~~~~~ /* Map a string to the char class it names (if any). BEG points to the string ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ to be parsed and LIMIT is the length, in bytes, of that string. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ XEmacs; this only handles the NAME part of the [:NAME:] specification of a ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ character class name. The GNU emacs version of this function attempts to ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ handle the string from [: onwards, and is called re_wctype_parse. Our ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ approach means the function doesn't need to be called with every character ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ class encountered. ~~~~~~~~~~~~~~~~~~ LENGTH would be a Bytecount if this function didn't need to be compiled ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ also for executables that don't include lisp.h ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Return RECC_ERROR if STRP doesn't match a known character class. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_wctype_t ~~~~~~~~~~~ re_wctype (const unsigned char *beg, int limit) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ /* Sort tests in the length=five case by frequency the classes to minimize ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ number of times we fail the comparison. The frequencies of character class ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ names used in Emacs sources as of 2016-07-27: ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ $ find \( -name \*.c -o -name \*.el \) -exec grep -h '\[:[a-z]*:]' {} + | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ sed 's/]/]\n/g' |grep -o '\[:[a-z]*:]' |sort |uniq -c |sort -nr ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 213 [:alnum:] ~~~~~~~~~~~~~ 104 [:alpha:] ~~~~~~~~~~~~~ 62 [:space:] ~~~~~~~~~~~~ 39 [:digit:] ~~~~~~~~~~~~ 36 [:blank:] ~~~~~~~~~~~~ 26 [:word:] ~~~~~~~~~~~ 26 [:upper:] ~~~~~~~~~~~~ 21 [:lower:] ~~~~~~~~~~~~ 10 [:xdigit:] ~~~~~~~~~~~~~ 10 [:punct:] ~~~~~~~~~~~~ 10 [:ascii:] ~~~~~~~~~~~~ 4 [:nonascii:] ~~~~~~~~~~~~~~ 4 [:graph:] ~~~~~~~~~~~ 2 [:print:] ~~~~~~~~~~~ 2 [:cntrl:] ~~~~~~~~~~~ 1 [:ff:] ~~~~~~~~ If you update this list, consider also updating chain of or'ed conditions ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ in execute_charset function. XEmacs; our equivalent is the condition ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ checking class_bits in the charset_mule and charset_mule_not opcodes. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ ~~ switch (limit) { ~~~~~~~~~~~~~~~~ case 4: ~~~~~~~ if (!memcmp (beg, "word", 4)) return RECC_WORD; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case 5: ~~~~~~~ if (!memcmp (beg, "alnum", 5)) return RECC_ALNUM; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!memcmp (beg, "alpha", 5)) return RECC_ALPHA; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!memcmp (beg, "space", 5)) return RECC_SPACE; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!memcmp (beg, "digit", 5)) return RECC_DIGIT; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!memcmp (beg, "blank", 5)) return RECC_BLANK; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!memcmp (beg, "upper", 5)) return RECC_UPPER; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!memcmp (beg, "lower", 5)) return RECC_LOWER; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!memcmp (beg, "punct", 5)) return RECC_PUNCT; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!memcmp (beg, "ascii", 5)) return RECC_ASCII; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!memcmp (beg, "graph", 5)) return RECC_GRAPH; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!memcmp (beg, "print", 5)) return RECC_PRINT; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!memcmp (beg, "cntrl", 5)) return RECC_CNTRL; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case 6: ~~~~~~~ if (!memcmp (beg, "xdigit", 6)) return RECC_XDIGIT; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case 7: ~~~~~~~ if (!memcmp (beg, "unibyte", 7)) return RECC_UNIBYTE; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case 8: ~~~~~~~ if (!memcmp (beg, "nonascii", 8)) return RECC_NONASCII; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case 9: ~~~~~~~ if (!memcmp (beg, "multibyte", 9)) return RECC_MULTIBYTE; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ } ~ return RECC_ERROR; ~~~~~~~~~~~~~~~~~~ } ~ /* True if CH is in the char class CC. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int ~~~ re_iswctype (int ch, re_wctype_t cc ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RE_ISWCTYPE_ARG_DECL) ~~~~~~~~~~~~~~~~~~~~~ { ~ switch (cc) ~~~~~~~~~~~ { ~ case RECC_ALNUM: return ISALNUM (ch) != 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_ALPHA: return ISALPHA (ch) != 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_BLANK: return ISBLANK (ch) != 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_CNTRL: return ISCNTRL (ch) != 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_DIGIT: return ISDIGIT (ch) != 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_GRAPH: return ISGRAPH (ch) != 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_PRINT: return ISPRINT (ch) != 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_PUNCT: return ISPUNCT (ch) != 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_SPACE: return ISSPACE (ch) != 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef emacs ~~~~~~~~~~~~ case RECC_UPPER: ~~~~~~~~~~~~~~~~ return NILP (lispbuf->case_fold_search) ? ISUPPER (ch) != 0 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ : !NOCASEP (lispbuf, ch); ~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_LOWER: ~~~~~~~~~~~~~~~~ return NILP (lispbuf->case_fold_search) ? ISLOWER (ch) != 0 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ : !NOCASEP (lispbuf, ch); ~~~~~~~~~~~~~~~~~~~~~~~~~ #else ~~~~~ case RECC_UPPER: return ISUPPER (ch) != 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_LOWER: return ISLOWER (ch) != 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ case RECC_XDIGIT: return ISXDIGIT (ch) != 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_ASCII: return ISASCII (ch) != 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_NONASCII: case RECC_MULTIBYTE: return !ISASCII (ch); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_UNIBYTE: return ISUNIBYTE (ch) != 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_WORD: return ISWORD (ch) != 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_ERROR: return false; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ assert (0); ~~~~~~~~~~~ return -1; ~~~~~~~~~~ } ~ #ifdef MULE ~~~~~~~~~~~ static re_bool ~~~~~~~~~~~~~~ re_wctype_can_match_non_ascii (re_wctype_t cc) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ switch (cc) ~~~~~~~~~~~ { ~ case RECC_ASCII: ~~~~~~~~~~~~~~~~ case RECC_UNIBYTE: ~~~~~~~~~~~~~~~~~~ case RECC_CNTRL: ~~~~~~~~~~~~~~~~ case RECC_DIGIT: ~~~~~~~~~~~~~~~~ case RECC_XDIGIT: ~~~~~~~~~~~~~~~~~ case RECC_BLANK: ~~~~~~~~~~~~~~~~ return false; ~~~~~~~~~~~~~ default: ~~~~~~~~ return true; ~~~~~~~~~~~~ } ~ } ~ #endif /* MULE */ ~~~~~~~~~~~~~~~~~ #ifdef emacs ~~~~~~~~~~~~ /* Return a bit-pattern to use in the range-table bits to match multibyte ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ chars of class CC. */ ~~~~~~~~~~~~~~~~~~~~~~ static unsigned char ~~~~~~~~~~~~~~~~~~~~ re_wctype_to_bit (re_wctype_t cc) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ switch (cc) ~~~~~~~~~~~ { ~ case RECC_PRINT: case RECC_GRAPH: ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_ALPHA: return BIT_ALPHA; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_ALNUM: case RECC_WORD: return BIT_WORD; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_LOWER: return BIT_LOWER; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_UPPER: return BIT_UPPER; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_PUNCT: return BIT_PUNCT; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_SPACE: return BIT_SPACE; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_MULTIBYTE: case RECC_NONASCII: ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_ASCII: case RECC_DIGIT: case RECC_XDIGIT: case RECC_CNTRL: ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_BLANK: case RECC_UNIBYTE: case RECC_ERROR: return 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ default: ~~~~~~~~ ABORT (); ~~~~~~~~~ return 0; ~~~~~~~~~ } ~ } ~ #endif /* emacs */ ~~~~~~~~~~~~~~~~~~ ~ static void store_op1 (re_opcode_t op, unsigned char *loc, int arg); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ static void store_op2 (re_opcode_t op, unsigned char *loc, int arg1, int arg2); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ static void insert_op1 (re_opcode_t op, unsigned char *loc, int arg, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ unsigned char *end); ~~~~~~~~~~~~~~~~~~~~ static void insert_op2 (re_opcode_t op, unsigned char *loc, int arg1, int arg2, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ unsigned char *end); ~~~~~~~~~~~~~~~~~~~~ static re_bool at_begline_loc_p (re_char *pattern, re_char *p, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ reg_syntax_t syntax); ~~~~~~~~~~~~~~~~~~~~~ static re_bool at_endline_loc_p (re_char *p, re_char *pend, int syntax); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ static re_bool group_in_compile_stack (compile_stack_type compile_stack, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regnum_t regnum); ~~~~~~~~~~~~~~~~~ static reg_errcode_t compile_range (re_char **p_ptr, re_char *pend, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RE_TRANSLATE_TYPE translate, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ reg_syntax_t syntax, ~~~~~~~~~~~~~~~~~~~~ unsigned char *b); ~~~~~~~~~~~~~~~~~~ #ifdef MULE ~~~~~~~~~~~ static reg_errcode_t compile_extended_range (re_char **p_ptr, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_char *pend, ~~~~~~~~~~~~~~ RE_TRANSLATE_TYPE translate, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ reg_syntax_t syntax, ~~~~~~~~~~~~~~~~~~~~ Lisp_Object rtab); ~~~~~~~~~~~~~~~~~~ #endif /* MULE */ ~~~~~~~~~~~~~~~~~ #ifdef emacs ~~~~~~~~~~~~ reg_errcode_t compile_char_class (re_wctype_t cc, Lisp_Object rtab, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Bitbyte *flags_out); ~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ static re_bool group_match_null_string_p (re_char **p, re_char *end, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ register_info_type *reg_info); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ static re_bool alt_match_null_string_p (re_char *p, re_char *end, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ register_info_type *reg_info); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ static re_bool common_op_match_null_string_p (re_char **p, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_char *end, ~~~~~~~~~~~~~ register_info_type *reg_info); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ static int bcmp_translate (re_char *s1, re_char *s2, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ REGISTER int len, RE_TRANSLATE_TYPE translate ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef emacs ~~~~~~~~~~~~ , Internal_Format fmt, Lisp_Object lispobj ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ ); ~~ static int re_match_2_internal (struct re_pattern_buffer *bufp, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_char *string1, int size1, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_char *string2, int size2, int pos, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ struct re_registers *regs, int stop ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RE_LISP_CONTEXT_ARGS_DECL); ~~~~~~~~~~~~~~~~~~~~~~~~~~~ ~ #ifndef MATCH_MAY_ALLOCATE ~~~~~~~~~~~~~~~~~~~~~~~~~~ /* If we cannot allocate large objects within re_match_2_internal, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ we make the fail stack and register vectors global. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ The fail stack, we grow to the maximum size when a regexp ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ is compiled. ~~~~~~~~~~~~ The register vectors, we adjust in size each time we ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ compile a regexp, according to the number of registers it needs. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ static fail_stack_type fail_stack; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Size with which the following vectors are currently allocated. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ That is so we can make them bigger as needed, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ but never make them smaller. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ static int regs_allocated_size; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ static re_char ** regstart, ** regend; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ static re_char ** old_regstart, ** old_regend; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ static re_char **best_regstart, **best_regend; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ static register_info_type *reg_info; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ static re_char **reg_dummy; ~~~~~~~~~~~~~~~~~~~~~~~~~~~ static register_info_type *reg_info_dummy; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Make the register vectors big enough for NUM_REGS registers, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ but don't make them smaller. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ static ~~~~~~ regex_grow_registers (int num_regs) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ if (num_regs > regs_allocated_size) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ RETALLOC (regstart, num_regs, re_char *); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RETALLOC (regend, num_regs, re_char *); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RETALLOC (old_regstart, num_regs, re_char *); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RETALLOC (old_regend, num_regs, re_char *); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RETALLOC (best_regstart, num_regs, re_char *); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RETALLOC (best_regend, num_regs, re_char *); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RETALLOC (reg_info, num_regs, register_info_type); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RETALLOC (reg_dummy, num_regs, re_char *); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RETALLOC (reg_info_dummy, num_regs, register_info_type); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regs_allocated_size = num_regs; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ } ~ #endif /* not MATCH_MAY_ALLOCATE */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ~ /* `regex_compile' compiles PATTERN (of length SIZE) according to SYNTAX. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Returns one of error codes defined in `regex.h', or zero for success. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Assumes the `allocated' (and perhaps `buffer') and `translate' ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fields are set in BUFP on entry. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ If it succeeds, results are put in BUFP (if it returns an error, the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ contents of BUFP are undefined): ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `buffer' is the compiled pattern; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `syntax' is set to SYNTAX; ~~~~~~~~~~~~~~~~~~~~~~~~~~ `used' is set to the length of the compiled pattern; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `fastmap_accurate' is zero; ~~~~~~~~~~~~~~~~~~~~~~~~~~~ `re_ngroups' is the number of groups/subexpressions (including shy ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ groups) in PATTERN; ~~~~~~~~~~~~~~~~~~~ `re_nsub' is the number of non-shy groups in PATTERN; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `not_bol' and `not_eol' are zero; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ The `fastmap' and `newline_anchor' fields are neither ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ examined nor set. */ ~~~~~~~~~~~~~~~~~~~~~ /* Return, freeing storage we allocated. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define FREE_STACK_RETURN(value) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ do \ ~~~~~~~~~~ { \ ~~~~~~~~~ xfree (compile_stack.stack); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return value; \ ~~~~~~~~~~~~~~~~~~~~ } while (0) ~~~~~~~~~~~ static reg_errcode_t ~~~~~~~~~~~~~~~~~~~~ regex_compile (re_char *pattern, int size, reg_syntax_t syntax, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ struct re_pattern_buffer *bufp) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ /* We fetch characters from PATTERN here. We declare these as int ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (or possibly long) so that chars above 127 can be used as ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ array indices. The macros that fetch a character from the pattern ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ make sure to coerce to unsigned char before assigning, so we won't ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ get bitten by negative numbers here. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* XEmacs change: used to be unsigned char. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ REGISTER EMACS_INT c, c1; ~~~~~~~~~~~~~~~~~~~~~~~~~ /* A random temporary spot in PATTERN. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_char *p1; ~~~~~~~~~~~~ /* Points to the end of the buffer, where we should append. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ REGISTER unsigned char *buf_end; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Keeps track of unclosed groups. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ compile_stack_type compile_stack; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Points to the current (ending) position in the pattern. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_char *p = pattern; ~~~~~~~~~~~~~~~~~~~~~ re_char *pend = pattern + size; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* How to translate the characters in the pattern. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RE_TRANSLATE_TYPE translate = bufp->translate; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Address of the count-byte of the most recently inserted `exactn' ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ command. This makes it possible to tell if a new exact-match ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ character can be added to that command or if the character requires ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ a new `exactn' command. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~ unsigned char *pending_exact = 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Address of start of the most recently finished expression. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ This tells, e.g., postfix * where to find the start of its ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ operand. Reset at the beginning of groups and alternatives. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ unsigned char *laststart = 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Address of beginning of regexp, or inside of last group. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ unsigned char *begalt; ~~~~~~~~~~~~~~~~~~~~~~ /* Place in the uncompiled pattern (i.e., the {) to ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ which to go back if the interval is invalid. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_char *beg_interval; ~~~~~~~~~~~~~~~~~~~~~~ /* Address of the place where a forward jump should go to the end of ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the containing expression. Each alternative of an `or' -- except the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ last -- ends with a forward jump of this sort. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ unsigned char *fixup_alt_jump = 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Counts open-groups as they are encountered. Remembered for the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ matching close-group on the compile stack, so the same register ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ number is put in the stop_memory as the start_memory. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regnum_t regnum = 0; ~~~~~~~~~~~~~~~~~~~~ #ifdef DEBUG ~~~~~~~~~~~~ if (debug_regexps & RE_DEBUG_COMPILATION) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ int debug_count; ~~~~~~~~~~~~~~~~ DEBUG_PRINT1 ("\nCompiling pattern: "); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (debug_count = 0; debug_count < size; debug_count++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ putchar (pattern[debug_count]); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ putchar ('\n'); ~~~~~~~~~~~~~~~ } ~ #endif /* DEBUG */ ~~~~~~~~~~~~~~~~~~ /* Initialize the compile stack. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ compile_stack.stack = TALLOC (INIT_COMPILE_STACK_SIZE, compile_stack_elt_t); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (compile_stack.stack == NULL) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return REG_ESPACE; ~~~~~~~~~~~~~~~~~~ compile_stack.size = INIT_COMPILE_STACK_SIZE; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ compile_stack.avail = 0; ~~~~~~~~~~~~~~~~~~~~~~~~ /* Initialize the pattern buffer. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->syntax = syntax; ~~~~~~~~~~~~~~~~~~~~~~ bufp->fastmap_accurate = 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->not_bol = bufp->not_eol = 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Set `used' to zero, so that if we return an error, the pattern ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ printer (for debugging) will think there's no pattern. We reset it ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ at the end. */ ~~~~~~~~~~~~~~~ bufp->used = 0; ~~~~~~~~~~~~~~~ /* Always count groups, whether or not bufp->no_sub is set. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->re_nsub = 0; ~~~~~~~~~~~~~~~~~~ bufp->re_ngroups = 0; ~~~~~~~~~~~~~~~~~~~~~ bufp->warned_about_incompatible_back_references = 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (bufp->external_to_internal_register == 0) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ bufp->external_to_internal_register_size = INIT_REG_TRANSLATE_SIZE; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RETALLOC (bufp->external_to_internal_register, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->external_to_internal_register_size, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int); ~~~~~ } ~ { ~ int i; ~~~~~~ bufp->external_to_internal_register[0] = 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (i = 1; i < bufp->external_to_internal_register_size; i++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->external_to_internal_register[i] = (int) 0xDEADBEEF; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ #if !defined (emacs) && !defined (SYNTAX_TABLE) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Initialize the syntax table. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ init_syntax_once (); ~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ if (bufp->allocated == 0) ~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ if (bufp->buffer) ~~~~~~~~~~~~~~~~~ { /* If zero allocated, but buffer is non-null, try to realloc ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ enough space. This loses if buffer's address is bogus, but ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ that is the user's responsibility. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RETALLOC (bufp->buffer, INIT_BUF_SIZE, unsigned char); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ else ~~~~ { /* Caller did not allocate a buffer. Do it for them. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->buffer = TALLOC (INIT_BUF_SIZE, unsigned char); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ if (!bufp->buffer) FREE_STACK_RETURN (REG_ESPACE); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->allocated = INIT_BUF_SIZE; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ begalt = buf_end = bufp->buffer; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Loop through the uncompiled pattern until we're at the end. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ while (p != pend) ~~~~~~~~~~~~~~~~~ { ~ PATFETCH (c); ~~~~~~~~~~~~~ switch (c) ~~~~~~~~~~ { ~ case '^': ~~~~~~~~~ { ~ if ( /* If at start of pattern, it's an operator. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ p == pattern + 1 ~~~~~~~~~~~~~~~~ /* If context independent, it's an operator. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || syntax & RE_CONTEXT_INDEP_ANCHORS ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Otherwise, depends on what's come before. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || at_begline_loc_p (pattern, p, syntax)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ BUF_PUSH (begline); ~~~~~~~~~~~~~~~~~~~ else ~~~~ goto normal_char; ~~~~~~~~~~~~~~~~~ } ~ break; ~~~~~~ case '$': ~~~~~~~~~ { ~ if ( /* If at end of pattern, it's an operator. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ p == pend ~~~~~~~~~ /* If context independent, it's an operator. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || syntax & RE_CONTEXT_INDEP_ANCHORS ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Otherwise, depends on what's next. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || at_endline_loc_p (p, pend, syntax)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ BUF_PUSH (endline); ~~~~~~~~~~~~~~~~~~~ else ~~~~ goto normal_char; ~~~~~~~~~~~~~~~~~ } ~ break; ~~~~~~ case '+': ~~~~~~~~~ case '?': ~~~~~~~~~ if ((syntax & RE_BK_PLUS_QM) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || (syntax & RE_LIMITED_OPS)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto normal_char; ~~~~~~~~~~~~~~~~~ handle_plus: ~~~~~~~~~~~~ case '*': ~~~~~~~~~ /* If there is no previous pattern... */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!laststart) ~~~~~~~~~~~~~~~ { ~ if (syntax & RE_CONTEXT_INVALID_OPS) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ FREE_STACK_RETURN (REG_BADRPT); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ else if (!(syntax & RE_CONTEXT_INDEP_OPS)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto normal_char; ~~~~~~~~~~~~~~~~~ } ~ { ~ /* true means zero/many matches are allowed. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_bool zero_times_ok = c != '+'; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_bool many_times_ok = c != '?'; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* true means match shortest string possible. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_bool minimal = false; ~~~~~~~~~~~~~~~~~~~~~~~~ /* If there is a sequence of repetition chars, collapse it ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ down to just one (the right one). We can't combine ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ interval operators with these because of, e.g., `a{2}*', ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ which should only match an even number of `a's. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ while (p != pend) ~~~~~~~~~~~~~~~~~ { ~ PATFETCH (c); ~~~~~~~~~~~~~ if (c == '*' || (!(syntax & RE_BK_PLUS_QM) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ && (c == '+' || c == '?'))) ~~~~~~~~~~~~~~~~~~~~~~~~~~~ ; ~ else if (syntax & RE_BK_PLUS_QM && c == '\\') ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ if (p == pend) FREE_STACK_RETURN (REG_EESCAPE); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PATFETCH (c1); ~~~~~~~~~~~~~~ if (!(c1 == '+' || c1 == '?')) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ PATUNFETCH; ~~~~~~~~~~~ PATUNFETCH; ~~~~~~~~~~~ break; ~~~~~~ } ~ c = c1; ~~~~~~~ } ~ else ~~~~ { ~ PATUNFETCH; ~~~~~~~~~~~ break; ~~~~~~ } ~ /* If we get here, we found another repeat character. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!(syntax & RE_NO_MINIMAL_MATCHING)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ /* "*?" and "+?" and "??" are okay (and mean match ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ minimally), but other sequences (such as "*??" and ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ "+++") are rejected (reserved for future use). */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (minimal || c != '?') ~~~~~~~~~~~~~~~~~~~~~~~~ FREE_STACK_RETURN (REG_BADRPT); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ minimal = true; ~~~~~~~~~~~~~~~ } ~ else ~~~~ { ~ zero_times_ok |= c != '+'; ~~~~~~~~~~~~~~~~~~~~~~~~~~ many_times_ok |= c != '?'; ~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ } ~ /* Star, etc. applied to an empty pattern is equivalent ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ to an empty pattern. */ ~~~~~~~~~~~~~~~~~~~~~~~~ if (!laststart) ~~~~~~~~~~~~~~~ break; ~~~~~~ /* Now we know whether zero matches is allowed ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ and whether two or more matches is allowed ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ and whether we want minimal or maximal matching. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (minimal) ~~~~~~~~~~~~ { ~ if (!many_times_ok) ~~~~~~~~~~~~~~~~~~~ { ~ /* "a??" becomes: ~~~~~~~~~~~~~~~~~ 0: /on_failure_jump to 6 ~~~~~~~~~~~~~~~~~~~~~~~~ 3: /jump to 9 ~~~~~~~~~~~~~ 6: /exactn/1/A ~~~~~~~~~~~~~~ 9: end of pattern. ~~~~~~~~~~~~~~~~~~ */ ~~ GET_BUFFER_SPACE (6); ~~~~~~~~~~~~~~~~~~~~~ INSERT_JUMP (jump, laststart, buf_end + 3); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ buf_end += 3; ~~~~~~~~~~~~~ INSERT_JUMP (on_failure_jump, laststart, laststart + 6); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ buf_end += 3; ~~~~~~~~~~~~~ } ~ else if (zero_times_ok) ~~~~~~~~~~~~~~~~~~~~~~~ { ~ /* "a*?" becomes: ~~~~~~~~~~~~~~~~~ 0: /jump to 6 ~~~~~~~~~~~~~ 3: /exactn/1/A ~~~~~~~~~~~~~~ 6: /on_failure_jump to 3 ~~~~~~~~~~~~~~~~~~~~~~~~ 9: end of pattern. ~~~~~~~~~~~~~~~~~~ */ ~~ GET_BUFFER_SPACE (6); ~~~~~~~~~~~~~~~~~~~~~ INSERT_JUMP (jump, laststart, buf_end + 3); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ buf_end += 3; ~~~~~~~~~~~~~ STORE_JUMP (on_failure_jump, buf_end, laststart + 3); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ buf_end += 3; ~~~~~~~~~~~~~ } ~ else ~~~~ { ~ /* "a+?" becomes: ~~~~~~~~~~~~~~~~~ 0: /exactn/1/A ~~~~~~~~~~~~~~ 3: /on_failure_jump to 0 ~~~~~~~~~~~~~~~~~~~~~~~~ 6: end of pattern. ~~~~~~~~~~~~~~~~~~ */ ~~ GET_BUFFER_SPACE (3); ~~~~~~~~~~~~~~~~~~~~~ STORE_JUMP (on_failure_jump, buf_end, laststart); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ buf_end += 3; ~~~~~~~~~~~~~ } ~ } ~ else ~~~~ { ~ /* Are we optimizing this jump? */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_bool keep_string_p = false; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (many_times_ok) ~~~~~~~~~~~~~~~~~~ { /* More than one repetition is allowed, so put in ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ at the end a backward relative jump from ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `buf_end' to before the next jump we're going ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ to put in below (which jumps from laststart to ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ after this jump). ~~~~~~~~~~~~~~~~~ But if we are at the `*' in the exact sequence `.*\n', ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ insert an unconditional jump backwards to the ., ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ instead of the beginning of the loop. This way we only ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ push a failure point once, instead of every time ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ through the loop. */ ~~~~~~~~~~~~~~~~~~~~~ assert (p - 1 > pattern); ~~~~~~~~~~~~~~~~~~~~~~~~~ /* Allocate the space for the jump. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ GET_BUFFER_SPACE (3); ~~~~~~~~~~~~~~~~~~~~~ /* We know we are not at the first character of the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ pattern, because laststart was nonzero. And we've ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ already incremented `p', by the way, to be the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ character after the `*'. Do we have to do something ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ analogous here for null bytes, because of ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RE_DOT_NOT_NULL? */ ~~~~~~~~~~~~~~~~~~~ if (*(p - 2) == '.' ~~~~~~~~~~~~~~~~~~~ && zero_times_ok ~~~~~~~~~~~~~~~~ && p < pend && *p == '\n' ~~~~~~~~~~~~~~~~~~~~~~~~~ && !(syntax & RE_DOT_NEWLINE)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { /* We have .*\n. */ ~~~~~~~~~~~~~~~~~~~~~~ STORE_JUMP (jump, buf_end, laststart); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ keep_string_p = true; ~~~~~~~~~~~~~~~~~~~~~ } ~ else ~~~~ /* Anything else. */ ~~~~~~~~~~~~~~~~~~~~~ STORE_JUMP (maybe_pop_jump, buf_end, laststart - 3); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* We've added more stuff to the buffer. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ buf_end += 3; ~~~~~~~~~~~~~ } ~ /* On failure, jump from laststart to buf_end + 3, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ which will be the end of the buffer after this jump ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ is inserted. */ ~~~~~~~~~~~~~~~~ GET_BUFFER_SPACE (3); ~~~~~~~~~~~~~~~~~~~~~ INSERT_JUMP (keep_string_p ? on_failure_keep_string_jump ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ : on_failure_jump, ~~~~~~~~~~~~~~~~~~ laststart, buf_end + 3); ~~~~~~~~~~~~~~~~~~~~~~~~ buf_end += 3; ~~~~~~~~~~~~~ if (!zero_times_ok) ~~~~~~~~~~~~~~~~~~~ { ~ /* At least one repetition is required, so insert a ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `dummy_failure_jump' before the initial ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `on_failure_jump' instruction of the loop. This ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ effects a skip over that instruction the first time ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ we hit that loop. */ ~~~~~~~~~~~~~~~~~~~~~ GET_BUFFER_SPACE (3); ~~~~~~~~~~~~~~~~~~~~~ INSERT_JUMP (dummy_failure_jump, laststart, laststart + 6); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ buf_end += 3; ~~~~~~~~~~~~~ } ~ } ~ pending_exact = 0; ~~~~~~~~~~~~~~~~~~ } ~ break; ~~~~~~ case '.': ~~~~~~~~~ laststart = buf_end; ~~~~~~~~~~~~~~~~~~~~ BUF_PUSH (anychar); ~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ #ifdef MULE ~~~~~~~~~~~ #define MAYBE_START_OVER_WITH_EXTENDED(ch) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (ch >= 0x80) do \ ~~~~~~~~~~~~~~~~~~~~~~ { \ ~~~~~~~ goto start_over_with_extended; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } while (0) ~~~~~~~~~~~ #else ~~~~~ #define MAYBE_START_OVER_WITH_EXTENDED(ch) (void)(ch) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ case '[': ~~~~~~~~~ { ~ /* XEmacs change: this whole section */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_bool had_char_class = false; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (p == pend) FREE_STACK_RETURN (REG_EBRACK); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Ensure that we have enough space to push a charset: the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ opcode, the length count, and the bitset; 34 bytes in all. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ GET_BUFFER_SPACE (34); ~~~~~~~~~~~~~~~~~~~~~~ laststart = buf_end; ~~~~~~~~~~~~~~~~~~~~ /* We test `*p == '^' twice, instead of using an if ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ statement, so we only need one BUF_PUSH. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ BUF_PUSH (*p == '^' ? charset_not : charset); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (*p == '^') ~~~~~~~~~~~~~~ p++; ~~~~ /* Remember the first position in the bracket expression. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ p1 = p; ~~~~~~~ /* Push the number of bytes in the bitmap. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ BUF_PUSH ((1 << BYTEWIDTH) / BYTEWIDTH); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Clear the whole map. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~ memset (buf_end, 0, (1 << BYTEWIDTH) / BYTEWIDTH); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* charset_not matches newline according to a syntax bit. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if ((re_opcode_t) buf_end[-2] == charset_not ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ && (syntax & RE_HAT_LISTS_NOT_NEWLINE)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ SET_LIST_BIT ('\n'); ~~~~~~~~~~~~~~~~~~~~ /* Read in characters and ranges, setting map bits. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (;;) ~~~~~~~~ { ~ if (p == pend) FREE_STACK_RETURN (REG_EBRACK); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PATFETCH (c); ~~~~~~~~~~~~~ /* Frumble-bumble, we may have found some extended chars. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Need to start over, process everything using the general ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ extended-char mechanism, and need to use charset_mule and ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ charset_mule_not instead of charset and charset_not. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ MAYBE_START_OVER_WITH_EXTENDED (c); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* \ might escape characters inside [...] and [^...]. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if ((syntax & RE_BACKSLASH_ESCAPE_IN_LISTS) && c == '\\') ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ if (p == pend) FREE_STACK_RETURN (REG_EESCAPE); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PATFETCH (c1); ~~~~~~~~~~~~~~ MAYBE_START_OVER_WITH_EXTENDED (c1); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ SET_LIST_BIT (c1); ~~~~~~~~~~~~~~~~~~ continue; ~~~~~~~~~ } ~ /* Could be the end of the bracket expression. If it's ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ not (i.e., when the bracket expression is `[]' so ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ far), the ']' character bit gets set way below. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (c == ']' && p != p1 + 1) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ /* Look ahead to see if it's a range when the last thing ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ was a character class. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~ if (had_char_class && c == '-' && *p != ']') ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ FREE_STACK_RETURN (REG_ERANGE); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Look ahead to see if it's a range when the last thing ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ was a character: if this is a hyphen not at the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ beginning or the end of a list, then it's the range ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ operator. */ ~~~~~~~~~~~~~ if (c == '-' ~~~~~~~~~~~~ && !(p - 2 >= pattern && p[-2] == '[') ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ && !(p - 3 >= pattern && p[-3] == '[' && p[-2] == '^') ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ && *p != ']') ~~~~~~~~~~~~~ { ~ reg_errcode_t ret; ~~~~~~~~~~~~~~~~~~ MAYBE_START_OVER_WITH_EXTENDED (*(unsigned char *)p); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ret = compile_range (&p, pend, translate, syntax, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ buf_end); ~~~~~~~~~ if (ret != REG_NOERROR) FREE_STACK_RETURN (ret); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ else if (p[0] == '-' && p[1] != ']') ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { /* This handles ranges made up of characters only. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ reg_errcode_t ret; ~~~~~~~~~~~~~~~~~~ /* Move past the `-'. */ ~~~~~~~~~~~~~~~~~~~~~~~~~ PATFETCH (c1); ~~~~~~~~~~~~~~ MAYBE_START_OVER_WITH_EXTENDED (*(unsigned char *)p); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ret = compile_range (&p, pend, translate, syntax, buf_end); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (ret != REG_NOERROR) FREE_STACK_RETURN (ret); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* See if we're at the beginning of a possible character ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ class. */ ~~~~~~~~~~ else if (syntax & RE_CHAR_CLASSES && c == '[' && *p == ':') ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ re_char *str = p + 1; ~~~~~~~~~~~~~~~~~~~~~ PATFETCH (c); ~~~~~~~~~~~~~ c1 = 0; ~~~~~~~ /* If pattern is `[[:'. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (p == pend) FREE_STACK_RETURN (REG_EBRACK); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (;;) ~~~~~~~~ { ~ PATFETCH (c); ~~~~~~~~~~~~~ if ((c == ':' && *p == ']') || p == pend) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ break; ~~~~~~ } ~ c1++; ~~~~~ } ~ /* If isn't a word bracketed by `[:' and `:]': ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ undo the ending character, the letters, and leave ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the leading `:' and `[' (but set bits for them). */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (c == ':' && *p == ']') ~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ re_wctype_t cc = re_wctype (str, c1); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int ch; ~~~~~~~ if (cc == RECC_ERROR) ~~~~~~~~~~~~~~~~~~~~~ FREE_STACK_RETURN (REG_ECTYPE); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Throw away the ] at the end of the character ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ class. */ ~~~~~~~~~~ PATFETCH (c); ~~~~~~~~~~~~~ if (p == pend) FREE_STACK_RETURN (REG_EBRACK); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef MULE ~~~~~~~~~~~ if (re_wctype_can_match_non_ascii (cc)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ goto start_over_with_extended; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ #endif /* MULE */ ~~~~~~~~~~~~~~~~~ for (ch = 0; ch < (1 << BYTEWIDTH); ++ch) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ if (re_iswctype (ch, cc ~~~~~~~~~~~~~~~~~~~~~~~ RE_ISWCTYPE_ARG (current_buffer))) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ SET_LIST_BIT (ch); ~~~~~~~~~~~~~~~~~~ } ~ } ~ had_char_class = true; ~~~~~~~~~~~~~~~~~~~~~~ } ~ else ~~~~ { ~ c1++; ~~~~~ while (c1--) ~~~~~~~~~~~~ PATUNFETCH; ~~~~~~~~~~~ SET_LIST_BIT ('['); ~~~~~~~~~~~~~~~~~~~ SET_LIST_BIT (':'); ~~~~~~~~~~~~~~~~~~~ had_char_class = false; ~~~~~~~~~~~~~~~~~~~~~~~ } ~ } ~ else ~~~~ { ~ had_char_class = false; ~~~~~~~~~~~~~~~~~~~~~~~ SET_LIST_BIT (c); ~~~~~~~~~~~~~~~~~ } ~ } ~ /* Discard any (non)matching list bytes that are all 0 at the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ end of the map. Decrease the map-length byte too. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ while ((int) buf_end[-1] > 0 && buf_end[buf_end[-1] - 1] == 0) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ buf_end[-1]--; ~~~~~~~~~~~~~~ buf_end += buf_end[-1]; ~~~~~~~~~~~~~~~~~~~~~~~ } ~ break; ~~~~~~ #ifdef MULE ~~~~~~~~~~~ start_over_with_extended: ~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ REGISTER Lisp_Object rtab = Qnil; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Bitbyte flags = 0; ~~~~~~~~~~~~~~~~~~ int bytes_needed = sizeof (flags); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_bool had_char_class = false; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* There are extended chars here, which means we need to use the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ unified range-table format. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (buf_end[-2] == charset) ~~~~~~~~~~~~~~~~~~~~~~~~~~~ buf_end[-2] = charset_mule; ~~~~~~~~~~~~~~~~~~~~~~~~~~~ else ~~~~ buf_end[-2] = charset_mule_not; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ buf_end--; ~~~~~~~~~~ p = p1; /* go back to the beginning of the charset, after ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ a possible ^. */ ~~~~~~~~~~~~~~~~ rtab = Vthe_lisp_rangetab; ~~~~~~~~~~~~~~~~~~~~~~~~~~ Fclear_range_table (rtab); ~~~~~~~~~~~~~~~~~~~~~~~~~~ /* charset_not matches newline according to a syntax bit. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if ((re_opcode_t) buf_end[-1] == charset_mule_not ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ && (syntax & RE_HAT_LISTS_NOT_NEWLINE)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ SET_RANGETAB_BIT ('\n'); ~~~~~~~~~~~~~~~~~~~~~~~~ /* Read in characters and ranges, setting map bits. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (;;) ~~~~~~~~ { ~ if (p == pend) FREE_STACK_RETURN (REG_EBRACK); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PATFETCH (c); ~~~~~~~~~~~~~ /* \ might escape characters inside [...] and [^...]. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if ((syntax & RE_BACKSLASH_ESCAPE_IN_LISTS) && c == '\\') ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ if (p == pend) FREE_STACK_RETURN (REG_EESCAPE); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PATFETCH (c1); ~~~~~~~~~~~~~~ SET_RANGETAB_BIT (c1); ~~~~~~~~~~~~~~~~~~~~~~ continue; ~~~~~~~~~ } ~ /* Could be the end of the bracket expression. If it's ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ not (i.e., when the bracket expression is `[]' so ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ far), the ']' character bit gets set way below. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (c == ']' && p != p1 + 1) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ /* Look ahead to see if it's a range when the last thing ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ was a character class. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~ if (had_char_class && c == '-' && *p != ']') ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ FREE_STACK_RETURN (REG_ERANGE); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Look ahead to see if it's a range when the last thing ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ was a character: if this is a hyphen not at the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ beginning or the end of a list, then it's the range ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ operator. */ ~~~~~~~~~~~~~ if (c == '-' ~~~~~~~~~~~~ && !(p - 2 >= pattern && p[-2] == '[') ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ && !(p - 3 >= pattern && p[-3] == '[' && p[-2] == '^') ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ && *p != ']') ~~~~~~~~~~~~~ { ~ reg_errcode_t ret; ~~~~~~~~~~~~~~~~~~ ret = compile_extended_range (&p, pend, translate, syntax, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ rtab); ~~~~~~ if (ret != REG_NOERROR) FREE_STACK_RETURN (ret); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ else if (p[0] == '-' && p[1] != ']') ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { /* This handles ranges made up of characters only. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ reg_errcode_t ret; ~~~~~~~~~~~~~~~~~~ /* Move past the `-'. */ ~~~~~~~~~~~~~~~~~~~~~~~~~ PATFETCH (c1); ~~~~~~~~~~~~~~ ret = compile_extended_range (&p, pend, translate, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ syntax, rtab); ~~~~~~~~~~~~~~ if (ret != REG_NOERROR) FREE_STACK_RETURN (ret); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* See if we're at the beginning of a possible character ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ class. */ ~~~~~~~~~~ else if (syntax & RE_CHAR_CLASSES && c == '[' && *p == ':') ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ re_char *str = p + 1; ~~~~~~~~~~~~~~~~~~~~~ PATFETCH (c); ~~~~~~~~~~~~~ c1 = 0; ~~~~~~~ /* If pattern is `[[:'. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (p == pend) FREE_STACK_RETURN (REG_EBRACK); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (;;) ~~~~~~~~ { ~ PATFETCH (c); ~~~~~~~~~~~~~ if ((c == ':' && *p == ']') || p == pend) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ break; ~~~~~~ } ~ c1++; ~~~~~ } ~ /* If isn't a word bracketed by `[:' and `:]': ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ undo the ending character, the letters, and leave ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the leading `:' and `[' (but set bits for them). */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (c == ':' && *p == ']') ~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ re_wctype_t cc = re_wctype (str, c1); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ reg_errcode_t ret = REG_NOERROR; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (cc == RECC_ERROR) ~~~~~~~~~~~~~~~~~~~~~ FREE_STACK_RETURN (REG_ECTYPE); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Throw away the ] at the end of the character ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ class. */ ~~~~~~~~~~ PATFETCH (c); ~~~~~~~~~~~~~ if (p == pend) FREE_STACK_RETURN (REG_EBRACK); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ret = compile_char_class (cc, rtab, &flags); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (ret != REG_NOERROR) FREE_STACK_RETURN (ret); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ had_char_class = true; ~~~~~~~~~~~~~~~~~~~~~~ } ~ else ~~~~ { ~ c1++; ~~~~~ while (c1--) ~~~~~~~~~~~~ PATUNFETCH; ~~~~~~~~~~~ SET_RANGETAB_BIT ('['); ~~~~~~~~~~~~~~~~~~~~~~~ SET_RANGETAB_BIT (':'); ~~~~~~~~~~~~~~~~~~~~~~~ had_char_class = false; ~~~~~~~~~~~~~~~~~~~~~~~ } ~ } ~ else ~~~~ { ~ had_char_class = false; ~~~~~~~~~~~~~~~~~~~~~~~ SET_RANGETAB_BIT (c); ~~~~~~~~~~~~~~~~~~~~~ } ~ } ~ bytes_needed += unified_range_table_bytes_needed (rtab); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ GET_BUFFER_SPACE (bytes_needed); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ *buf_end++ = flags; ~~~~~~~~~~~~~~~~~~~ unified_range_table_copy_data (rtab, buf_end); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ buf_end += unified_range_table_bytes_used (buf_end); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ } ~ #endif /* MULE */ ~~~~~~~~~~~~~~~~~ case '(': ~~~~~~~~~ if (syntax & RE_NO_BK_PARENS) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto handle_open; ~~~~~~~~~~~~~~~~~ else ~~~~ goto normal_char; ~~~~~~~~~~~~~~~~~ case ')': ~~~~~~~~~ if (syntax & RE_NO_BK_PARENS) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto handle_close; ~~~~~~~~~~~~~~~~~~ else ~~~~ goto normal_char; ~~~~~~~~~~~~~~~~~ case '\n': ~~~~~~~~~~ if (syntax & RE_NEWLINE_ALT) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto handle_alt; ~~~~~~~~~~~~~~~~ else ~~~~ goto normal_char; ~~~~~~~~~~~~~~~~~ case '|': ~~~~~~~~~ if (syntax & RE_NO_BK_VBAR) ~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto handle_alt; ~~~~~~~~~~~~~~~~ else ~~~~ goto normal_char; ~~~~~~~~~~~~~~~~~ case '{': ~~~~~~~~~ if (syntax & RE_INTERVALS && syntax & RE_NO_BK_BRACES) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto handle_interval; ~~~~~~~~~~~~~~~~~~~~~ else ~~~~ goto normal_char; ~~~~~~~~~~~~~~~~~ case '\\': ~~~~~~~~~~ if (p == pend) FREE_STACK_RETURN (REG_EESCAPE); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Do not translate the character after the \, so that we can ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ distinguish, e.g., \B from \b, even if we normally would ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ translate, e.g., B to b. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PATFETCH_RAW (c); ~~~~~~~~~~~~~~~~~ switch (c) ~~~~~~~~~~ { ~ case '(': ~~~~~~~~~ if (syntax & RE_NO_BK_PARENS) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto normal_backslash; ~~~~~~~~~~~~~~~~~~~~~~ handle_open: ~~~~~~~~~~~~ { ~ regnum_t r = 0; ~~~~~~~~~~~~~~~ re_bool shy = 0, named_nonshy = 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!(syntax & RE_NO_SHY_GROUPS) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ && p != pend && itext_ichar_eql (p, '?')) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ INC_IBYTEPTR (p); /* Gobble up the '?'. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PATFETCH (c); /* Fetch the next character, which may be a ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ digit. */ ~~~~~~~~~ switch (c) ~~~~~~~~~~ { ~ case ':': /* shy groups */ ~~~~~~~~~~~~~~~~~~~~~~~~~~ shy = 1; ~~~~~~~~ break; ~~~~~~ case '1': case '2': case '3': case '4': ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case '5': case '6': case '7': case '8': case '9': ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PATUNFETCH; ~~~~~~~~~~~ GET_UNSIGNED_NUMBER (r); ~~~~~~~~~~~~~~~~~~~~~~~~ if (itext_ichar_eql (p, ':')) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ named_nonshy = 1; ~~~~~~~~~~~~~~~~~ INC_IBYTEPTR (p); /* Gobble up the ':'. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ /* Otherwise, fall through and error. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* An explicitly specified regnum must start with ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ non-0. */ ~~~~~~~~~ case '0': ~~~~~~~~~ default: ~~~~~~~~ FREE_STACK_RETURN (REG_BADPAT); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ } ~ ++regnum; ~~~~~~~~~ bufp->re_ngroups++; ~~~~~~~~~~~~~~~~~~~ if (bufp->re_ngroups > MAX_REGNUM) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ FREE_STACK_RETURN (REG_ESUBREG); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ if (!shy) ~~~~~~~~~ { ~ if (named_nonshy) ~~~~~~~~~~~~~~~~~ { ~ if (r < bufp->external_to_internal_register_size) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ if (group_in_compile_stack ~~~~~~~~~~~~~~~~~~~~~~~~~~ (compile_stack, ~~~~~~~~~~~~~~~ bufp->external_to_internal_register[r])) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ /* GNU errors in this context, which is ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ inconsistent; it otherwise has no problem ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ with named non-shy groups overriding ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ previously-assigned group numbers. I choose ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ to error here for consistency with GNU for ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ those writing code that should target ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ both. */ ~~~~~~~~ FREE_STACK_RETURN (REG_ESUBREG); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ } ~ if (r > bufp->re_nsub) ~~~~~~~~~~~~~~~~~~~~~~ { ~ bufp->re_nsub = r; ~~~~~~~~~~~~~~~~~~ } ~ } ~ else ~~~~ { ~ r = ++(bufp->re_nsub); ~~~~~~~~~~~~~~~~~~~~~~ } ~ while (bufp->external_to_internal_register_size <= ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->re_nsub) ~~~~~~~~~~~~~~ { ~ int i; ~~~~~~ int old_size = ~~~~~~~~~~~~~~ bufp->external_to_internal_register_size; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->external_to_internal_register_size ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ += max (old_size + 5, bufp->re_nsub + 5); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RETALLOC (bufp->external_to_internal_register, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->external_to_internal_register_size, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int); ~~~~~ for (i = old_size; ~~~~~~~~~~~~~~~~~~ i < bufp->external_to_internal_register_size; i++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->external_to_internal_register[i] = ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (int) 0xDEADBEEF; ~~~~~~~~~~~~~~~~~ } ~ /* This is explicitly [r] rather than [bufp->re_nsub] for ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the case that the named nonshy group references an ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ unused register number less than bufp->re_nsub. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->external_to_internal_register[r] = ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->re_ngroups; ~~~~~~~~~~~~~~~~~ } ~ if (COMPILE_STACK_FULL) ~~~~~~~~~~~~~~~~~~~~~~~ { ~ RETALLOC (compile_stack.stack, compile_stack.size << 1, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ compile_stack_elt_t); ~~~~~~~~~~~~~~~~~~~~~ if (compile_stack.stack == NULL) return REG_ESPACE; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ compile_stack.size <<= 1; ~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* These are the values to restore when we hit end of this ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ group. They are all relative offsets, so that if the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ whole pattern moves because of realloc, they will still ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ be valid. */ ~~~~~~~~~~~~~ COMPILE_STACK_TOP.begalt_offset = begalt - bufp->buffer; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ COMPILE_STACK_TOP.fixup_alt_jump ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ = fixup_alt_jump ? fixup_alt_jump - bufp->buffer + 1 : 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ COMPILE_STACK_TOP.laststart_offset = buf_end - bufp->buffer; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ COMPILE_STACK_TOP.regnum = bufp->re_ngroups; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ COMPILE_STACK_TOP.inner_group_offset ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ = buf_end - bufp->buffer + 3; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* We will eventually replace the 0 with the number of ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ groups inner to this one, using inner_group_offset, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ above. */ ~~~~~~~~~ GET_BUFFER_SPACE (5); ~~~~~~~~~~~~~~~~~~~~~ store_op2 (start_memory, buf_end, bufp->re_ngroups, 0); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ buf_end += 5; ~~~~~~~~~~~~~ compile_stack.avail++; ~~~~~~~~~~~~~~~~~~~~~~ fixup_alt_jump = 0; ~~~~~~~~~~~~~~~~~~~ laststart = 0; ~~~~~~~~~~~~~~ begalt = buf_end; ~~~~~~~~~~~~~~~~~ pending_exact = 0; ~~~~~~~~~~~~~~~~~~ } ~ break; ~~~~~~ case ')': ~~~~~~~~~ if (syntax & RE_NO_BK_PARENS) goto normal_backslash; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (COMPILE_STACK_EMPTY) ~~~~~~~~~~~~~~~~~~~~~~~~ { ~ if (syntax & RE_UNMATCHED_RIGHT_PAREN_ORD) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto normal_backslash; ~~~~~~~~~~~~~~~~~~~~~~ else ~~~~ FREE_STACK_RETURN (REG_ERPAREN); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ handle_close: ~~~~~~~~~~~~~ if (fixup_alt_jump) ~~~~~~~~~~~~~~~~~~~ { /* Push a dummy failure point at the end of the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ alternative for a possible future ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `pop_failure_jump' to pop. See comments at ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `push_dummy_failure' in `re_match_2'. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ BUF_PUSH (push_dummy_failure); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* We allocated space for this jump when we assigned ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ to `fixup_alt_jump', in the `handle_alt' case below. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ STORE_JUMP (jump_past_alt, fixup_alt_jump, buf_end - 1); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* See similar code for backslashed left paren above. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (COMPILE_STACK_EMPTY) ~~~~~~~~~~~~~~~~~~~~~~~~ { ~ if (syntax & RE_UNMATCHED_RIGHT_PAREN_ORD) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto normal_char; ~~~~~~~~~~~~~~~~~ else ~~~~ FREE_STACK_RETURN (REG_ERPAREN); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* Since we just checked for an empty stack above, this ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ``can't happen''. */ ~~~~~~~~~~~~~~~~~~~~~ assert (compile_stack.avail != 0); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ /* We don't just want to restore into `regnum', because ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ later groups should continue to be numbered higher, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ as in `(ab)c(de)' -- the second group is #2. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regnum_t this_group_regnum; ~~~~~~~~~~~~~~~~~~~~~~~~~~~ unsigned char *inner_group_loc; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ compile_stack.avail--; ~~~~~~~~~~~~~~~~~~~~~~ begalt = bufp->buffer + COMPILE_STACK_TOP.begalt_offset; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fixup_alt_jump ~~~~~~~~~~~~~~ = COMPILE_STACK_TOP.fixup_alt_jump ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ? bufp->buffer + COMPILE_STACK_TOP.fixup_alt_jump - 1 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ : 0; ~~~~ laststart = bufp->buffer + COMPILE_STACK_TOP.laststart_offset; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ this_group_regnum = COMPILE_STACK_TOP.regnum; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ pending_exact = 0; ~~~~~~~~~~~~~~~~~~ /* We're at the end of the group, so now we know how many ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ groups were inside this one. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ inner_group_loc ~~~~~~~~~~~~~~~ = bufp->buffer + COMPILE_STACK_TOP.inner_group_offset; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ STORE_NUMBER (inner_group_loc, regnum - this_group_regnum); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ GET_BUFFER_SPACE (5); ~~~~~~~~~~~~~~~~~~~~~ store_op2 (stop_memory, buf_end, this_group_regnum, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regnum - this_group_regnum); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ buf_end += 5; ~~~~~~~~~~~~~ } ~ break; ~~~~~~ case '|': /* `\|'. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~ if (syntax & RE_LIMITED_OPS || syntax & RE_NO_BK_VBAR) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto normal_backslash; ~~~~~~~~~~~~~~~~~~~~~~ handle_alt: ~~~~~~~~~~~ if (syntax & RE_LIMITED_OPS) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto normal_char; ~~~~~~~~~~~~~~~~~ /* Insert before the previous alternative a jump which ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ jumps to this alternative if the former fails. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ GET_BUFFER_SPACE (3); ~~~~~~~~~~~~~~~~~~~~~ INSERT_JUMP (on_failure_jump, begalt, buf_end + 6); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ pending_exact = 0; ~~~~~~~~~~~~~~~~~~ buf_end += 3; ~~~~~~~~~~~~~ /* The alternative before this one has a jump after it ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ which gets executed if it gets matched. Adjust that ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ jump so it will jump to this alternative's analogous ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ jump (put in below, which in turn will jump to the next ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (if any) alternative's such jump, etc.). The last such ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ jump jumps to the correct final destination. A picture: ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ _____ _____ ~~~~~~~~~~~ | | | | ~~~~~~~~~~~ | v | v ~~~~~~~~~~~ a | b | c ~~~~~~~~~~~ If we are at `b', then fixup_alt_jump right now points to a ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ three-byte space after `a'. We'll put in the jump, set ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fixup_alt_jump to right after `b', and leave behind three ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bytes which we'll fill in when we get to after `c'. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (fixup_alt_jump) ~~~~~~~~~~~~~~~~~~~ STORE_JUMP (jump_past_alt, fixup_alt_jump, buf_end); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Mark and leave space for a jump after this alternative, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ to be filled in later either by next alternative or ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ when know we're at the end of a series of alternatives. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fixup_alt_jump = buf_end; ~~~~~~~~~~~~~~~~~~~~~~~~~ GET_BUFFER_SPACE (3); ~~~~~~~~~~~~~~~~~~~~~ buf_end += 3; ~~~~~~~~~~~~~ laststart = 0; ~~~~~~~~~~~~~~ begalt = buf_end; ~~~~~~~~~~~~~~~~~ break; ~~~~~~ case '{': ~~~~~~~~~ /* If \{ is a literal. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!(syntax & RE_INTERVALS) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* If we're at `\{' and it's not the open-interval ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ operator. */ ~~~~~~~~~~~~~ || ((syntax & RE_INTERVALS) && (syntax & RE_NO_BK_BRACES)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || (p - 2 == pattern && p == pend)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto normal_backslash; ~~~~~~~~~~~~~~~~~~~~~~ #define BAD_INTERVAL(errnum) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ do \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (syntax & RE_NO_BK_BRACES) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto unfetch_interval; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ else \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ FREE_STACK_RETURN (errnum); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } while (0) ~~~~~~~~~~~ handle_interval: ~~~~~~~~~~~~~~~~ { ~ /* If got here, then the syntax allows intervals. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* At least (most) this many matches must be made. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int lower_bound = 0, upper_bound = -1; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ beg_interval = p - 1; ~~~~~~~~~~~~~~~~~~~~~ if (p == pend || itext_ichar_eql (p, '+')) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ BAD_INTERVAL (REG_EBRACE); ~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ GET_UNSIGNED_NUMBER (lower_bound); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (p == pend) BAD_INTERVAL (REG_EBRACE); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PATFETCH (c); ~~~~~~~~~~~~~ if (c == ',') ~~~~~~~~~~~~~ { ~ if (p == pend || itext_ichar_eql (p, '+')) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ BAD_INTERVAL (REG_EBRACE); ~~~~~~~~~~~~~~~~~~~~~~~~~~ GET_UNSIGNED_NUMBER (upper_bound); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (upper_bound < 0) upper_bound = RE_DUP_MAX; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (p == pend) BAD_INTERVAL (REG_EBRACE); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PATFETCH (c); ~~~~~~~~~~~~~ } ~ else ~~~~ { ~ /* Interval such as `{1}' => match exactly once. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ upper_bound = lower_bound; ~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ if (lower_bound > upper_bound) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ BAD_INTERVAL (REG_EBRACE); ~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ if (upper_bound > RE_DUP_MAX) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ BAD_INTERVAL (REG_ESIZEBR); ~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ if (!(syntax & RE_NO_BK_BRACES)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ if (c != '\\') ~~~~~~~~~~~~~~ FREE_STACK_RETURN (REG_BADBR); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (p == pend) ~~~~~~~~~~~~~~ FREE_STACK_RETURN (REG_EESCAPE); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PATFETCH (c); ~~~~~~~~~~~~~ } ~ if (c != '}') ~~~~~~~~~~~~~ { ~ BAD_INTERVAL (REG_EBRACE); ~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* We just parsed a valid interval. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* It's invalid to have no preceding RE. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!laststart) ~~~~~~~~~~~~~~~ { ~ if (syntax & RE_CONTEXT_INVALID_OPS) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ FREE_STACK_RETURN (REG_BADRPT); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ else if (syntax & RE_CONTEXT_INDEP_OPS) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ laststart = buf_end; ~~~~~~~~~~~~~~~~~~~~ else ~~~~ goto unfetch_interval; ~~~~~~~~~~~~~~~~~~~~~~ } ~ /* If the upper bound is zero, don't want to succeed at ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ all; jump from `laststart' to `b + 3', which will be ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the end of the buffer after we insert the jump. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (upper_bound == 0) ~~~~~~~~~~~~~~~~~~~~~ { ~ GET_BUFFER_SPACE (3); ~~~~~~~~~~~~~~~~~~~~~ INSERT_JUMP (jump, laststart, buf_end + 3); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ buf_end += 3; ~~~~~~~~~~~~~ } ~ /* Otherwise, we have a nontrivial interval. When ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ we're all done, the pattern will look like: ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ set_number_at ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ set_number_at ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ succeed_n ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ~~~~~~~~~~~~~~ jump_n ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (The upper bound and `jump_n' are omitted if ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `upper_bound' is 1, though.) */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ else ~~~~ { /* If the upper bound is > 1, we need to insert ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ more at the end of the loop. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int nbytes = 10 + (upper_bound > 1) * 10; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ GET_BUFFER_SPACE (nbytes); ~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Initialize lower bound of the `succeed_n', even ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ though it will be set during matching by its ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ attendant `set_number_at' (inserted next), ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ because `re_compile_fastmap' needs to know. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Jump to the `jump_n' we might insert below. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ INSERT_JUMP2 (succeed_n, laststart, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ buf_end + 5 + (upper_bound > 1) * 5, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ lower_bound); ~~~~~~~~~~~~~ buf_end += 5; ~~~~~~~~~~~~~ /* Code to initialize the lower bound. Insert ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ before the `succeed_n'. The `5' is the last two ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bytes of this `set_number_at', plus 3 bytes of ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the following `succeed_n'. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ insert_op2 (set_number_at, laststart, 5, lower_bound, buf_end); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ buf_end += 5; ~~~~~~~~~~~~~ if (upper_bound > 1) ~~~~~~~~~~~~~~~~~~~~ { /* More than one repetition is allowed, so ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ append a backward jump to the `succeed_n' ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ that starts this interval. ~~~~~~~~~~~~~~~~~~~~~~~~~~ When we've reached this during matching, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ we'll have matched the interval once, so ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ jump back only `upper_bound - 1' times. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ STORE_JUMP2 (jump_n, buf_end, laststart + 5, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ upper_bound - 1); ~~~~~~~~~~~~~~~~~ buf_end += 5; ~~~~~~~~~~~~~ /* The location we want to set is the second ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ parameter of the `jump_n'; that is `b-2' as ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ an absolute address. `laststart' will be ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the `set_number_at' we're about to insert; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `laststart+3' the number to set, the source ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for the relative address. But we are ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ inserting into the middle of the pattern -- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ so everything is getting moved up by 5. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Conclusion: (b - 2) - (laststart + 3) + 5, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ i.e., b - laststart. ~~~~~~~~~~~~~~~~~~~~ We insert this at the beginning of the loop ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ so that if we fail during matching, we'll ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ reinitialize the bounds. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ insert_op2 (set_number_at, laststart, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ buf_end - laststart, ~~~~~~~~~~~~~~~~~~~~ upper_bound - 1, buf_end); ~~~~~~~~~~~~~~~~~~~~~~~~~~ buf_end += 5; ~~~~~~~~~~~~~ } ~ } ~ pending_exact = 0; ~~~~~~~~~~~~~~~~~~ beg_interval = NULL; ~~~~~~~~~~~~~~~~~~~~ } ~ break; ~~~~~~ #undef BAD_INTERVAL ~~~~~~~~~~~~~~~~~~~ unfetch_interval: ~~~~~~~~~~~~~~~~~ /* If an invalid interval, match the characters as literals. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ assert (beg_interval); ~~~~~~~~~~~~~~~~~~~~~~ p = beg_interval; ~~~~~~~~~~~~~~~~~ beg_interval = NULL; ~~~~~~~~~~~~~~~~~~~~ /* normal_char and normal_backslash need `c'. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PATFETCH (c); ~~~~~~~~~~~~~ if (!(syntax & RE_NO_BK_BRACES)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ if (p > pattern && p[-1] == '\\') ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto normal_backslash; ~~~~~~~~~~~~~~~~~~~~~~ } ~ goto normal_char; ~~~~~~~~~~~~~~~~~ #ifdef emacs ~~~~~~~~~~~~ /* There is no way to specify the before_dot and after_dot ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ operators. rms says this is ok. --karl */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case '=': ~~~~~~~~~ BUF_PUSH (at_dot); ~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case 's': ~~~~~~~~~ laststart = buf_end; ~~~~~~~~~~~~~~~~~~~~ PATFETCH (c); ~~~~~~~~~~~~~ /* XEmacs addition */ ~~~~~~~~~~~~~~~~~~~~~ if (c >= 0x80 || syntax_spec_code[c] == 0377) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ FREE_STACK_RETURN (REG_ESYNTAX); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ BUF_PUSH_2 (syntaxspec, syntax_spec_code[c]); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case 'S': ~~~~~~~~~ laststart = buf_end; ~~~~~~~~~~~~~~~~~~~~ PATFETCH (c); ~~~~~~~~~~~~~ /* XEmacs addition */ ~~~~~~~~~~~~~~~~~~~~~ if (c >= 0x80 || syntax_spec_code[c] == 0377) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ FREE_STACK_RETURN (REG_ESYNTAX); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ BUF_PUSH_2 (notsyntaxspec, syntax_spec_code[c]); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ #ifdef MULE ~~~~~~~~~~~ /* 97.2.17 jhod merged in to XEmacs from mule-2.3 */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case 'c': ~~~~~~~~~ laststart = buf_end; ~~~~~~~~~~~~~~~~~~~~ PATFETCH_RAW (c); ~~~~~~~~~~~~~~~~~ if (c < 32 || c > 127) ~~~~~~~~~~~~~~~~~~~~~~ FREE_STACK_RETURN (REG_ECATEGORY); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ BUF_PUSH_2 (categoryspec, c); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case 'C': ~~~~~~~~~ laststart = buf_end; ~~~~~~~~~~~~~~~~~~~~ PATFETCH_RAW (c); ~~~~~~~~~~~~~~~~~ if (c < 32 || c > 127) ~~~~~~~~~~~~~~~~~~~~~~ FREE_STACK_RETURN (REG_ECATEGORY); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ BUF_PUSH_2 (notcategoryspec, c); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ /* end of category patch */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif /* MULE */ ~~~~~~~~~~~~~~~~~ #endif /* emacs */ ~~~~~~~~~~~~~~~~~~ case 'w': ~~~~~~~~~ laststart = buf_end; ~~~~~~~~~~~~~~~~~~~~ BUF_PUSH (wordchar); ~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case 'W': ~~~~~~~~~ laststart = buf_end; ~~~~~~~~~~~~~~~~~~~~ BUF_PUSH (notwordchar); ~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case '<': ~~~~~~~~~ BUF_PUSH (wordbeg); ~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case '>': ~~~~~~~~~ BUF_PUSH (wordend); ~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case 'b': ~~~~~~~~~ BUF_PUSH (wordbound); ~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case 'B': ~~~~~~~~~ BUF_PUSH (notwordbound); ~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case '`': ~~~~~~~~~ BUF_PUSH (begbuf); ~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case '\'': ~~~~~~~~~~ BUF_PUSH (endbuf); ~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case '1': case '2': case '3': case '4': case '5': ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case '6': case '7': case '8': case '9': ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ regnum_t reg = -1, regint; ~~~~~~~~~~~~~~~~~~~~~~~~~~ if (syntax & RE_NO_BK_REFS) ~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto normal_char; ~~~~~~~~~~~~~~~~~ PATUNFETCH; ~~~~~~~~~~~ GET_UNSIGNED_NUMBER (reg); ~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Progressively divide down the backreference until we find ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ one that corresponds to an existing register. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ while (reg > 10 && ~~~~~~~~~~~~~~~~~~ (syntax & RE_NO_MULTI_DIGIT_BK_REFS ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || reg > bufp->re_nsub ~~~~~~~~~~~~~~~~~~~~~~ || (bufp->external_to_internal_register[reg] ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ == (int) 0xDEADBEEF))) ~~~~~~~~~~~~~~~~~~~~~~ { ~ PATUNFETCH; ~~~~~~~~~~~ reg /= 10; ~~~~~~~~~~ } ~ if (reg > bufp->re_nsub ~~~~~~~~~~~~~~~~~~~~~~~ || (bufp->external_to_internal_register[reg] ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ == (int) 0xDEADBEEF)) ~~~~~~~~~~~~~~~~~~~~~ { ~ /* \N with one digit with a non-existing group has always ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ been a syntax error. ~~~~~~~~~~~~~~~~~~~~ GNU as of Fr 27 Mär 2020 16:24:07 GMT do not accept ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ multidigit backreferences; if they did there would be ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ an argument for this not being an error for those ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ backreferences that are less than some known named ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ backreference. As it is currently we should error, this ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ will give those writing code for XEmacs better ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ feedback. */ ~~~~~~~~~~~~ FREE_STACK_RETURN (REG_ESUBREG); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ regint = bufp->external_to_internal_register[reg]; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Can't back reference to a subexpression if inside of it. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (group_in_compile_stack (compile_stack, regint)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ /* Check REG, not REGINT. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ while (reg > 10) ~~~~~~~~~~~~~~~~ { ~ PATUNFETCH; ~~~~~~~~~~~ reg = reg / 10; ~~~~~~~~~~~~~~~ } ~ goto normal_char; ~~~~~~~~~~~~~~~~~ } ~ #ifdef emacs ~~~~~~~~~~~~ if (reg > 9 && ~~~~~~~~~~~~~~ bufp->warned_about_incompatible_back_references == 0) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ bufp->warned_about_incompatible_back_references = 1; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ warn_when_safe (intern ("regex"), Qinfo, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ "Back reference \\%d now has new " ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ "semantics in %s", reg, pattern); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ #endif ~~~~~~ laststart = buf_end; ~~~~~~~~~~~~~~~~~~~~ GET_BUFFER_SPACE (3); ~~~~~~~~~~~~~~~~~~~~~ store_op1 (duplicate, buf_end, regint); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ buf_end += 3; ~~~~~~~~~~~~~ } ~ break; ~~~~~~ case '+': ~~~~~~~~~ case '?': ~~~~~~~~~ if (syntax & RE_BK_PLUS_QM) ~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto handle_plus; ~~~~~~~~~~~~~~~~~ else ~~~~ goto normal_backslash; ~~~~~~~~~~~~~~~~~~~~~~ default: ~~~~~~~~ normal_backslash: ~~~~~~~~~~~~~~~~~ /* You might think it would be useful for \ to mean ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ not to translate; but if we don't translate it, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ it will never match anything. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ c = RE_TRANSLATE (c); ~~~~~~~~~~~~~~~~~~~~~ goto normal_char; ~~~~~~~~~~~~~~~~~ } ~ break; ~~~~~~ default: ~~~~~~~~ /* Expects the character in `c'. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* `p' points to the location after where `c' came from. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ normal_char: ~~~~~~~~~~~~ { ~ /* The following conditional synced to GNU Emacs 22.1. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* If no exactn currently being built. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!pending_exact ~~~~~~~~~~~~~~~~~~ /* If last exactn not at current position. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || pending_exact + *pending_exact + 1 != buf_end ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* We have only one byte following the exactn for the count. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || *pending_exact >= (1 << BYTEWIDTH) - MAX_ICHAR_LEN ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* If followed by a repetition operator. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ If the lookahead fails because of end of pattern, any ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ trailing backslash will get caught later. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || (p != pend && (*p == '*' || *p == '^')) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || ((syntax & RE_BK_PLUS_QM) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ? p + 1 < pend && *p == '\\' && (p[1] == '+' || p[1] == '?') ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ : p != pend && (*p == '+' || *p == '?')) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || ((syntax & RE_INTERVALS) ~~~~~~~~~~~~~~~~~~~~~~~~~~~ && ((syntax & RE_NO_BK_BRACES) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ? p != pend && *p == '{' ~~~~~~~~~~~~~~~~~~~~~~~~ : p + 1 < pend && (p[0] == '\\' && p[1] == '{')))) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ /* Start building a new exactn. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ laststart = buf_end; ~~~~~~~~~~~~~~~~~~~~ BUF_PUSH_2 (exactn, 0); ~~~~~~~~~~~~~~~~~~~~~~~ pending_exact = buf_end - 1; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ #ifndef MULE ~~~~~~~~~~~~ BUF_PUSH (c); ~~~~~~~~~~~~~ (*pending_exact)++; ~~~~~~~~~~~~~~~~~~~ #else ~~~~~ { ~ Bytecount bt_count; ~~~~~~~~~~~~~~~~~~~ Ibyte tmp_buf[MAX_ICHAR_LEN]; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int i; ~~~~~~ bt_count = set_itext_ichar (tmp_buf, c); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (i = 0; i < bt_count; i++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ BUF_PUSH (tmp_buf[i]); ~~~~~~~~~~~~~~~~~~~~~~ (*pending_exact)++; ~~~~~~~~~~~~~~~~~~~ } ~ } ~ #endif ~~~~~~ break; ~~~~~~ } ~ } /* switch (c) */ ~~~~~~~~~~~~~~~~~~ } /* while p != pend */ ~~~~~~~~~~~~~~~~~~~~~~~ /* Through the pattern now. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (fixup_alt_jump) ~~~~~~~~~~~~~~~~~~~ STORE_JUMP (jump_past_alt, fixup_alt_jump, buf_end); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!COMPILE_STACK_EMPTY) ~~~~~~~~~~~~~~~~~~~~~~~~~ FREE_STACK_RETURN (REG_EPAREN); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* If we don't want backtracking, force success ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the first time we reach the end of the compiled pattern. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (syntax & RE_NO_POSIX_BACKTRACKING) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ BUF_PUSH (succeed); ~~~~~~~~~~~~~~~~~~~ xfree (compile_stack.stack); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* We have succeeded; set the length of the buffer. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->used = buf_end - bufp->buffer; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef DEBUG ~~~~~~~~~~~~ if (debug_regexps & RE_DEBUG_COMPILATION) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ DEBUG_PRINT1 ("\nCompiled pattern: \n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ print_compiled_pattern (bufp); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ #endif /* DEBUG */ ~~~~~~~~~~~~~~~~~~ #ifndef MATCH_MAY_ALLOCATE ~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Initialize the failure stack to the largest possible stack. This ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ isn't necessary unless we're trying to avoid calling alloca in ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the search and match routines. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ int num_regs = bufp->re_ngroups + 1; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Since DOUBLE_FAIL_STACK refuses to double only if the current size ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ is strictly greater than re_max_failures, the largest possible stack ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ is 2 * re_max_failures failure points. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (fail_stack.size < (2 * re_max_failures * MAX_FAILURE_ITEMS)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ fail_stack.size = (2 * re_max_failures * MAX_FAILURE_ITEMS); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (! fail_stack.stack) ~~~~~~~~~~~~~~~~~~~~~~~ fail_stack.stack ~~~~~~~~~~~~~~~~ = (fail_stack_elt_t *) xmalloc (fail_stack.size ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ * sizeof (fail_stack_elt_t)); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ else ~~~~ fail_stack.stack ~~~~~~~~~~~~~~~~ = (fail_stack_elt_t *) xrealloc (fail_stack.stack, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (fail_stack.size ~~~~~~~~~~~~~~~~ * sizeof (fail_stack_elt_t))); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ regex_grow_registers (num_regs); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ #endif /* not MATCH_MAY_ALLOCATE */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return REG_NOERROR; ~~~~~~~~~~~~~~~~~~~ } /* regex_compile */ ~~~~~~~~~~~~~~~~~~~~~ ~ /* Subroutines for `regex_compile'. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Store OP at LOC followed by two-byte integer parameter ARG. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ static void ~~~~~~~~~~~ store_op1 (re_opcode_t op, unsigned char *loc, int arg) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ *loc = (unsigned char) op; ~~~~~~~~~~~~~~~~~~~~~~~~~~ STORE_NUMBER (loc + 1, arg); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* Like `store_op1', but for two two-byte parameters ARG1 and ARG2. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ static void ~~~~~~~~~~~ store_op2 (re_opcode_t op, unsigned char *loc, int arg1, int arg2) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ *loc = (unsigned char) op; ~~~~~~~~~~~~~~~~~~~~~~~~~~ STORE_NUMBER (loc + 1, arg1); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ STORE_NUMBER (loc + 3, arg2); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* Copy the bytes from LOC to END to open up three bytes of space at LOC ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for OP followed by two-byte integer parameter ARG. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ static void ~~~~~~~~~~~ insert_op1 (re_opcode_t op, unsigned char *loc, int arg, unsigned char *end) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ REGISTER unsigned char *pfrom = end; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ REGISTER unsigned char *pto = end + 3; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ while (pfrom != loc) ~~~~~~~~~~~~~~~~~~~~ *--pto = *--pfrom; ~~~~~~~~~~~~~~~~~~ store_op1 (op, loc, arg); ~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* Like `insert_op1', but for two two-byte parameters ARG1 and ARG2. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ static void ~~~~~~~~~~~ insert_op2 (re_opcode_t op, unsigned char *loc, int arg1, int arg2, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ unsigned char *end) ~~~~~~~~~~~~~~~~~~~ { ~ REGISTER unsigned char *pfrom = end; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ REGISTER unsigned char *pto = end + 5; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ while (pfrom != loc) ~~~~~~~~~~~~~~~~~~~~ *--pto = *--pfrom; ~~~~~~~~~~~~~~~~~~ store_op2 (op, loc, arg1, arg2); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* P points to just after a ^ in PATTERN. Return true if that ^ comes ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ after an alternative or a begin-subexpression. We assume there is at ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ least one character before the ^. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ static re_bool ~~~~~~~~~~~~~~ at_begline_loc_p (re_char *pattern, re_char *p, reg_syntax_t syntax) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ re_char *prev = p - 2; ~~~~~~~~~~~~~~~~~~~~~~ re_bool prev_prev_backslash = prev > pattern && prev[-1] == '\\'; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return ~~~~~~ /* After a subexpression? */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (*prev == '(' && (syntax & RE_NO_BK_PARENS || prev_prev_backslash)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* After an alternative? */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || (*prev == '|' && (syntax & RE_NO_BK_VBAR || prev_prev_backslash)); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* The dual of at_begline_loc_p. This one is for $. We assume there is ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ at least one character after the $, i.e., `P < PEND'. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ static re_bool ~~~~~~~~~~~~~~ at_endline_loc_p (re_char *p, re_char *pend, int syntax) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ re_char *next = p; ~~~~~~~~~~~~~~~~~~ re_bool next_backslash = *next == '\\'; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_char *next_next = p + 1 < pend ? p + 1 : 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return ~~~~~~ /* Before a subexpression? */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (syntax & RE_NO_BK_PARENS ? *next == ')' ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ : next_backslash && next_next && *next_next == ')') ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Before an alternative? */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || (syntax & RE_NO_BK_VBAR ? *next == '|' ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ : next_backslash && next_next && *next_next == '|'); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* Returns true if REGNUM is in one of COMPILE_STACK's elements and ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ false if it's not. */ ~~~~~~~~~~~~~~~~~~~~~~ static re_bool ~~~~~~~~~~~~~~ group_in_compile_stack (compile_stack_type compile_stack, regnum_t regnum) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ int this_element; ~~~~~~~~~~~~~~~~~ for (this_element = compile_stack.avail - 1; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ this_element >= 0; ~~~~~~~~~~~~~~~~~~ this_element--) ~~~~~~~~~~~~~~~ if (compile_stack.stack[this_element].regnum == regnum) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return true; ~~~~~~~~~~~~ return false; ~~~~~~~~~~~~~ } ~ /* Read the ending character of a range (in a bracket expression) from the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ uncompiled pattern *P_PTR (which ends at PEND). We assume the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ starting character is in `P[-2]'. (`P[-1]' is the character `-'.) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Then we set the translation of all bits between the starting and ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ending characters (inclusive) in the compiled pattern B. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Return an error code. ~~~~~~~~~~~~~~~~~~~~~ We use these short variable names so we can use the same macros as ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `regex_compile' itself. ~~~~~~~~~~~~~~~~~~~~~~~ Under Mule, this is only called when both chars of the range are ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ASCII. */ ~~~~~~~~~ static reg_errcode_t ~~~~~~~~~~~~~~~~~~~~ compile_range (re_char **p_ptr, re_char *pend, RE_TRANSLATE_TYPE translate, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ reg_syntax_t syntax, unsigned char *buf_end) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ Ichar this_char; ~~~~~~~~~~~~~~~~ re_char *p = *p_ptr; ~~~~~~~~~~~~~~~~~~~~ int range_start, range_end; ~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (p == pend) ~~~~~~~~~~~~~~ return REG_ERANGE; ~~~~~~~~~~~~~~~~~~ /* Even though the pattern is a signed `char *', we need to fetch ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ with unsigned char *'s; if the high bit of the pattern character ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ is set, the range endpoints will be negative if we fetch using a ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ signed char *. ~~~~~~~~~~~~~~ We also want to fetch the endpoints without translating them; the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ appropriate translation is done in the bit-setting loop below. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* The SVR4 compiler on the 3B2 had trouble with unsigned const char *. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ range_start = ((const unsigned char *) p)[-2]; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ range_end = ((const unsigned char *) p)[0]; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Have to increment the pointer into the pattern string, so the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ caller isn't still at the ending character. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (*p_ptr)++; ~~~~~~~~~~~ /* If the start is after the end, the range is empty. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (range_start > range_end) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return syntax & RE_NO_EMPTY_RANGES ? REG_ERANGE : REG_NOERROR; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Here we see why `this_char' has to be larger than an `unsigned ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ char' -- the range is inclusive, so if `range_end' == 0xff ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (assuming 8-bit characters), we would otherwise go into an infinite ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ loop, since all characters <= 0xff. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (this_char = range_start; this_char <= range_end; this_char++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ SET_LIST_BIT (RE_TRANSLATE (this_char)); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ return REG_NOERROR; ~~~~~~~~~~~~~~~~~~~ } ~ #ifdef MULE ~~~~~~~~~~~ static reg_errcode_t ~~~~~~~~~~~~~~~~~~~~ compile_extended_range (re_char **p_ptr, re_char *pend, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RE_TRANSLATE_TYPE translate, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ reg_syntax_t syntax, Lisp_Object rtab) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ Ichar this_char, range_start, range_end; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ const Ibyte *p; ~~~~~~~~~~~~~~~ if (*p_ptr == pend) ~~~~~~~~~~~~~~~~~~~ return REG_ERANGE; ~~~~~~~~~~~~~~~~~~ p = (const Ibyte *) *p_ptr; ~~~~~~~~~~~~~~~~~~~~~~~~~~~ range_end = itext_ichar (p); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ p--; /* back to '-' */ ~~~~~~~~~~~~~~~~~~~~~~ DEC_IBYTEPTR (p); /* back to start of range */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* We also want to fetch the endpoints without translating them; the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ appropriate translation is done in the bit-setting loop below. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ range_start = itext_ichar (p); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ INC_IBYTEPTR (*p_ptr); ~~~~~~~~~~~~~~~~~~~~~~ /* If the start is after the end, the range is empty. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (range_start > range_end) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return syntax & RE_NO_EMPTY_RANGES ? REG_ERANGE : REG_NOERROR; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifndef UNICODE_INTERNAL ~~~~~~~~~~~~~~~~~~~~~~~~ /* Can't have ranges spanning different charsets, except maybe for ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ranges entirely within the first 256 chars. (The intent of this is that ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the effect of such a range would be unpredictable, since there is no ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ well-defined ordering over charsets and the particular assignment of ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ charset ID's is arbitrary.) This does not apply to Unicode, with ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ well-defined character values. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if ((range_start >= 0x100 || range_end >= 0x100) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ && !EQ (old_mule_ichar_charset (range_start), ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ old_mule_ichar_charset (range_end))) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return REG_ERANGESPAN; ~~~~~~~~~~~~~~~~~~~~~~ #endif /* not UNICODE_INTERNAL */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (TRANSLATE_P (translate)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ /* #### This might be way inefficient if the range encompasses 10,000 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ chars or something. To be efficient, you'd have to do something like ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ this: ~~~~~ range_table a ~~~~~~~~~~~~~ range_table b; ~~~~~~~~~~~~~~ map_char_table (translation table, [range_start, range_end]) of ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ lambda (ch, translation): ~~~~~~~~~~~~~~~~~~~~~~~~~ put (ch, Qt) in a ~~~~~~~~~~~~~~~~~ put (translation, Qt) in b ~~~~~~~~~~~~~~~~~~~~~~~~~~ invert the range in a and truncate to [range_start, range_end] ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ put the union of a, b in rtab ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ This is to say, we want to map every character that has a translation ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ to its translation, and other characters to themselves. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ This assumes, as is reasonable in practice, that a translation ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ table maps individual characters to their translation, and does ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ not generally map multiple characters to the same translation. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ ~~ for (this_char = range_start; this_char <= range_end; this_char++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ SET_RANGETAB_BIT (RE_TRANSLATE (this_char)); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ } ~ else ~~~~ put_range_table (rtab, range_start, range_end, Qt); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return REG_NOERROR; ~~~~~~~~~~~~~~~~~~~ } ~ #endif /* MULE */ ~~~~~~~~~~~~~~~~~ #ifdef emacs ~~~~~~~~~~~~ reg_errcode_t ~~~~~~~~~~~~~ compile_char_class (re_wctype_t cc, Lisp_Object rtab, Bitbyte *flags_out) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ *flags_out |= re_wctype_to_bit (cc); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ switch (cc) ~~~~~~~~~~~ { ~ case RECC_ASCII: ~~~~~~~~~~~~~~~~ put_range_table (rtab, 0, 0x7f, Qt); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case RECC_XDIGIT: ~~~~~~~~~~~~~~~~~ put_range_table (rtab, 'a', 'f', Qt); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ put_range_table (rtab, 'A', 'f', Qt); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* fallthrough */ ~~~~~~~~~~~~~~~~~ case RECC_DIGIT: ~~~~~~~~~~~~~~~~ put_range_table (rtab, '0', '9', Qt); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case RECC_BLANK: ~~~~~~~~~~~~~~~~ put_range_table (rtab, ' ', ' ', Qt); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ put_range_table (rtab, '\t', '\t', Qt); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case RECC_PRINT: ~~~~~~~~~~~~~~~~ put_range_table (rtab, ' ', 0x7e, Qt); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ put_range_table (rtab, 0x80, CHAR_CODE_LIMIT, Qt); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case RECC_GRAPH: ~~~~~~~~~~~~~~~~ put_range_table (rtab, '!', 0x7e, Qt); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ put_range_table (rtab, 0x80, CHAR_CODE_LIMIT, Qt); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case RECC_NONASCII: ~~~~~~~~~~~~~~~~~~~ case RECC_MULTIBYTE: ~~~~~~~~~~~~~~~~~~~~ put_range_table (rtab, 0x80, CHAR_CODE_LIMIT, Qt); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case RECC_CNTRL: ~~~~~~~~~~~~~~~~ put_range_table (rtab, 0x00, 0x1f, Qt); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case RECC_UNIBYTE: ~~~~~~~~~~~~~~~~~~ /* Never true in XEmacs. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ /* The following all have their own bits in the class_bits argument to ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ charset_mule and charset_mule_not, they don't use the range table ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ information. */ ~~~~~~~~~~~~~~~ case RECC_ALPHA: ~~~~~~~~~~~~~~~~ case RECC_WORD: ~~~~~~~~~~~~~~~ case RECC_ALNUM: /* Equivalent to RECC_WORD */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_LOWER: ~~~~~~~~~~~~~~~~ case RECC_PUNCT: ~~~~~~~~~~~~~~~~ case RECC_SPACE: ~~~~~~~~~~~~~~~~ case RECC_UPPER: ~~~~~~~~~~~~~~~~ break; ~~~~~~ } ~ return REG_NOERROR; ~~~~~~~~~~~~~~~~~~~ } ~ #endif /* MULE */ ~~~~~~~~~~~~~~~~~ ~ /* re_compile_fastmap computes a ``fastmap'' for the compiled pattern in ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ BUFP. A fastmap records which of the (1 << BYTEWIDTH) possible ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ characters can start a string that matches the pattern. This fastmap ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ is used by re_search to skip quickly over impossible starting points. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ The caller must supply the address of a (1 << BYTEWIDTH)-byte data ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ area as BUFP->fastmap. ~~~~~~~~~~~~~~~~~~~~~~ We set the `fastmap', `fastmap_accurate', and `can_be_null' fields in ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the pattern buffer. ~~~~~~~~~~~~~~~~~~~ Returns 0 if we succeed, -2 if an internal error. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int ~~~ re_compile_fastmap (struct re_pattern_buffer *bufp ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RE_LISP_SHORT_CONTEXT_ARGS_DECL) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ int j, k; ~~~~~~~~~ #ifdef MATCH_MAY_ALLOCATE ~~~~~~~~~~~~~~~~~~~~~~~~~ fail_stack_type fail_stack; ~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ /* We don't push any register information onto the failure stack. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* &&#### this should be changed for 8-bit-fixed, for efficiency. see ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ comment marked with &&#### in re_search_2. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ REGISTER char *fastmap = bufp->fastmap; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ unsigned char *pattern = bufp->buffer; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ long size = bufp->used; ~~~~~~~~~~~~~~~~~~~~~~~ re_char *p = pattern; ~~~~~~~~~~~~~~~~~~~~~ REGISTER re_char *pend = pattern + size; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef REGEX_REL_ALLOC ~~~~~~~~~~~~~~~~~~~~~~ /* This holds the pointer to the failure stack, when ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ it is allocated relocatably. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fail_stack_elt_t *failure_stack_ptr; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ /* Assume that each path through the pattern can be null until ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ proven otherwise. We set this false at the bottom of switch ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ statement, to which we get only if a particular path doesn't ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ match the empty string. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_bool path_can_be_null = true; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* We aren't doing a `succeed_n' to begin with. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_bool succeed_n_p = false; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef ERROR_CHECK_MALLOC ~~~~~~~~~~~~~~~~~~~~~~~~~ /* The pattern comes from string data, not buffer data. We don't access ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ any buffer data, so we don't have to worry about malloc() (but the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ disallowed flag may have been set by a caller). */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int depth = bind_regex_malloc_disallowed (0); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ assert (fastmap != NULL && p != NULL); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ INIT_FAIL_STACK (); ~~~~~~~~~~~~~~~~~~~ memset (fastmap, 0, 1 << BYTEWIDTH); /* Assume nothing's valid. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->fastmap_accurate = 1; /* It will be when we're done. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->can_be_null = 0; ~~~~~~~~~~~~~~~~~~~~~~ while (1) ~~~~~~~~~ { ~ if (p == pend || *p == succeed) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ /* We have reached the (effective) end of pattern. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!FAIL_STACK_EMPTY ()) ~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ bufp->can_be_null |= path_can_be_null; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Reset for next path. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~ path_can_be_null = true; ~~~~~~~~~~~~~~~~~~~~~~~~ p = (unsigned char *) fail_stack.stack[--fail_stack.avail].pointer; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ continue; ~~~~~~~~~ } ~ else ~~~~ break; ~~~~~~ } ~ /* We should never be about to go beyond the end of the pattern. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ assert (p < pend); ~~~~~~~~~~~~~~~~~~ switch ((re_opcode_t) *p++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ /* I guess the idea here is to simply not bother with a fastmap ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if a backreference is used, since it's too hard to figure out ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the fastmap for the corresponding group. Setting ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `can_be_null' stops `re_search_2' from using the fastmap, so ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ that is all we do. */ ~~~~~~~~~~~~~~~~~~~~~~ case duplicate: ~~~~~~~~~~~~~~~ bufp->can_be_null = 1; ~~~~~~~~~~~~~~~~~~~~~~ goto done; ~~~~~~~~~~ /* Following are the cases which match a character. These end ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ with `break'. */ ~~~~~~~~~~~~~~~~~ case exactn: ~~~~~~~~~~~~ fastmap[p[1]] = 1; ~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case charset: ~~~~~~~~~~~~~ /* XEmacs: Under Mule, these bit vectors will ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ only contain values for characters below 0x80. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH))) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[j] = 1; ~~~~~~~~~~~~~~~ break; ~~~~~~ case charset_not: ~~~~~~~~~~~~~~~~~ /* Chars beyond end of map must be allowed. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef MULE ~~~~~~~~~~~ for (j = *p * BYTEWIDTH; j < 0x80; j++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[j] = 1; ~~~~~~~~~~~~~~~ /* And all extended characters must be allowed, too. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (j = 0x80; j < 0x100; j++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (ibyte_first_byte_p (j)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[j] = 1; ~~~~~~~~~~~~~~~ #else /* not MULE */ ~~~~~~~~~~~~~~~~~~~~ for (j = *p * BYTEWIDTH; j < (1 << BYTEWIDTH); j++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[j] = 1; ~~~~~~~~~~~~~~~ #endif /* MULE */ ~~~~~~~~~~~~~~~~~ for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!(p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH)))) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[j] = 1; ~~~~~~~~~~~~~~~ break; ~~~~~~ #ifdef MULE ~~~~~~~~~~~ case charset_mule: ~~~~~~~~~~~~~~~~~~ { ~ int nentries; ~~~~~~~~~~~~~ Bitbyte flags = *p++; ~~~~~~~~~~~~~~~~~~~~~ if (flags) ~~~~~~~~~~ { ~ /* We need to consult the syntax table, fastmap won't ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ work. */ ~~~~~~~~ bufp->can_be_null = 1; ~~~~~~~~~~~~~~~~~~~~~~ goto done; ~~~~~~~~~~ } ~ nentries = unified_range_table_nentries ((void *) p); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (j = 0; j < nentries; j++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ EMACS_INT first, last; ~~~~~~~~~~~~~~~~~~~~~~ Lisp_Object dummy_val; ~~~~~~~~~~~~~~~~~~~~~~ int jj; ~~~~~~~ Ibyte strr[MAX_ICHAR_LEN]; ~~~~~~~~~~~~~~~~~~~~~~~~~~ unified_range_table_get_range ((void *) p, j, &first, &last, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ &dummy_val); ~~~~~~~~~~~~ #ifndef UNICODE_INTERNAL ~~~~~~~~~~~~~~~~~~~~~~~~ for (jj = first; jj <= last && jj < 0x80; jj++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[jj] = 1; ~~~~~~~~~~~~~~~~ /* Ranges below 0x100 can span charsets, but there ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ are only two (Control-1 and Latin-1), and ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ either first or last has to be in them. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ set_itext_ichar (strr, first); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[*strr] = 1; ~~~~~~~~~~~~~~~~~~~ if (last < 0x100) ~~~~~~~~~~~~~~~~~ { ~ set_itext_ichar (strr, last); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[*strr] = 1; ~~~~~~~~~~~~~~~~~~~ } ~ else if (CHAR_CODE_LIMIT == last) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ /* This is RECC_MULTIBYTE or RECC_NONASCII; true for all ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ non-ASCII characters. */ ~~~~~~~~~~~~~~~~~~~~~~~~ jj = 0x80; ~~~~~~~~~~ while (jj < 0xA0) ~~~~~~~~~~~~~~~~~ { ~ fastmap[jj++] = 1; ~~~~~~~~~~~~~~~~~~ } ~ } ~ #else ~~~~~ /* Ranges can span charsets. We depend on the fact that ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ lead bytes are monotonically non-decreasing as ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ character values increase. @@#### This is a fairly ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ reasonable assumption in general (but DOES NOT WORK in ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ old Mule due to the ordering of private dimension-1 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ chars before official dimension-2 chars), and introduces ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ a dependency on the particular representation. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ Ibyte strrlast[MAX_ICHAR_LEN]; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ set_itext_ichar (strr, first); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ set_itext_ichar (strrlast, min (last, CHAR_CODE_LIMIT - 1)); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (jj = *strr; jj <= *strrlast; jj++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[jj] = 1; ~~~~~~~~~~~~~~~~ } ~ #endif /* not UNICODE_INTERNAL */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* If it's not a possible first byte, it can't be in the fastmap. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ In UTF-8, lead bytes are not contiguous with ASCII, so a ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ range spanning the ASCII/non-ASCII boundary will put ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ extraneous bytes in the range [0x80 - 0xBF] in the fastmap. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (j = 0x80; j < 0x100; j++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!ibyte_first_byte_p (j)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[j] = 0; ~~~~~~~~~~~~~~~ } ~ break; ~~~~~~ case charset_mule_not: ~~~~~~~~~~~~~~~~~~~~~~ { ~ int nentries; ~~~~~~~~~~~~~ int smallest_prev = 0; ~~~~~~~~~~~~~~~~~~~~~~ Bitbyte flags = *p++; ~~~~~~~~~~~~~~~~~~~~~ if (flags) ~~~~~~~~~~ { ~ /* We need to consult the syntax table, fastmap won't ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ work. */ ~~~~~~~~ bufp->can_be_null = 1; ~~~~~~~~~~~~~~~~~~~~~~ goto done; ~~~~~~~~~~ } ~ nentries = unified_range_table_nentries ((void *) p); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifndef UNICODE_INTERNAL ~~~~~~~~~~~~~~~~~~~~~~~~ for (j = 0; j < nentries; j++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ EMACS_INT first, last; ~~~~~~~~~~~~~~~~~~~~~~ Lisp_Object dummy_val; ~~~~~~~~~~~~~~~~~~~~~~ int jj; ~~~~~~~ unified_range_table_get_range ((void *) p, j, &first, &last, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ &dummy_val); ~~~~~~~~~~~~ for (jj = smallest_prev; jj < first && jj < 0x80; jj++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[jj] = 1; ~~~~~~~~~~~~~~~~ smallest_prev = last + 1; ~~~~~~~~~~~~~~~~~~~~~~~~~ if (smallest_prev >= 0x80) ~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ } ~ /* Also set lead bytes after the end */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (j = smallest_prev; j < 0x80; j++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[j] = 1; ~~~~~~~~~~~~~~~ /* Calculating which lead bytes are actually allowed ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ here is rather difficult, so we just punt and allow ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ all of them. ~~~~~~~~~~~~ */ ~~ for (j = 0x80; j < 0x100; j++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (ibyte_first_byte_p (j)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[j] = 1; ~~~~~~~~~~~~~~~ #else ~~~~~ for (j = 0; j < nentries; j++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ EMACS_INT first, last; ~~~~~~~~~~~~~~~~~~~~~~ /* This denotes a range of lead bytes that are not ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ in the fastmap. */ ~~~~~~~~~~~~~~~~~~ int firstlead, lastlead; ~~~~~~~~~~~~~~~~~~~~~~~~ Lisp_Object dummy_val; ~~~~~~~~~~~~~~~~~~~~~~ int jj; ~~~~~~~ unified_range_table_get_range ((void *) p, j, &first, &last, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ &dummy_val); ~~~~~~~~~~~~ /* With Unicode-internal, lead bytes that are entirely ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ within the range and not including the beginning or end ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ are definitely not in the fastmap. Leading bytes that ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ include the beginning or ending characters will be in ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the fastmap unless the beginning or ending characters ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ are the first or last character, respectively, that uses ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ this lead byte. ~~~~~~~~~~~~~~~ @@#### WARNING! In order to determine whether we are the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ first or last character using a lead byte we use and ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ embed in the code some knowledge of how UTF-8 works -- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ at least, the fact that the the first character using a ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ particular lead byte has the minimum-numbered trailing ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ byte in all its trailing bytes, and the last character ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ using a particular lead byte has the maximum-numbered ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ trailing byte in all its trailing bytes. We abstract ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ away the actual minimum/maximum trailing byte numbers, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ at least. We could perhaps do this more portably by ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ just looking at the representation of the character one ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ higher or lower and seeing if the lead byte changes, but ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ you'd run into the problem of invalid characters, e.g. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if you're at the edge of the range of surrogates or are ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the top-most allowed character. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ ~~ if (first < 0x80) ~~~~~~~~~~~~~~~~~ firstlead = first; ~~~~~~~~~~~~~~~~~~ else ~~~~ { ~ Ibyte strr[MAX_ICHAR_LEN]; ~~~~~~~~~~~~~~~~~~~~~~~~~~ Bytecount slen = set_itext_ichar (strr, first); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int kk; ~~~~~~~ /* Determine if we're the first character using our ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ leading byte. */ ~~~~~~~~~~~~~~~~ for (kk = 1; kk < slen; kk++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (strr[kk] != FIRST_TRAILING_BYTE) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ /* If not, this leading byte might occur, so ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ make sure it gets added to the fastmap. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ firstlead = *strr + 1; ~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ } ~ /* Otherwise, we're the first character using our ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ leading byte, and we don't need to add the leading ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ byte to the fastmap. (If our range doesn't ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ completely cover the leading byte, it will get added ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ anyway by the code handling the other end of the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ range.) */ ~~~~~~~~~~ firstlead = *strr; ~~~~~~~~~~~~~~~~~~ } ~ if (last < 0x80) ~~~~~~~~~~~~~~~~ lastlead = last; ~~~~~~~~~~~~~~~~ else ~~~~ { ~ Ibyte strr[MAX_ICHAR_LEN]; ~~~~~~~~~~~~~~~~~~~~~~~~~~ Bytecount slen ~~~~~~~~~~~~~~ = set_itext_ichar (strr, ~~~~~~~~~~~~~~~~~~~~~~~~ min (last, ~~~~~~~~~~ CHAR_CODE_LIMIT - 1)); ~~~~~~~~~~~~~~~~~~~~~~ int kk; ~~~~~~~ /* Same as above but for the last character using ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ our leading byte. */ ~~~~~~~~~~~~~~~~~~~~ for (kk = 1; kk < slen; kk++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (strr[kk] != LAST_TRAILING_BYTE) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ lastlead = *strr - 1; ~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ } ~ lastlead = *strr; ~~~~~~~~~~~~~~~~~ } ~ /* Now, FIRSTLEAD and LASTLEAD are set to the beginning and ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ end, inclusive, of a range of lead bytes that cannot be ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ in the fastmap. Essentially, we want to set all the other ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bytes to be in the fastmap. Here we handle those after ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the previous range and before this one. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (jj = smallest_prev; jj < firstlead; jj++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[jj] = 1; ~~~~~~~~~~~~~~~~ smallest_prev = lastlead + 1; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* Also set lead bytes after the end of the final range. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (j = smallest_prev; j < 0x100; j++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[j] = 1; ~~~~~~~~~~~~~~~ /* If it's not a possible first byte, it can't be in the fastmap. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ In UTF-8, lead bytes are not contiguous with ASCII, so a ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ range spanning the ASCII/non-ASCII boundary will put ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ extraneous bytes in the range [0x80 - 0xBF] in the fastmap. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (j = 0x80; j < 0x100; j++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!ibyte_first_byte_p (j)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[j] = 0; ~~~~~~~~~~~~~~~ #endif /* UNICODE_INTERNAL */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ break; ~~~~~~ #endif /* MULE */ ~~~~~~~~~~~~~~~~~ case anychar: ~~~~~~~~~~~~~ { ~ int fastmap_newline = fastmap['\n']; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* `.' matches anything ... */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef MULE ~~~~~~~~~~~ /* "anything" only includes bytes that can be the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ first byte of a character. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (j = 0; j < 0x100; j++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (ibyte_first_byte_p (j)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[j] = 1; ~~~~~~~~~~~~~~~ #else ~~~~~ for (j = 0; j < (1 << BYTEWIDTH); j++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[j] = 1; ~~~~~~~~~~~~~~~ #endif ~~~~~~ /* ... except perhaps newline. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!(bufp->syntax & RE_DOT_NEWLINE)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap['\n'] = fastmap_newline; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Return if we have already set `can_be_null'; if we have, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ then the fastmap is irrelevant. Something's wrong here. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ else if (bufp->can_be_null) ~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto done; ~~~~~~~~~~ /* Otherwise, have to check alternative paths. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ } ~ #ifndef emacs ~~~~~~~~~~~~~ case wordchar: ~~~~~~~~~~~~~~ for (j = 0; j < (1 << BYTEWIDTH); j++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (SYNTAX (ignored, j) == Sword) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[j] = 1; ~~~~~~~~~~~~~~~ break; ~~~~~~ case notwordchar: ~~~~~~~~~~~~~~~~~ for (j = 0; j < (1 << BYTEWIDTH); j++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (SYNTAX (ignored, j) != Sword) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[j] = 1; ~~~~~~~~~~~~~~~ break; ~~~~~~ #else /* emacs */ ~~~~~~~~~~~~~~~~~ case wordchar: ~~~~~~~~~~~~~~ case notwordchar: ~~~~~~~~~~~~~~~~~ case wordbound: ~~~~~~~~~~~~~~~ case notwordbound: ~~~~~~~~~~~~~~~~~~ case wordbeg: ~~~~~~~~~~~~~ case wordend: ~~~~~~~~~~~~~ case notsyntaxspec: ~~~~~~~~~~~~~~~~~~~ case syntaxspec: ~~~~~~~~~~~~~~~~ /* This match depends on text properties. These end with ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ aborting optimizations. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->can_be_null = 1; ~~~~~~~~~~~~~~~~~~~~~~ goto done; ~~~~~~~~~~ #if 0 /* all of the following code is unused now that the `syntax-table' ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ property exists -- it's trickier to do this than just look in ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the buffer. &&#### but we could just use the syntax-cache stuff ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ instead; why don't we? --ben */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case wordchar: ~~~~~~~~~~~~~~ k = (int) Sword; ~~~~~~~~~~~~~~~~ goto matchsyntax; ~~~~~~~~~~~~~~~~~ case notwordchar: ~~~~~~~~~~~~~~~~~ k = (int) Sword; ~~~~~~~~~~~~~~~~ goto matchnotsyntax; ~~~~~~~~~~~~~~~~~~~~ case syntaxspec: ~~~~~~~~~~~~~~~~ k = *p++; ~~~~~~~~~ matchsyntax: ~~~~~~~~~~~~ #ifdef MULE ~~~~~~~~~~~ for (j = 0; j < 0x80; j++) ~~~~~~~~~~~~~~~~~~~~~~~~~~ if (SYNTAX ~~~~~~~~~~ (XCHAR_TABLE (BUFFER_MIRROR_SYNTAX_TABLE (lispbuf)), j) == ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (enum syntaxcode) k) ~~~~~~~~~~~~~~~~~~~~ fastmap[j] = 1; ~~~~~~~~~~~~~~~ /* @@#### To be correct, we need to set the fastmap for any ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ lead byte any of whose characters can have this syntax code. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ This is hard to calculate so we just punt for now. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (j = 0x80; j < 0x100; j++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (ibyte_first_byte_p (j)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[j] = 1; ~~~~~~~~~~~~~~~ #else /* not MULE */ ~~~~~~~~~~~~~~~~~~~~ for (j = 0; j < (1 << BYTEWIDTH); j++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (SYNTAX ~~~~~~~~~~ (XCHAR_TABLE (BUFFER_MIRROR_SYNTAX_TABLE (lispbuf)), j) == ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (enum syntaxcode) k) ~~~~~~~~~~~~~~~~~~~~ fastmap[j] = 1; ~~~~~~~~~~~~~~~ #endif /* MULE */ ~~~~~~~~~~~~~~~~~ break; ~~~~~~ case notsyntaxspec: ~~~~~~~~~~~~~~~~~~~ k = *p++; ~~~~~~~~~ matchnotsyntax: ~~~~~~~~~~~~~~~ #ifdef MULE ~~~~~~~~~~~ for (j = 0; j < 0x80; j++) ~~~~~~~~~~~~~~~~~~~~~~~~~~ if (SYNTAX ~~~~~~~~~~ (XCHAR_TABLE ~~~~~~~~~~~~ (BUFFER_MIRROR_SYNTAX_TABLE (lispbuf)), j) != ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (enum syntaxcode) k) ~~~~~~~~~~~~~~~~~~~~ fastmap[j] = 1; ~~~~~~~~~~~~~~~ /* @@#### To be correct, we need to set the fastmap for any ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ lead byte all of whose characters do not have this syntax code. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ This is hard to calculate so we just punt for now. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (j = 0x80; j < 0x100; j++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (ibyte_first_byte_p (j)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[j] = 1; ~~~~~~~~~~~~~~~ #else /* not MULE */ ~~~~~~~~~~~~~~~~~~~~ for (j = 0; j < (1 << BYTEWIDTH); j++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (SYNTAX ~~~~~~~~~~ (XCHAR_TABLE ~~~~~~~~~~~~ (BUFFER_MIRROR_SYNTAX_TABLE (lispbuf)), j) != ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (enum syntaxcode) k) ~~~~~~~~~~~~~~~~~~~~ fastmap[j] = 1; ~~~~~~~~~~~~~~~ #endif /* MULE */ ~~~~~~~~~~~~~~~~~ break; ~~~~~~ #endif /* 0 */ ~~~~~~~~~~~~~~ #ifdef MULE ~~~~~~~~~~~ /* 97/2/17 jhod category patch */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case categoryspec: ~~~~~~~~~~~~~~~~~~ case notcategoryspec: ~~~~~~~~~~~~~~~~~~~~~ bufp->can_be_null = 1; ~~~~~~~~~~~~~~~~~~~~~~ UNBIND_REGEX_MALLOC_CHECK (); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return 0; ~~~~~~~~~ /* end if category patch */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif /* MULE */ ~~~~~~~~~~~~~~~~~ /* All cases after this match the empty string. These end with ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `continue'. */ ~~~~~~~~~~~~~~~ case before_dot: ~~~~~~~~~~~~~~~~ case at_dot: ~~~~~~~~~~~~ case after_dot: ~~~~~~~~~~~~~~~ continue; ~~~~~~~~~ #endif /* emacs */ ~~~~~~~~~~~~~~~~~~ case no_op: ~~~~~~~~~~~ case begline: ~~~~~~~~~~~~~ case endline: ~~~~~~~~~~~~~ case begbuf: ~~~~~~~~~~~~ case endbuf: ~~~~~~~~~~~~ #ifndef emacs ~~~~~~~~~~~~~ case wordbound: ~~~~~~~~~~~~~~~ case notwordbound: ~~~~~~~~~~~~~~~~~~ case wordbeg: ~~~~~~~~~~~~~ case wordend: ~~~~~~~~~~~~~ #endif ~~~~~~ case push_dummy_failure: ~~~~~~~~~~~~~~~~~~~~~~~~ continue; ~~~~~~~~~ case jump_n: ~~~~~~~~~~~~ case pop_failure_jump: ~~~~~~~~~~~~~~~~~~~~~~ case maybe_pop_jump: ~~~~~~~~~~~~~~~~~~~~ case jump: ~~~~~~~~~~ case jump_past_alt: ~~~~~~~~~~~~~~~~~~~ case dummy_failure_jump: ~~~~~~~~~~~~~~~~~~~~~~~~ EXTRACT_NUMBER_AND_INCR (j, p); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ p += j; ~~~~~~~ if (j > 0) ~~~~~~~~~~ continue; ~~~~~~~~~ /* Jump backward implies we just went through the body of a ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ loop and matched nothing. Opcode jumped to should be ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `on_failure_jump' or `succeed_n'. Just treat it like an ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ordinary jump. For a * loop, it has pushed its failure ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ point already; if so, discard that as redundant. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if ((re_opcode_t) *p != on_failure_jump ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ && (re_opcode_t) *p != succeed_n) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ continue; ~~~~~~~~~ p++; ~~~~ EXTRACT_NUMBER_AND_INCR (j, p); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ p += j; ~~~~~~~ /* If what's on the stack is where we are now, pop it. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!FAIL_STACK_EMPTY () ~~~~~~~~~~~~~~~~~~~~~~~~ && fail_stack.stack[fail_stack.avail - 1].pointer == p) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fail_stack.avail--; ~~~~~~~~~~~~~~~~~~~ continue; ~~~~~~~~~ case on_failure_jump: ~~~~~~~~~~~~~~~~~~~~~ case on_failure_keep_string_jump: ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ handle_on_failure_jump: ~~~~~~~~~~~~~~~~~~~~~~~ EXTRACT_NUMBER_AND_INCR (j, p); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* For some patterns, e.g., `(a?)?', `p+j' here points to the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ end of the pattern. We don't want to push such a point, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ since when we restore it above, entering the switch will ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ increment `p' past the end of the pattern. We don't need ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ to push such a point since we obviously won't find any more ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap entries beyond `pend'. Such a pattern can match ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the null string, though. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (p + j < pend) ~~~~~~~~~~~~~~~~~ { ~ if (!PUSH_PATTERN_OP (p + j, fail_stack)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ RESET_FAIL_STACK (); ~~~~~~~~~~~~~~~~~~~~ UNBIND_REGEX_MALLOC_CHECK (); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return -2; ~~~~~~~~~~ } ~ } ~ else ~~~~ bufp->can_be_null = 1; ~~~~~~~~~~~~~~~~~~~~~~ if (succeed_n_p) ~~~~~~~~~~~~~~~~ { ~ EXTRACT_NUMBER_AND_INCR (k, p); /* Skip the n. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ succeed_n_p = false; ~~~~~~~~~~~~~~~~~~~~ } ~ continue; ~~~~~~~~~ case succeed_n: ~~~~~~~~~~~~~~~ /* Get to the number of times to succeed. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ p += 2; ~~~~~~~ /* Increment p past the n for when k != 0. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ EXTRACT_NONNEGATIVE_AND_INCR (k, p); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (k == 0) ~~~~~~~~~~~ { ~ p -= 4; ~~~~~~~ succeed_n_p = true; /* Spaghetti code alert. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto handle_on_failure_jump; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ continue; ~~~~~~~~~ case set_number_at: ~~~~~~~~~~~~~~~~~~~ p += 4; ~~~~~~~ continue; ~~~~~~~~~ case start_memory: ~~~~~~~~~~~~~~~~~~ case stop_memory: ~~~~~~~~~~~~~~~~~ p += 4; ~~~~~~~ continue; ~~~~~~~~~ default: ~~~~~~~~ ABORT (); /* We have listed all the cases. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } /* switch *p++ */ ~~~~~~~~~~~~~~~~~~~ /* Getting here means we have found the possible starting ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ characters for one path of the pattern -- and that the empty ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ string does not match. We need not follow this path further. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Instead, look at the next alternative (remembered on the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ stack), or quit if no more. The test at the top of the loop ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ does these things. */ ~~~~~~~~~~~~~~~~~~~~~~ path_can_be_null = false; ~~~~~~~~~~~~~~~~~~~~~~~~~ p = pend; ~~~~~~~~~ } /* while p */ ~~~~~~~~~~~~~~~ /* Set `can_be_null' for the last path (also the first path, if the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ pattern is empty). */ ~~~~~~~~~~~~~~~~~~~~~~ bufp->can_be_null |= path_can_be_null; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ done: ~~~~~ RESET_FAIL_STACK (); ~~~~~~~~~~~~~~~~~~~~ UNBIND_REGEX_MALLOC_CHECK (); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return 0; ~~~~~~~~~ } /* re_compile_fastmap */ ~~~~~~~~~~~~~~~~~~~~~~~~~~ ~ /* Set REGS to hold NUM_REGS registers, storing them in STARTS and ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ENDS. Subsequent matches using PATTERN_BUFFER and REGS will use ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ this memory for recording register information. STARTS and ENDS ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ must be allocated using the malloc library routine, and must each ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ be at least NUM_REGS * sizeof (regoff_t) bytes long. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ If NUM_REGS == 0, then subsequent matches should allocate their own ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ register data. ~~~~~~~~~~~~~~ Unless this function is called, the first search or match using ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PATTERN_BUFFER will allocate its own register data, without ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ freeing the old data. */ ~~~~~~~~~~~~~~~~~~~~~~~~~ void ~~~~ re_set_registers (struct re_pattern_buffer *bufp, struct re_registers *regs, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int num_regs, regoff_t *starts, regoff_t *ends) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ if (num_regs) ~~~~~~~~~~~~~ { ~ bufp->regs_allocated = REGS_REALLOCATE; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regs->num_regs = num_regs; ~~~~~~~~~~~~~~~~~~~~~~~~~~ regs->start = starts; ~~~~~~~~~~~~~~~~~~~~~ regs->end = ends; ~~~~~~~~~~~~~~~~~ } ~ else ~~~~ { ~ bufp->regs_allocated = REGS_UNALLOCATED; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regs->num_regs = 0; ~~~~~~~~~~~~~~~~~~~ regs->start = regs->end = (regoff_t *) 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ } ~ ~ /* Searching routines. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Like re_search_2, below, but only one string is specified, and ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ doesn't let you say where to stop matching. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int ~~~ re_search (struct re_pattern_buffer *bufp, const char *string, int size, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int startpos, int range, struct re_registers *regs ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RE_LISP_CONTEXT_ARGS_DECL) ~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ return re_search_2 (bufp, NULL, 0, string, size, startpos, range, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regs, size RE_LISP_CONTEXT_ARGS); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* Using the compiled pattern in BUFP->buffer, first tries to match the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ virtual concatenation of STRING1 and STRING2, starting first at index ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ STARTPOS, then at STARTPOS + 1, and so on. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ STRING1 and STRING2 have length SIZE1 and SIZE2, respectively. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RANGE is how far to scan while trying to match. RANGE = 0 means try ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ only at STARTPOS; in general, the last start tried is STARTPOS + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RANGE. ~~~~~~ All sizes and positions refer to bytes (not chars); under Mule, the code ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ knows about the format of the text and will only check at positions ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ where a character starts. ~~~~~~~~~~~~~~~~~~~~~~~~~ With MULE, RANGE is a byte position, not a char position. The last ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ start tried is the character starting <= STARTPOS + RANGE. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ In REGS, return the indices of the virtual concatenation of STRING1 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ and STRING2 that matched the entire BUFP->buffer and its contained ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ subexpressions. ~~~~~~~~~~~~~~~ Do not consider matching one past the index STOP in the virtual ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ concatenation of STRING1 and STRING2. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ We return either the position in the strings at which the match was ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ found, -1 if no match, or -2 if error (such as failure ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ stack overflow). */ ~~~~~~~~~~~~~~~~~~~~ int ~~~ re_search_2 (struct re_pattern_buffer *bufp, const char *str1, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int size1, const char *str2, int size2, int startpos, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int range, struct re_registers *regs, int stop ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RE_LISP_CONTEXT_ARGS_DECL) ~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ int val; ~~~~~~~~ re_char *string1 = (re_char *) str1; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_char *string2 = (re_char *) str2; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ REGISTER char *fastmap = bufp->fastmap; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ REGISTER RE_TRANSLATE_TYPE translate = bufp->translate; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int total_size = size1 + size2; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int endpos = startpos + range; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef REGEX_BEGLINE_CHECK ~~~~~~~~~~~~~~~~~~~~~~~~~~ int anchored_at_begline = 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ re_char *d; ~~~~~~~~~~~ #ifdef emacs ~~~~~~~~~~~~ Internal_Format fmt = buffer_or_other_internal_format (lispobj); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef REL_ALLOC ~~~~~~~~~~~~~~~~ const Ibyte *orig_buftext = ~~~~~~~~~~~~~~~~~~~~~~~~~~~ BUFFERP (lispobj) ? ~~~~~~~~~~~~~~~~~~~ BYTE_BUF_BYTE_ADDRESS (XBUFFER (lispobj), ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ BYTE_BUF_BEG (XBUFFER (lispobj))) : ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 0; ~~ #endif ~~~~~~ #ifdef ERROR_CHECK_MALLOC ~~~~~~~~~~~~~~~~~~~~~~~~~ int depth; ~~~~~~~~~~ #endif ~~~~~~ #endif /* emacs */ ~~~~~~~~~~~~~~~~~~ int forward_search_p; ~~~~~~~~~~~~~~~~~~~~~ /* Check for out-of-range STARTPOS. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (startpos < 0 || startpos > total_size) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return -1; ~~~~~~~~~~ /* Fix up RANGE if it might eventually take us outside ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the virtual concatenation of STRING1 and STRING2. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (endpos < 0) ~~~~~~~~~~~~~~~ range = 0 - startpos; ~~~~~~~~~~~~~~~~~~~~~ else if (endpos > total_size) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ range = total_size - startpos; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ forward_search_p = range > 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (void) (forward_search_p); /* This is only used with assertions, silence the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ compiler warning when they're turned off. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* If the search isn't to be a backwards one, don't waste time in a ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ search for a pattern that must be anchored. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (bufp->used > 0 && (re_opcode_t) bufp->buffer[0] == begbuf && range > 0) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ if (startpos > 0) ~~~~~~~~~~~~~~~~~ return -1; ~~~~~~~~~~ else ~~~~ { ~ d = ((const unsigned char *) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (startpos >= size1 ? string2 - size1 : string1) + startpos); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ range = itext_ichar_len_fmt (d, fmt); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ } ~ #ifdef emacs ~~~~~~~~~~~~ /* In a forward search for something that starts with \=. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ don't keep searching past point. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (bufp->used > 0 && (re_opcode_t) bufp->buffer[0] == at_dot && range > 0) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ if (!BUFFERP (lispobj)) ~~~~~~~~~~~~~~~~~~~~~~~ return -1; ~~~~~~~~~~ range = (BYTE_BUF_PT (XBUFFER (lispobj)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - BYTE_BUF_BEGV (XBUFFER (lispobj)) - startpos); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (range < 0) ~~~~~~~~~~~~~~ return -1; ~~~~~~~~~~ } ~ #endif /* emacs */ ~~~~~~~~~~~~~~~~~~ #ifdef ERROR_CHECK_MALLOC ~~~~~~~~~~~~~~~~~~~~~~~~~ /* Do this after the above return()s. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ depth = bind_regex_malloc_disallowed (1); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ /* Update the fastmap now if not correct already. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ BEGIN_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~~~ if (fastmap && !bufp->fastmap_accurate) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (re_compile_fastmap (bufp RE_LISP_SHORT_CONTEXT_ARGS) == -2) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ END_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~ UNBIND_REGEX_MALLOC_CHECK (); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return -2; ~~~~~~~~~~ } ~ END_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~ RE_SEARCH_RELOCATE_MOVEABLE_DATA_POINTERS (); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef REGEX_BEGLINE_CHECK ~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ long i = 0; ~~~~~~~~~~~ while (i < bufp->used) ~~~~~~~~~~~~~~~~~~~~~~ { ~ if (bufp->buffer[i] == start_memory || ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->buffer[i] == stop_memory) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ i += 4; ~~~~~~~ else ~~~~ break; ~~~~~~ } ~ anchored_at_begline = i < bufp->used && bufp->buffer[i] == begline; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ #endif ~~~~~~ #ifdef emacs ~~~~~~~~~~~~ BEGIN_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~~~ /* Update the mirror syntax table if it's used and dirty. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ SYNTAX_CODE (BUFFER_MIRROR_SYNTAX_TABLE (lispbuf), 'a'); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ scache = setup_syntax_cache (scache, lispobj, lispbuf, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ offset_to_bytexpos (lispobj, startpos), ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 1); ~~~ END_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~ RE_SEARCH_RELOCATE_MOVEABLE_DATA_POINTERS (); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ /* Loop through the string, looking for a place to start matching. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (;;) ~~~~~~~~ { ~ #ifdef REGEX_BEGLINE_CHECK ~~~~~~~~~~~~~~~~~~~~~~~~~~ /* If the regex is anchored at the beginning of a line (i.e. with a ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ^), then we can speed things up by skipping to the next ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ beginning-of-line. However, to determine "beginning of line" we ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ need to look at the previous char, so can't do this check if at ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ beginning of either string. (Well, we could if at the beginning of ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the second string, but it would require additional code, and this ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ is just an optimization.) */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (anchored_at_begline && startpos > 0 && startpos != size1) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ if (range > 0) ~~~~~~~~~~~~~~ { ~ /* whose stupid idea was it anyway to make this ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ function take two strings to match?? */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int lim = 0; ~~~~~~~~~~~~ re_char *orig_d; ~~~~~~~~~~~~~~~~ re_char *stop_d; ~~~~~~~~~~~~~~~~ /* Compute limit as below in fastmap code, so we are guaranteed ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ to remain within a single string. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (startpos < size1 && startpos + range >= size1) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ lim = range - (size1 - startpos); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ d = ((const unsigned char *) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (startpos >= size1 ? string2 - size1 : string1) + startpos); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ orig_d = d; ~~~~~~~~~~~ stop_d = d + range - lim; ~~~~~~~~~~~~~~~~~~~~~~~~~ /* We want to find the next location (including the current ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ one) where the previous char is a newline, so back up one ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ and search forward for a newline. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEC_IBYTEPTR_FMT (d, fmt); /* Ok, since startpos != size1. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Written out as an if-else to avoid testing `translate' ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ inside the loop. */ ~~~~~~~~~~~~~~~~~~~~ if (TRANSLATE_P (translate)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ while (d < stop_d && ~~~~~~~~~~~~~~~~~~~~ RE_TRANSLATE_1 (itext_ichar_fmt (d, fmt, lispobj)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ != '\n') ~~~~~~~~ INC_IBYTEPTR_FMT (d, fmt); ~~~~~~~~~~~~~~~~~~~~~~~~~~ else ~~~~ while (d < stop_d && ~~~~~~~~~~~~~~~~~~~~ itext_ichar_ascii_fmt (d, fmt, lispobj) != '\n') ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ INC_IBYTEPTR_FMT (d, fmt); ~~~~~~~~~~~~~~~~~~~~~~~~~~ /* If we were stopped by a newline, skip forward over it. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Otherwise we will get in an infloop when our start position ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ was at begline. */ ~~~~~~~~~~~~~~~~~~ if (d < stop_d) ~~~~~~~~~~~~~~~ INC_IBYTEPTR_FMT (d, fmt); --- search.o --- gcc -c -Wall -Wno-switch -Wundef -Wsign-compare -Wno-char-subscripts -Wpacked -Wpointer-arith -Wshadow -Wmissing-declarations -Wmissing-prototypes -Wstrict-prototypes -Wdeclaration-after-statement -g -O2 -Dunix -no-pie -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -DTERMINFO -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -Demacs -I. -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/src -DHAVE_CONFIG_H -I/usr/pkg/include -I/usr/X11R7/include -I/usr/X11R7/include search.c --- regex.o --- ~~~~~~~~~~~~~~~~~~~~~~~~~~ range -= d - orig_d; ~~~~~~~~~~~~~~~~~~~~ startpos += d - orig_d; ~~~~~~~~~~~~~~~~~~~~~~~ assert (!forward_search_p || range >= 0); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ else if (range < 0) ~~~~~~~~~~~~~~~~~~~ { ~ /* We're lazy, like in the fastmap code below */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Ichar c; ~~~~~~~~ d = ((const unsigned char *) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (startpos >= size1 ? string2 - size1 : string1) + startpos); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEC_IBYTEPTR_FMT (d, fmt); ~~~~~~~~~~~~~~~~~~~~~~~~~~ c = itext_ichar_fmt (d, fmt, lispobj); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ c = RE_TRANSLATE (c); ~~~~~~~~~~~~~~~~~~~~~ if (c != '\n') ~~~~~~~~~~~~~~ goto advance; ~~~~~~~~~~~~~ } ~ } ~ #endif /* REGEX_BEGLINE_CHECK */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* If a fastmap is supplied, skip quickly over characters that ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ cannot be the start of a match. If the pattern can match the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ null string, however, we don't need to skip characters; we want ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the first null string. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~ if (fastmap && startpos < total_size && !bufp->can_be_null) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ /* For the moment, fastmap always works as if buffer ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ is in default format, so convert chars in the search strings ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ into default format as we go along, if necessary. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ &&#### fastmap needs rethinking for 8-bit-fixed so ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ it's faster. We need it to reflect the raw ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 8-bit-fixed values. That isn't so hard if we assume ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ that the top 96 bytes represent a single 1-byte ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ charset. For 16-bit/32-bit stuff it's probably not ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ worth it to make the fastmap represent the raw, due to ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ its nature -- we'd have to use the LSB for the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap, and that causes lots of problems with Mule ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ chars, where it essentially wipes out the usefulness ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ of the fastmap entirely. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (range > 0) /* Searching forwards. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ int lim = 0; ~~~~~~~~~~~~ int irange = range; ~~~~~~~~~~~~~~~~~~~ if (startpos < size1 && startpos + range >= size1) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ lim = range - (size1 - startpos); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ d = ((const unsigned char *) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (startpos >= size1 ? string2 - size1 : string1) + startpos); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Written out as an if-else to avoid testing `translate' ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ inside the loop. */ ~~~~~~~~~~~~~~~~~~~~ if (TRANSLATE_P (translate)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ while (range > lim) ~~~~~~~~~~~~~~~~~~~ { ~ re_char *old_d = d; ~~~~~~~~~~~~~~~~~~~ #ifdef MULE ~~~~~~~~~~~ Ibyte tempch[MAX_ICHAR_LEN]; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Ichar buf_ch = ~~~~~~~~~~~~~~ RE_TRANSLATE_1 (itext_ichar_fmt (d, fmt, lispobj)); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ set_itext_ichar (tempch, buf_ch); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (fastmap[*tempch]) ~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ #else ~~~~~ if (fastmap[(unsigned char) RE_TRANSLATE_1 (*d)]) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ #endif /* MULE */ ~~~~~~~~~~~~~~~~~ INC_IBYTEPTR_FMT (d, fmt); ~~~~~~~~~~~~~~~~~~~~~~~~~~ range -= (d - old_d); ~~~~~~~~~~~~~~~~~~~~~ assert (!forward_search_p || range >= 0); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ } ~ #ifdef MULE ~~~~~~~~~~~ else if (fmt != FORMAT_DEFAULT) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ while (range > lim) ~~~~~~~~~~~~~~~~~~~ { ~ re_char *old_d = d; ~~~~~~~~~~~~~~~~~~~ Ibyte tempch[MAX_ICHAR_LEN]; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Ichar buf_ch = itext_ichar_fmt (d, fmt, lispobj); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ set_itext_ichar (tempch, buf_ch); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (fastmap[*tempch]) ~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ INC_IBYTEPTR_FMT (d, fmt); ~~~~~~~~~~~~~~~~~~~~~~~~~~ range -= (d - old_d); ~~~~~~~~~~~~~~~~~~~~~ assert (!forward_search_p || range >= 0); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ } ~ #endif /* MULE */ ~~~~~~~~~~~~~~~~~ else ~~~~ { ~ while (range > lim && !fastmap[*d]) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ re_char *old_d = d; ~~~~~~~~~~~~~~~~~~~ INC_IBYTEPTR (d); ~~~~~~~~~~~~~~~~~ range -= (d - old_d); ~~~~~~~~~~~~~~~~~~~~~ assert (!forward_search_p || range >= 0); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ } ~ startpos += irange - range; ~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ else /* Searching backwards. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ /* #### It's not clear why we don't just write a loop, like ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for the moving-forward case. Perhaps the writer got lazy, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ since backward searches aren't so common. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ d = ((const unsigned char *) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (startpos >= size1 ? string2 - size1 : string1) + startpos); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef MULE ~~~~~~~~~~~ { ~ Ibyte tempch[MAX_ICHAR_LEN]; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Ichar buf_ch = ~~~~~~~~~~~~~~ RE_TRANSLATE (itext_ichar_fmt (d, fmt, lispobj)); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ set_itext_ichar (tempch, buf_ch); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!fastmap[*tempch]) ~~~~~~~~~~~~~~~~~~~~~~ goto advance; ~~~~~~~~~~~~~ } ~ #else ~~~~~ if (!fastmap[(unsigned char) RE_TRANSLATE (*d)]) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto advance; ~~~~~~~~~~~~~ #endif /* MULE */ ~~~~~~~~~~~~~~~~~ } ~ } ~ /* If can't match the null string, and that's all we have left, fail. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (range >= 0 && startpos == total_size && fastmap ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ && !bufp->can_be_null) ~~~~~~~~~~~~~~~~~~~~~~ { ~ UNBIND_REGEX_MALLOC_CHECK (); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return -1; ~~~~~~~~~~ } ~ #ifdef emacs /* XEmacs added, w/removal of immediate_quit */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!no_quit_in_re_search) ~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ BEGIN_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~~~ QUIT; ~~~~~ END_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~ RE_SEARCH_RELOCATE_MOVEABLE_DATA_POINTERS (); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ #endif ~~~~~~ BEGIN_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~~~ val = re_match_2_internal (bufp, string1, size1, string2, size2, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ startpos, regs, stop ~~~~~~~~~~~~~~~~~~~~ RE_LISP_CONTEXT_ARGS); ~~~~~~~~~~~~~~~~~~~~~~ #ifndef REGEX_MALLOC ~~~~~~~~~~~~~~~~~~~~ ALLOCA_GARBAGE_COLLECT (); ~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ END_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~ RE_SEARCH_RELOCATE_MOVEABLE_DATA_POINTERS (); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (val >= 0) ~~~~~~~~~~~~~ { ~ UNBIND_REGEX_MALLOC_CHECK (); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return startpos; ~~~~~~~~~~~~~~~~ } ~ if (val == -2) ~~~~~~~~~~~~~~ { ~ UNBIND_REGEX_MALLOC_CHECK (); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return -2; ~~~~~~~~~~ } ~ RE_SEARCH_RELOCATE_MOVEABLE_DATA_POINTERS (); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ advance: ~~~~~~~~ if (!range) ~~~~~~~~~~~ break; ~~~~~~ else if (range > 0) ~~~~~~~~~~~~~~~~~~~ { ~ Bytecount d_size; ~~~~~~~~~~~~~~~~~ d = ((const unsigned char *) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (startpos >= size1 ? string2 - size1 : string1) + startpos); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ d_size = itext_ichar_len_fmt (d, fmt); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ range -= d_size; ~~~~~~~~~~~~~~~~ assert (!forward_search_p || range >= 0); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ startpos += d_size; ~~~~~~~~~~~~~~~~~~~ } ~ else ~~~~ { ~ Bytecount d_size; ~~~~~~~~~~~~~~~~~ /* Note startpos > size1 not >=. If we are on the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ string1/string2 boundary, we want to backup into string1. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ d = ((const unsigned char *) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (startpos > size1 ? string2 - size1 : string1) + startpos); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEC_IBYTEPTR_FMT (d, fmt); ~~~~~~~~~~~~~~~~~~~~~~~~~~ d_size = itext_ichar_len_fmt (d, fmt); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ range += d_size; ~~~~~~~~~~~~~~~~ assert (!forward_search_p || range >= 0); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ startpos -= d_size; ~~~~~~~~~~~~~~~~~~~ } ~ } ~ UNBIND_REGEX_MALLOC_CHECK (); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return -1; ~~~~~~~~~~ } /* re_search_2 */ ~~~~~~~~~~~~~~~~~~~ ~ /* Declarations and macros for re_match_2. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* This converts PTR, a pointer into one of the search strings `string1' ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ and `string2' into an offset from the beginning of that string. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define POINTER_TO_OFFSET(ptr) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (FIRST_STRING_P (ptr) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~ ? ((regoff_t) ((ptr) - string1)) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ : ((regoff_t) ((ptr) - string2 + size1))) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Macros for dealing with the split strings in re_match_2. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define MATCHING_IN_FIRST_STRING (dend == end_match_1) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Call before fetching a character with *d. This switches over to ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ string2 if necessary. */ ~~~~~~~~~~~~~~~~~~~~~~~~~ #define REGEX_PREFETCH() \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ while (d == dend) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { \ ~~~~~~~~~~~ /* End of string2 => fail. */ \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (dend == end_match_2) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto fail; \ ~~~~~~~~~~~~~~~~~~ /* End of string1 => advance to string2. */ \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ d = string2; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~ dend = end_match_2; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* Test if at very beginning or at very end of the virtual concatenation ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ of `string1' and `string2'. If only one string, it's `string2'. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define AT_STRINGS_BEG(d) ((d) == (size1 ? string1 : string2) || !size2) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define AT_STRINGS_END(d) ((d) == end2) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* XEmacs change: ~~~~~~~~~~~~~~~~~ If the given position straddles the string gap, return the equivalent ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ position that is before or after the gap, respectively; otherwise, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return the same position. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define POS_BEFORE_GAP_UNSAFE(d) ((d) == string2 ? end1 : (d)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define POS_AFTER_GAP_UNSAFE(d) ((d) == end1 ? string2 : (d)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Test if CH is a word-constituent character. (XEmacs change) */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define WORDCHAR_P(ch) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (SYNTAX (BUFFER_MIRROR_SYNTAX_TABLE (lispbuf), ch) == Sword) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Free everything we malloc. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef MATCH_MAY_ALLOCATE ~~~~~~~~~~~~~~~~~~~~~~~~~ #define FREE_VAR(var,type) if (var) REGEX_FREE (var, type); var = NULL ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define FREE_VARIABLES() \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ do { \ ~~~~~~~~~~~~~~ UNBIND_REGEX_MALLOC_CHECK (); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ REGEX_FREE_STACK (fail_stack.stack); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ FREE_VAR (regstart, re_char **); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ FREE_VAR (regend, re_char **); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ FREE_VAR (old_regstart, re_char **); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ FREE_VAR (old_regend, re_char **); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ FREE_VAR (best_regstart, re_char **); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ FREE_VAR (best_regend, re_char **); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ FREE_VAR (reg_info, register_info_type *); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ FREE_VAR (reg_dummy, re_char **); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ FREE_VAR (reg_info_dummy, register_info_type *); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } while (0) ~~~~~~~~~~~ #else /* not MATCH_MAY_ALLOCATE */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define FREE_VARIABLES() \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ do { \ ~~~~~~~~~~~ UNBIND_REGEX_MALLOC_CHECK (); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } while (0) ~~~~~~~~~~~ #endif /* MATCH_MAY_ALLOCATE */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* These values must meet several constraints. They must not be valid ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ register values, which means we can use numbers larger than MAX_REGNUM. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ They must differ by 1, because of NUM_FAILURE_ITEMS above. And the value ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for the lowest register must be larger than the value for the highest ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ register, so we do not try to actually save any registers when none are ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ active. */ ~~~~~~~~~~~ #define NO_HIGHEST_ACTIVE_REG (MAX_REGNUM + 1) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define NO_LOWEST_ACTIVE_REG (NO_HIGHEST_ACTIVE_REG + 1) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ~ /* Matching routines. */ ~~~~~~~~~~~~~~~~~~~~~~~~~ #ifndef emacs /* XEmacs never uses this. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* re_match is like re_match_2 except it takes only a single string. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int ~~~ re_match (struct re_pattern_buffer *bufp, const char *string, int size, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int pos, struct re_registers *regs ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RE_LISP_CONTEXT_ARGS_DECL) ~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ int result = re_match_2_internal (bufp, NULL, 0, (re_char *) string, size, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ pos, regs, size ~~~~~~~~~~~~~~~ RE_LISP_CONTEXT_ARGS); ~~~~~~~~~~~~~~~~~~~~~~ ALLOCA_GARBAGE_COLLECT (); ~~~~~~~~~~~~~~~~~~~~~~~~~~ return result; ~~~~~~~~~~~~~~ } ~ #endif /* not emacs */ ~~~~~~~~~~~~~~~~~~~~~~ /* re_match_2 matches the compiled pattern in BUFP against the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (virtual) concatenation of STRING1 and STRING2 (of length SIZE1 and ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ SIZE2, respectively). We start matching at POS, and stop matching ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ at STOP. ~~~~~~~~ If REGS is non-null and the `no_sub' field of BUFP is nonzero, we ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ store offsets for the substring each group matched in REGS. See the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ documentation for exactly how many groups we fill. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ We return -1 if no match, -2 if an internal error (such as the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ failure stack overflowing). Otherwise, we return the length of the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ matched substring. */ ~~~~~~~~~~~~~~~~~~~~~~ int ~~~ re_match_2 (struct re_pattern_buffer *bufp, const char *string1, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int size1, const char *string2, int size2, int pos, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ struct re_registers *regs, int stop ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RE_LISP_CONTEXT_ARGS_DECL) ~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ int result; ~~~~~~~~~~~ #ifdef emacs ~~~~~~~~~~~~ /* Update the mirror syntax table if it's dirty now, this would otherwise ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ cause a malloc() in charset_mule in re_match_2_internal() when checking ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ characters' syntax. */ ~~~~~~~~~~~~~~~~~~~~~~ SYNTAX_CODE (BUFFER_MIRROR_SYNTAX_TABLE (lispbuf), 'a'); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ scache = setup_syntax_cache (scache, lispobj, lispbuf, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ offset_to_bytexpos (lispobj, pos), ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 1); ~~~ #endif ~~~~~~ result = re_match_2_internal (bufp, (re_char *) string1, size1, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (re_char *) string2, size2, ~~~~~~~~~~~~~~~~~~~~~~~~~~~ pos, regs, stop ~~~~~~~~~~~~~~~ RE_LISP_CONTEXT_ARGS); ~~~~~~~~~~~~~~~~~~~~~~ ALLOCA_GARBAGE_COLLECT (); ~~~~~~~~~~~~~~~~~~~~~~~~~~ return result; ~~~~~~~~~~~~~~ } ~ /* This is a separate function so that we can force an alloca cleanup ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ afterwards. */ ~~~~~~~~~~~~~~~ static int ~~~~~~~~~~ re_match_2_internal (struct re_pattern_buffer *bufp, re_char *string1, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int size1, re_char *string2, int size2, int pos, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ struct re_registers *regs, int stop ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RE_LISP_CONTEXT_ARGS_MULE_DECL) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ /* General temporaries. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~ int mcnt; ~~~~~~~~~ re_char *p1; ~~~~~~~~~~~~ int should_succeed; /* XEmacs change */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Just past the end of the corresponding string. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_char *end1, *end2; ~~~~~~~~~~~~~~~~~~~~~ /* Pointers into string1 and string2, just past the last characters in ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ each to consider matching. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_char *end_match_1, *end_match_2; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Where we are in the data, and the end of the current string. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_char *d, *dend; ~~~~~~~~~~~~~~~~~~ /* Where we are in the pattern, and the end of the pattern. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ unsigned char *p; ~~~~~~~~~~~~~~~~~ re_char *pstart; ~~~~~~~~~~~~~~~~ REGISTER re_char *pend; ~~~~~~~~~~~~~~~~~~~~~~~ /* Mark the opcode just after a start_memory, so we can test for an ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ empty subpattern when we get to the stop_memory. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_char *just_past_start_mem = 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* We use this to map every character in the string. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RE_TRANSLATE_TYPE translate = bufp->translate; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Failure point stack. Each place that can handle a failure further ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ down the line pushes a failure point on this stack. It consists of ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ restart, regend, and reg_info for all registers corresponding to ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the subexpressions we're currently inside, plus the number of such ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ registers, and, finally, two char *'s. The first char * is where ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ to resume scanning the pattern; the second one is where to resume ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ scanning the strings. If the latter is zero, the failure point is ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ a ``dummy''; if a failure happens and the failure point is a dummy, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ it gets discarded and the next one is tried. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef MATCH_MAY_ALLOCATE /* otherwise, this is global. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fail_stack_type fail_stack; ~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ #ifdef DEBUG ~~~~~~~~~~~~ static int failure_id; ~~~~~~~~~~~~~~~~~~~~~~ int nfailure_points_pushed = 0, nfailure_points_popped = 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ #ifdef REGEX_REL_ALLOC ~~~~~~~~~~~~~~~~~~~~~~ /* This holds the pointer to the failure stack, when ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ it is allocated relocatably. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fail_stack_elt_t *failure_stack_ptr; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ /* We fill all the registers internally, independent of what we ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return, for use in backreferences. The number here includes ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ an element for register zero. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regnum_t num_regs = bufp->re_ngroups + 1; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* The currently active registers. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regnum_t lowest_active_reg = NO_LOWEST_ACTIVE_REG; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regnum_t highest_active_reg = NO_HIGHEST_ACTIVE_REG; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Information on the contents of registers. These are pointers into ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the input strings; they record just what was matched (on this ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ attempt) by a subexpression part of the pattern, that is, the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regnum-th regstart pointer points to where in the pattern we began ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ matching and the regnum-th regend points to right after where we ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ stopped matching the regnum-th subexpression. (The zeroth register ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ keeps track of what the whole pattern matches.) */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_char **regstart, **regend; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ /* If a group that's operated upon by a repetition operator fails to ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ match anything, then the register for its start will need to be ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ restored because it will have been set to wherever in the string we ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ are when we last see its open-group operator. Similarly for a ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ register's end. */ ~~~~~~~~~~~~~~~~~~~ #ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_char **old_regstart, **old_regend; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ /* The is_active field of reg_info helps us keep track of which (possibly ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ nested) subexpressions we are currently in. The matched_something ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ field of reg_info[reg_num] helps us tell whether or not we have ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ matched any of the pattern so far this time through the reg_num-th ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ subexpression. These two fields get reset each time through any ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ loop their register is in. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef MATCH_MAY_ALLOCATE /* otherwise, this is global. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ register_info_type *reg_info; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ /* The following record the register info as found in the above ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ variables when we find a match better than any we've seen before. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ This happens as we backtrack through the failure points, which in ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ turn happens only if we have not yet matched the entire string. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int best_regs_set = false; ~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_char **best_regstart, **best_regend; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ /* Logically, this is `best_regend[0]'. But we don't want to have to ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ allocate space for that if we're not allocating space for anything ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ else (see below). Also, we never need info about register 0 for ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ any of the other register vectors, and it seems rather a kludge to ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ treat `best_regend' differently than the rest. So we keep track of ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the end of the best match so far in a separate variable. We ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ initialize this to NULL so that when we backtrack the first time ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ and need to test it, it's not garbage. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_char *match_end = NULL; ~~~~~~~~~~~~~~~~~~~~~~~~~~ /* This helps SET_REGS_MATCHED avoid doing redundant work. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int set_regs_matched_done = 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Used when we pop values we don't care about. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_char **reg_dummy; ~~~~~~~~~~~~~~~~~~~~ register_info_type *reg_info_dummy; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ #ifdef DEBUG ~~~~~~~~~~~~ /* Counts the total number of registers pushed. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int num_regs_pushed = 0; ~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ /* 1 if this match ends in the same string (string1 or string2) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ as the best previous match. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_bool same_str_p; ~~~~~~~~~~~~~~~~~~~ /* 1 if this match is the best seen so far. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_bool best_match_p; ~~~~~~~~~~~~~~~~~~~~~ #ifdef emacs ~~~~~~~~~~~~ Internal_Format fmt = buffer_or_other_internal_format (lispobj); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef REL_ALLOC ~~~~~~~~~~~~~~~~ const Ibyte *orig_buftext = ~~~~~~~~~~~~~~~~~~~~~~~~~~~ BUFFERP (lispobj) ? ~~~~~~~~~~~~~~~~~~~ BYTE_BUF_BYTE_ADDRESS (XBUFFER (lispobj), ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ BYTE_BUF_BEG (XBUFFER (lispobj))) : ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 0; ~~ #endif ~~~~~~ #ifdef ERROR_CHECK_MALLOC ~~~~~~~~~~~~~~~~~~~~~~~~~ int depth = bind_regex_malloc_disallowed (1); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ #endif /* emacs */ ~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT1 ("\n\nEntering re_match_2.\n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ BEGIN_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~~~ INIT_FAIL_STACK (); ~~~~~~~~~~~~~~~~~~~ p = (unsigned char *) ALLOCA (bufp->used); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ END_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~ /* re_match_2_internal() modifies the compiled pattern (see the succeed_n, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ jump_n, set_number_at opcodes), make it re-entrant by working on a ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ copy. This should also give better locality of reference. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ memcpy (p, bufp->buffer, bufp->used); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ pstart = (re_char *) p; ~~~~~~~~~~~~~~~~~~~~~~~ pend = pstart + bufp->used; ~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef MATCH_MAY_ALLOCATE ~~~~~~~~~~~~~~~~~~~~~~~~~ /* Do not bother to initialize all the register variables if there are ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ no groups in the pattern, as it takes a fair amount of time. If ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ there are groups, we include space for register 0 (the whole ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ pattern), even though we never use it, since it simplifies the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ array indexing. We should fix this. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (bufp->re_ngroups) ~~~~~~~~~~~~~~~~~~~~~ { ~ BEGIN_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~~~ regstart = REGEX_TALLOC (num_regs, re_char *); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regend = REGEX_TALLOC (num_regs, re_char *); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ old_regstart = REGEX_TALLOC (num_regs, re_char *); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ old_regend = REGEX_TALLOC (num_regs, re_char *); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ best_regstart = REGEX_TALLOC (num_regs, re_char *); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ best_regend = REGEX_TALLOC (num_regs, re_char *); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ reg_info = REGEX_TALLOC (num_regs, register_info_type); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ reg_dummy = REGEX_TALLOC (num_regs, re_char *); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ reg_info_dummy = REGEX_TALLOC (num_regs, register_info_type); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ END_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~ if (!(regstart && regend && old_regstart && old_regend && reg_info ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ && best_regstart && best_regend && reg_dummy && reg_info_dummy)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ FREE_VARIABLES (); ~~~~~~~~~~~~~~~~~~ return -2; ~~~~~~~~~~ } ~ } ~ else ~~~~ { ~ /* We must initialize all our variables to NULL, so that ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `FREE_VARIABLES' doesn't try to free them. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regstart = regend = old_regstart = old_regend = best_regstart ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ = best_regend = reg_dummy = NULL; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ reg_info = reg_info_dummy = (register_info_type *) NULL; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ #endif /* MATCH_MAY_ALLOCATE */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #if defined (emacs) && defined (REL_ALLOC) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ /* If the allocations above (or the call to setup_syntax_cache() in ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_match_2) caused a rel-alloc relocation, then fix up the data ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ pointers */ ~~~~~~~~~~~ Bytecount offset = offset_post_relocation (lispobj, orig_buftext); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (offset) ~~~~~~~~~~~ { ~ string1 += offset; ~~~~~~~~~~~~~~~~~~ string2 += offset; ~~~~~~~~~~~~~~~~~~ } ~ } ~ #endif /* defined (emacs) && defined (REL_ALLOC) */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* The starting position is bogus. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (pos < 0 || pos > size1 + size2) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ FREE_VARIABLES (); ~~~~~~~~~~~~~~~~~~ return -1; ~~~~~~~~~~ } ~ /* Initialize subexpression text positions to our sentinel to mark ones that ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ no start_memory/stop_memory has been seen for. Also initialize the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ register information struct. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (mcnt = 1; mcnt < num_regs; mcnt++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ regstart[mcnt] = regend[mcnt] = old_regstart[mcnt] = old_regend[mcnt] ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ = best_regstart[mcnt] = best_regend[mcnt] = REG_UNSET_VALUE; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ REG_MATCH_NULL_STRING_P (reg_info[mcnt]) = MATCH_NULL_UNSET_VALUE; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ IS_ACTIVE (reg_info[mcnt]) = 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ MATCHED_SOMETHING (reg_info[mcnt]) = 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ EVER_MATCHED_SOMETHING (reg_info[mcnt]) = 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* We move `string1' into `string2' if the latter's empty -- but not if ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `string1' is null. */ ~~~~~~~~~~~~~~~~~~~~~~ if (size2 == 0 && string1 != NULL) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ string2 = string1; ~~~~~~~~~~~~~~~~~~ size2 = size1; ~~~~~~~~~~~~~~ string1 = 0; ~~~~~~~~~~~~ size1 = 0; ~~~~~~~~~~ } ~ end1 = string1 + size1; ~~~~~~~~~~~~~~~~~~~~~~~ end2 = string2 + size2; ~~~~~~~~~~~~~~~~~~~~~~~ /* Compute where to stop matching, within the two strings. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (stop <= size1) ~~~~~~~~~~~~~~~~~~ { ~ end_match_1 = string1 + stop; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ end_match_2 = string2; ~~~~~~~~~~~~~~~~~~~~~~ } ~ else ~~~~ { ~ end_match_1 = end1; ~~~~~~~~~~~~~~~~~~~ end_match_2 = string2 + stop - size1; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* `p' scans through the pattern as `d' scans through the data. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `dend' is the end of the input string that `d' points within. `d' ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ is advanced into the following input string whenever necessary, but ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ this happens before fetching; therefore, at the beginning of the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ loop, `d' can be pointing at the end of a string, but it cannot ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ equal `string2'. */ ~~~~~~~~~~~~~~~~~~~~ if (size1 > 0 && pos <= size1) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ d = string1 + pos; ~~~~~~~~~~~~~~~~~~ dend = end_match_1; ~~~~~~~~~~~~~~~~~~~ } ~ else ~~~~ { ~ d = string2 + pos - size1; ~~~~~~~~~~~~~~~~~~~~~~~~~~ dend = end_match_2; ~~~~~~~~~~~~~~~~~~~ } ~ DEBUG_MATCH_PRINT1 ("The compiled pattern is: \n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT_COMPILED_PATTERN (bufp, p, pend); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT1 ("The string to match is: `"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT_DOUBLE_STRING (d, string1, size1, string2, size2); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT1 ("'\n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* This loops over pattern commands. It exits by returning from the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ function if the match is complete, or it drops through if the match ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fails at this starting point in the input data. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (;;) ~~~~~~~~ { ~ DEBUG_MATCH_PRINT2 ("\n0x%zx: ", (Bytecount) p); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef emacs /* XEmacs added, w/removal of immediate_quit */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!no_quit_in_re_search) ~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ BEGIN_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~~~ QUIT; ~~~~~ END_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~ RE_MATCH_RELOCATE_MOVEABLE_DATA_POINTERS (); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ #endif ~~~~~~ if (p == pend) ~~~~~~~~~~~~~~ { /* End of pattern means we might have succeeded. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT1 ("end of pattern ... "); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* If we haven't matched the entire string, and we want the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ longest match, try backtracking. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (d != end_match_2) ~~~~~~~~~~~~~~~~~~~~~ { ~ same_str_p = (FIRST_STRING_P (match_end) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ == MATCHING_IN_FIRST_STRING); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* AIX compiler got confused when this was combined ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ with the previous declaration. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (same_str_p) ~~~~~~~~~~~~~~~ best_match_p = d > match_end; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ else ~~~~ best_match_p = !MATCHING_IN_FIRST_STRING; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT1 ("backtracking.\n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!FAIL_STACK_EMPTY ()) ~~~~~~~~~~~~~~~~~~~~~~~~~ { /* More failure points to try. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* If exceeds best match so far, save it. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!best_regs_set || best_match_p) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ best_regs_set = true; ~~~~~~~~~~~~~~~~~~~~~ match_end = d; ~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT1 ("\nSAVING match as best so far.\n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (mcnt = 1; mcnt < num_regs; mcnt++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ best_regstart[mcnt] = regstart[mcnt]; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ best_regend[mcnt] = regend[mcnt]; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ } ~ goto fail; ~~~~~~~~~~ } ~ /* If no failure points, don't restore garbage. And if ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ last match is real best match, don't restore second ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ best one. */ ~~~~~~~~~~~~ else if (best_regs_set && !best_match_p) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ restore_best_regs: ~~~~~~~~~~~~~~~~~~ /* Restore best match. It may happen that `dend == ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ end_match_1' while the restored d is in string2. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ For example, the pattern `x.*y.*z' against the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ strings `x-' and `y-z-', if the two strings are ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ not consecutive in memory. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT1 ("Restoring best registers.\n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ d = match_end; ~~~~~~~~~~~~~~ dend = ((d >= string1 && d <= end1) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ? end_match_1 : end_match_2); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (mcnt = 1; mcnt < num_regs; mcnt++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ regstart[mcnt] = best_regstart[mcnt]; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regend[mcnt] = best_regend[mcnt]; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ } ~ } /* d != end_match_2 */ ~~~~~~~~~~~~~~~~~~~~~~~~ succeed_label: ~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT1 ("Accepting match.\n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* If caller wants register contents data back, do it. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ int num_nonshy_regs = bufp->re_nsub + 1; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (regs && !bufp->no_sub) ~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ /* Have the register data arrays been allocated? */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (bufp->regs_allocated == REGS_UNALLOCATED) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { /* No. So allocate them with malloc. We need one ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ extra element beyond `num_regs' for the `-1' marker ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ GNU code uses. */ ~~~~~~~~~~~~~~~~~~ regs->num_regs = MAX (RE_NREGS, num_nonshy_regs + 1); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ BEGIN_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~~~ regs->start = TALLOC (regs->num_regs, regoff_t); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regs->end = TALLOC (regs->num_regs, regoff_t); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ END_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~ RE_MATCH_RELOCATE_MOVEABLE_DATA_POINTERS (); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (regs->start == NULL || regs->end == NULL) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ FREE_VARIABLES (); ~~~~~~~~~~~~~~~~~~ return -2; ~~~~~~~~~~ } ~ bufp->regs_allocated = REGS_REALLOCATE; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ else if (bufp->regs_allocated == REGS_REALLOCATE) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { /* Yes. If we need more elements than were already ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ allocated, reallocate them. If we need fewer, just ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ leave it alone. */ ~~~~~~~~~~~~~~~~~~~ if (regs->num_regs < num_nonshy_regs + 1) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ regs->num_regs = num_nonshy_regs + 1; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ BEGIN_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~~~ RETALLOC (regs->start, regs->num_regs, regoff_t); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RETALLOC (regs->end, regs->num_regs, regoff_t); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ END_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~ RE_MATCH_RELOCATE_MOVEABLE_DATA_POINTERS (); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (regs->start == NULL || regs->end == NULL) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ FREE_VARIABLES (); ~~~~~~~~~~~~~~~~~~ return -2; ~~~~~~~~~~ } ~ } ~ } ~ else ~~~~ { ~ /* The braces fend off a "empty body in an else-statement" ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ warning under GCC when assert expands to nothing. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ assert (bufp->regs_allocated == REGS_FIXED); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* Convert the pointer data in `regstart' and `regend' to ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ indices. Register zero has to be set differently, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ since we haven't kept track of any info for it. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (regs->num_regs > 0) ~~~~~~~~~~~~~~~~~~~~~~~ { ~ regs->start[0] = pos; ~~~~~~~~~~~~~~~~~~~~~ regs->end[0] = (MATCHING_IN_FIRST_STRING ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ? ((regoff_t) (d - string1)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ : ((regoff_t) (d - string2 + size1))); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* Map over the NUM_NONSHY_REGS non-shy internal registers. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Copy each into the corresponding external register. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ MCNT indexes external registers. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (mcnt = 1; mcnt < MIN (num_nonshy_regs, regs->num_regs); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ mcnt++) ~~~~~~~ { ~ int internal_reg = bufp->external_to_internal_register[mcnt]; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if ((int)0xDEADBEEF == internal_reg ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || REG_UNSET (regstart[internal_reg]) || ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ REG_UNSET (regend[internal_reg])) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regs->start[mcnt] = regs->end[mcnt] = -1; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ else ~~~~ { ~ regs->start[mcnt] = ~~~~~~~~~~~~~~~~~~~ (regoff_t) POINTER_TO_OFFSET (regstart[internal_reg]); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regs->end[mcnt] = ~~~~~~~~~~~~~~~~~ (regoff_t) POINTER_TO_OFFSET (regend[internal_reg]); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ } ~ } /* regs && !bufp->no_sub */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* If we have regs and the regs structure has more elements than ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ were in the pattern, set the extra elements starting with ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ NUM_NONSHY_REGS to -1. If we (re)allocated the registers, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ this is the case, because we always allocate enough to have ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ at least one -1 at the end. ~~~~~~~~~~~~~~~~~~~~~~~~~~~ We do this even when no_sub is set because some applications ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (XEmacs) reuse register structures which may contain stale ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ information, and permit attempts to access those registers. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ It would be possible to require the caller to do this, but we'd ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ have to change the API for this function to reflect that, and ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ audit all callers. Note: as of 2003-04-17 callers in XEmacs ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ do clear the registers, but it's safer to leave this code in ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ because of reallocation. ~~~~~~~~~~~~~~~~~~~~~~~~ */ ~~ if (regs && regs->num_regs > 0) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (mcnt = num_nonshy_regs; mcnt < regs->num_regs; mcnt++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regs->start[mcnt] = regs->end[mcnt] = -1; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ DEBUG_MATCH_PRINT4 ("%u failure points pushed, %u popped (%u remain).\n", ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ nfailure_points_pushed, nfailure_points_popped, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ nfailure_points_pushed - nfailure_points_popped); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT2 ("%u registers pushed.\n", num_regs_pushed); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ mcnt = d - pos - (MATCHING_IN_FIRST_STRING ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ? string1 ~~~~~~~~~ : string2 - size1); ~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT2 ("Returning %d from re_match_2.\n", mcnt); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ FREE_VARIABLES (); ~~~~~~~~~~~~~~~~~~ return mcnt; ~~~~~~~~~~~~ } ~ /* Otherwise match next pattern command. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ switch ((re_opcode_t) *p++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ /* Ignore these. Used to ignore the n of succeed_n's which ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ currently have n == 0. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~ case no_op: ~~~~~~~~~~~ DEBUG_MATCH_PRINT1 ("EXECUTING no_op.\n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case succeed: ~~~~~~~~~~~~~ DEBUG_MATCH_PRINT1 ("EXECUTING succeed.\n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto succeed_label; ~~~~~~~~~~~~~~~~~~~ /* Match exactly a string of length n in the pattern. The ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ following byte in the pattern defines n, and the n bytes after ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ that make up the string to match. (Under Mule, this will be in ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the default internal format.) */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case exactn: ~~~~~~~~~~~~ mcnt = *p++; ~~~~~~~~~~~~ DEBUG_MATCH_PRINT2 ("EXECUTING exactn %d.\n", mcnt); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* This is written out as an if-else so we don't waste time ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ testing `translate' inside the loop. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (TRANSLATE_P (translate)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ do ~~ { ~ #ifdef MULE ~~~~~~~~~~~ Bytecount pat_len; ~~~~~~~~~~~~~~~~~~ REGEX_PREFETCH (); ~~~~~~~~~~~~~~~~~~ if (RE_TRANSLATE_1 (itext_ichar_fmt (d, fmt, lispobj)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ != itext_ichar (p)) ~~~~~~~~~~~~~~~~~~~ goto fail; ~~~~~~~~~~ pat_len = itext_ichar_len (p); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ p += pat_len; ~~~~~~~~~~~~~ INC_IBYTEPTR_FMT (d, fmt); ~~~~~~~~~~~~~~~~~~~~~~~~~~ mcnt -= pat_len; ~~~~~~~~~~~~~~~~ #else /* not MULE */ ~~~~~~~~~~~~~~~~~~~~ REGEX_PREFETCH (); ~~~~~~~~~~~~~~~~~~ if ((unsigned char) RE_TRANSLATE_1 (*d++) != *p++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto fail; ~~~~~~~~~~ mcnt--; ~~~~~~~ #endif ~~~~~~ } ~ while (mcnt > 0); ~~~~~~~~~~~~~~~~~ } ~ else ~~~~ { ~ #ifdef MULE ~~~~~~~~~~~ /* If buffer format is default, then we can shortcut and just ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ compare the text directly, byte by byte. Otherwise, we ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ need to go character by character. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (fmt != FORMAT_DEFAULT) ~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ do ~~ { ~ Bytecount pat_len; ~~~~~~~~~~~~~~~~~~ REGEX_PREFETCH (); ~~~~~~~~~~~~~~~~~~ if (itext_ichar_fmt (d, fmt, lispobj) != ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ itext_ichar (p)) ~~~~~~~~~~~~~~~~ goto fail; ~~~~~~~~~~ pat_len = itext_ichar_len (p); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ p += pat_len; ~~~~~~~~~~~~~ INC_IBYTEPTR_FMT (d, fmt); ~~~~~~~~~~~~~~~~~~~~~~~~~~ mcnt -= pat_len; ~~~~~~~~~~~~~~~~ } ~ while (mcnt > 0); ~~~~~~~~~~~~~~~~~ } ~ else ~~~~ #endif ~~~~~~ { ~ do ~~ { ~ REGEX_PREFETCH (); ~~~~~~~~~~~~~~~~~~ if (*d++ != *p++) goto fail; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ mcnt--; ~~~~~~~ } ~ while (mcnt > 0); ~~~~~~~~~~~~~~~~~ } ~ } ~ SET_REGS_MATCHED (); ~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ /* Match any character except possibly a newline or a null. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case anychar: ~~~~~~~~~~~~~ DEBUG_MATCH_PRINT1 ("EXECUTING anychar.\n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ REGEX_PREFETCH (); ~~~~~~~~~~~~~~~~~~ if ((!(bufp->syntax & RE_DOT_NEWLINE) && ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RE_TRANSLATE (itext_ichar_fmt (d, fmt, lispobj)) == '\n') ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || (bufp->syntax & RE_DOT_NOT_NULL && ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RE_TRANSLATE (itext_ichar_fmt (d, fmt, lispobj)) == ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ '\000')) ~~~~~~~~ goto fail; ~~~~~~~~~~ SET_REGS_MATCHED (); ~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT2 (" Matched `%c'.\n", *d); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ INC_IBYTEPTR_FMT (d, fmt); /* XEmacs change */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case charset: ~~~~~~~~~~~~~ case charset_not: ~~~~~~~~~~~~~~~~~ { ~ REGISTER Ichar c; ~~~~~~~~~~~~~~~~~ re_bool not_p = (re_opcode_t) *(p - 1) == charset_not; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT2 ("EXECUTING charset%s.\n", not_p ? "_not" : ""); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ REGEX_PREFETCH (); ~~~~~~~~~~~~~~~~~~ c = itext_ichar_fmt (d, fmt, lispobj); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ c = RE_TRANSLATE (c); /* The character to match. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Cast to `unsigned int' instead of `unsigned char' in case the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bit list is a full 32 bytes long. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if ((unsigned int)c < (unsigned int) (*p * BYTEWIDTH) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ && p[1 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH))) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ not_p = !not_p; ~~~~~~~~~~~~~~~ p += 1 + *p; ~~~~~~~~~~~~ if (!not_p) goto fail; ~~~~~~~~~~~~~~~~~~~~~~ SET_REGS_MATCHED (); ~~~~~~~~~~~~~~~~~~~~ INC_IBYTEPTR_FMT (d, fmt); /* XEmacs change */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ } ~ #ifdef MULE ~~~~~~~~~~~ case charset_mule: ~~~~~~~~~~~~~~~~~~ case charset_mule_not: ~~~~~~~~~~~~~~~~~~~~~~ { ~ REGISTER Ichar c; ~~~~~~~~~~~~~~~~~ re_bool not_p = (re_opcode_t) *(p - 1) == charset_mule_not; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Bitbyte class_bits = *p++; ~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT2 ("EXECUTING charset_mule%s.\n", ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ not_p ? "_not" : ""); ~~~~~~~~~~~~~~~~~~~~~ REGEX_PREFETCH (); ~~~~~~~~~~~~~~~~~~ c = itext_ichar_fmt (d, fmt, lispobj); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ c = RE_TRANSLATE (c); /* The character to match. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if ((class_bits && ~~~~~~~~~~~~~~~~~~ ((class_bits & BIT_WORD && ISWORD (c)) /* = ALNUM */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || (class_bits & BIT_ALPHA && ISALPHA (c)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || (class_bits & BIT_SPACE && ISSPACE (c)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || (class_bits & BIT_PUNCT && ISPUNCT (c)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || (TRANSLATE_P (translate) ? ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (class_bits & (BIT_UPPER | BIT_LOWER) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ && !NOCASEP (lispbuf, c)) ~~~~~~~~~~~~~~~~~~~~~~~~~ : ((class_bits & BIT_UPPER && ISUPPER (c)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || (class_bits & BIT_LOWER && ISLOWER (c)))))) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || EQ (Qt, unified_range_table_lookup ((void *) p, c, Qnil))) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ not_p = !not_p; ~~~~~~~~~~~~~~~ } ~ p += unified_range_table_bytes_used ((void *) p); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!not_p) goto fail; ~~~~~~~~~~~~~~~~~~~~~~ SET_REGS_MATCHED (); ~~~~~~~~~~~~~~~~~~~~ INC_IBYTEPTR_FMT (d, fmt); ~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ } ~ #endif /* MULE */ ~~~~~~~~~~~~~~~~~ /* The beginning of a group is represented by start_memory. The ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ arguments are the register number in the next two bytes, and the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ number of groups inner to this one in the two bytes thereafter. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ The text matched within the group is recorded (in the internal ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ registers data structure) under the register number. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case start_memory: ~~~~~~~~~~~~~~~~~~ { ~ regnum_t regno; ~~~~~~~~~~~~~~~ /* Find out if this group can match the empty string. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ p1 = p; /* To send to group_match_null_string_p. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ EXTRACT_NONNEGATIVE_AND_INCR (regno, p); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT3 ("EXECUTING start_memory %d (%d):\n", ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regno, extract_number (p)); ~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (REG_MATCH_NULL_STRING_P (reg_info[regno]) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ == MATCH_NULL_UNSET_VALUE) ~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ REG_MATCH_NULL_STRING_P (reg_info[regno]) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ = group_match_null_string_p (&p1, pend, reg_info); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ DEBUG_MATCH_PRINT2 (" group CAN%s match null string\n", ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ REG_MATCH_NULL_STRING_P (reg_info[regno]) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ? "NOT" : ""); ~~~~~~~~~~~~~~ /* Save the position in the string where we were the last time ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ we were at this open-group operator in case the group is ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ operated upon by a repetition operator, e.g., with `(a*)*b' ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ against `ab'; then we want to ignore where we are now in the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ string in case this attempt to match fails. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ old_regstart[regno] = REG_MATCH_NULL_STRING_P (reg_info[regno]) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ? REG_UNSET (regstart[regno]) ? d : regstart[regno] ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ : regstart[regno]; ~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT2 (" old_regstart: %zd\n", ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ POINTER_TO_OFFSET (old_regstart[regno])); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regstart[regno] = d; ~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT2 (" regstart: %zd\n", ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ POINTER_TO_OFFSET (regstart[regno])); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ IS_ACTIVE (reg_info[regno]) = 1; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ MATCHED_SOMETHING (reg_info[regno]) = 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Clear this whenever we change the register activity status. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ set_regs_matched_done = 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~ /* This is the new highest active register. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ highest_active_reg = regno; ~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* If nothing was active before, this is the new lowest active ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ register. */ ~~~~~~~~~~~~~ if (lowest_active_reg == NO_LOWEST_ACTIVE_REG) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ lowest_active_reg = regno; ~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Move past the inner group count. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ p += 2; ~~~~~~~ just_past_start_mem = p; ~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ } ~ /* The stop_memory opcode represents the end of a group. Its ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ arguments are the same as start_memory's: the register ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ number, and the number of inner groups. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case stop_memory: ~~~~~~~~~~~~~~~~~ { ~ regnum_t regno, inner_groups; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ EXTRACT_NONNEGATIVE_AND_INCR (regno, p); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ EXTRACT_NONNEGATIVE_AND_INCR (inner_groups, p); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT3 ("EXECUTING stop_memory %d (%d):\n", ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regno, inner_groups); ~~~~~~~~~~~~~~~~~~~~~ /* We need to save the string position the last time we were at ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ this close-group operator in case the group is operated ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ upon by a repetition operator, e.g., with `((a*)*(b*)*)*' ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ against `aba'; then we want to ignore where we are now in ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the string in case this attempt to match fails. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ old_regend[regno] = REG_MATCH_NULL_STRING_P (reg_info[regno]) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ? REG_UNSET (regend[regno]) ? d : regend[regno] ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ : regend[regno]; ~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT2 (" old_regend: %zd\n", ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ POINTER_TO_OFFSET (old_regend[regno])); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regend[regno] = d; ~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT2 (" regend: %zd\n", ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ POINTER_TO_OFFSET (regend[regno])); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* This register isn't active anymore. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ IS_ACTIVE (reg_info[regno]) = 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Clear this whenever we change the register activity status. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ set_regs_matched_done = 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~ /* If this was the only register active, nothing is active ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ anymore. */ ~~~~~~~~~~~~ if (lowest_active_reg == highest_active_reg) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ lowest_active_reg = NO_LOWEST_ACTIVE_REG; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ highest_active_reg = NO_HIGHEST_ACTIVE_REG; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ else ~~~~ { ~ /* We must scan for the new highest active register, since it ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ isn't necessarily one less than now: consider ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (a(b)c(d(e)f)g). When group 3 ends, after the f), the new ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ highest active register is 1. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regnum_t r = regno - 1; ~~~~~~~~~~~~~~~~~~~~~~~ while (r > 0 && !IS_ACTIVE (reg_info[r])) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ r--; ~~~~ /* If we end up at register zero, that means that we saved ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the registers as the result of an `on_failure_jump', not ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ a `start_memory', and we jumped to past the innermost ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `stop_memory'. For example, in ((.)*) we save registers ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 1 and 2 as a result of the *, but when we pop back to the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ second ), we are at the stop_memory 1. Thus, nothing is ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ active. */ ~~~~~~~~~~~ if (r == 0) ~~~~~~~~~~~ { ~ lowest_active_reg = NO_LOWEST_ACTIVE_REG; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ highest_active_reg = NO_HIGHEST_ACTIVE_REG; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ else ~~~~ { ~ highest_active_reg = r; ~~~~~~~~~~~~~~~~~~~~~~~ /* 98/9/21 jhod: We've also gotta set lowest_active_reg, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ don't we? */ ~~~~~~~~~~~~ r = 1; ~~~~~~ while (r < highest_active_reg && !IS_ACTIVE(reg_info[r])) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ r++; ~~~~ lowest_active_reg = r; ~~~~~~~~~~~~~~~~~~~~~~ } ~ } ~ /* If just failed to match something this time around with a ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ group that's operated on by a repetition operator, try to ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ force exit from the ``loop'', and restore the register ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ information for this group that we had before trying this ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ last match. */ ~~~~~~~~~~~~~~~ if ((!MATCHED_SOMETHING (reg_info[regno]) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || just_past_start_mem == p - 4) && p < pend) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ re_bool is_a_jump_n = false; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ p1 = p; ~~~~~~~ mcnt = 0; ~~~~~~~~~ switch ((re_opcode_t) *p1++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ case jump_n: ~~~~~~~~~~~~ is_a_jump_n = true; ~~~~~~~~~~~~~~~~~~~ case pop_failure_jump: ~~~~~~~~~~~~~~~~~~~~~~ case maybe_pop_jump: ~~~~~~~~~~~~~~~~~~~~ case jump: ~~~~~~~~~~ case dummy_failure_jump: ~~~~~~~~~~~~~~~~~~~~~~~~ EXTRACT_NUMBER_AND_INCR (mcnt, p1); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (is_a_jump_n) ~~~~~~~~~~~~~~~~ p1 += 2; ~~~~~~~~ break; ~~~~~~ default: ~~~~~~~~ /* do nothing */ ; ~~~~~~~~~~~~~~~~~~ } ~ p1 += mcnt; ~~~~~~~~~~~ /* If the next operation is a jump backwards in the pattern ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ to an on_failure_jump right before the start_memory ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ corresponding to this stop_memory, exit from the loop ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ by forcing a failure after pushing on the stack the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ on_failure_jump's jump in the pattern, and d. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (mcnt < 0 && (re_opcode_t) *p1 == on_failure_jump ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ && (re_opcode_t) p1[3] == start_memory && ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regno == extract_nonnegative (p1 + 4)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ /* If this group ever matched anything, then restore ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ what its registers were before trying this last ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ failed match, e.g., with `(a*)*b' against `ab' for ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regstart[1], and, e.g., with `((a*)*(b*)*)*' against ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `aba' for regend[3]. ~~~~~~~~~~~~~~~~~~~~ Also restore the registers for inner groups for, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ e.g., `((a*)(b*))*' against `aba' (register 3 would ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ otherwise get trashed). */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (EVER_MATCHED_SOMETHING (reg_info[regno])) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ int r; ~~~~~~ EVER_MATCHED_SOMETHING (reg_info[regno]) = 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Restore this and inner groups' (if any) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ registers. */ ~~~~~~~~~~~~~~ for (r = regno; r < regno + inner_groups; r++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ regstart[r] = old_regstart[r]; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* xx why this test? */ ~~~~~~~~~~~~~~~~~~~~~~~~ if (old_regend[r] >= regstart[r]) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regend[r] = old_regend[r]; ~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ } ~ p1++; ~~~~~ EXTRACT_NUMBER_AND_INCR (mcnt, p1); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PUSH_FAILURE_POINT (p1 + mcnt, d, -2); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto fail; ~~~~~~~~~~ } ~ } ~ /* We used to move past the register number and inner group count ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ here, when registers were just one byte; that's no longer ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ necessary with EXTRACT_NUMBER_AND_INCR(), above. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ } ~ /* \ has been turned into a `duplicate' command which is ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ followed by the numeric value of as the register number. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (Already passed through external-to-internal-register mapping, so ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ it refers to the actual group number, not the non-shy-only ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ numbering used in the external world.) */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case duplicate: ~~~~~~~~~~~~~~~ { ~ REGISTER re_char *d2, *dend2; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Get which register to match against. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regnum_t regno; ~~~~~~~~~~~~~~~ EXTRACT_NONNEGATIVE_AND_INCR (regno, p); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT2 ("EXECUTING duplicate %d.\n", regno); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Can't back reference a group which we've never matched. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (REG_UNSET (regstart[regno]) || REG_UNSET (regend[regno])) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto fail; ~~~~~~~~~~ /* Where in input to try to start matching. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ d2 = regstart[regno]; ~~~~~~~~~~~~~~~~~~~~~ /* Where to stop matching; if both the place to start and ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the place to stop matching are in the same string, then ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ set to the place to stop, otherwise, for now have to use ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the end of the first string. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ dend2 = ((FIRST_STRING_P (regstart[regno]) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ == FIRST_STRING_P (regend[regno])) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ? regend[regno] : end_match_1); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (;;) ~~~~~~~~ { ~ /* If necessary, advance to next segment in register ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ contents. */ ~~~~~~~~~~~~~ while (d2 == dend2) ~~~~~~~~~~~~~~~~~~~ { ~ if (dend2 == end_match_2) break; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (dend2 == regend[regno]) break; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* End of string1 => advance to string2. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ d2 = string2; ~~~~~~~~~~~~~ dend2 = regend[regno]; ~~~~~~~~~~~~~~~~~~~~~~ } ~ /* At end of register contents => success */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (d2 == dend2) break; ~~~~~~~~~~~~~~~~~~~~~~~ /* If necessary, advance to next segment in data. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ REGEX_PREFETCH (); ~~~~~~~~~~~~~~~~~~ /* How many characters left in this segment to match. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ mcnt = dend - d; ~~~~~~~~~~~~~~~~ /* Want how many consecutive characters we can match in ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ one shot, so, if necessary, adjust the count. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (mcnt > dend2 - d2) ~~~~~~~~~~~~~~~~~~~~~~ mcnt = dend2 - d2; ~~~~~~~~~~~~~~~~~~ /* Compare that many; failure if mismatch, else move ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ past them. */ ~~~~~~~~~~~~~~ if (TRANSLATE_P (translate) ~~~~~~~~~~~~~~~~~~~~~~~~~~~ ? bcmp_translate (d, d2, mcnt, translate ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef emacs ~~~~~~~~~~~~ , fmt, lispobj ~~~~~~~~~~~~~~ #endif ~~~~~~ ) ~ : memcmp (d, d2, mcnt)) ~~~~~~~~~~~~~~~~~~~~~~~ goto fail; ~~~~~~~~~~ d += mcnt, d2 += mcnt; ~~~~~~~~~~~~~~~~~~~~~~ /* Do this because we've match some characters. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ SET_REGS_MATCHED (); ~~~~~~~~~~~~~~~~~~~~ } ~ } ~ break; ~~~~~~ /* begline matches the empty string at the beginning of the string ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (unless `not_bol' is set in `bufp'), and, if ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `newline_anchor' is set, after newlines. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case begline: ~~~~~~~~~~~~~ DEBUG_MATCH_PRINT1 ("EXECUTING begline.\n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (AT_STRINGS_BEG (d)) ~~~~~~~~~~~~~~~~~~~~~~~ { ~ if (!bufp->not_bol) break; ~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ else ~~~~ { ~ re_char *d2 = d; ~~~~~~~~~~~~~~~~ DEC_IBYTEPTR (d2); ~~~~~~~~~~~~~~~~~~ if (itext_ichar_ascii_fmt (d2, fmt, lispobj) == '\n' && ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->newline_anchor) ~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ } ~ /* In all other cases, we fail. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto fail; ~~~~~~~~~~ /* endline is the dual of begline. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case endline: ~~~~~~~~~~~~~ DEBUG_MATCH_PRINT1 ("EXECUTING endline.\n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (AT_STRINGS_END (d)) ~~~~~~~~~~~~~~~~~~~~~~~ { ~ if (!bufp->not_eol) break; ~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* We have to ``prefetch'' the next character. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ else if ((d == end1 ? ~~~~~~~~~~~~~~~~~~~~~ itext_ichar_ascii_fmt (string2, fmt, lispobj) : ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ itext_ichar_ascii_fmt (d, fmt, lispobj)) == '\n' ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ && bufp->newline_anchor) ~~~~~~~~~~~~~~~~~~~~~~~~ { ~ break; ~~~~~~ } ~ goto fail; ~~~~~~~~~~ /* Match at the very beginning of the data. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case begbuf: ~~~~~~~~~~~~ DEBUG_MATCH_PRINT1 ("EXECUTING begbuf.\n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (AT_STRINGS_BEG (d)) ~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ goto fail; ~~~~~~~~~~ /* Match at the very end of the data. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case endbuf: ~~~~~~~~~~~~ DEBUG_MATCH_PRINT1 ("EXECUTING endbuf.\n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (AT_STRINGS_END (d)) ~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ goto fail; ~~~~~~~~~~ /* on_failure_keep_string_jump is used to optimize `.*\n'. It ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ pushes NULL as the value for the string on the stack. Then ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `pop_failure_point' will keep the current value for the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ string, instead of restoring it. To see why, consider ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ matching `foo\nbar' against `.*\n'. The .* matches the foo; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ then the . fails against the \n. But the next thing we want ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ to do is match the \n against the \n; if we restored the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ string value, we would be back at the foo. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Because this is used only in specific cases, we don't need to ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ check all the things that `on_failure_jump' does, to make ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ sure the right things get saved on the stack. Hence we don't ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ share its code. The only reason to push anything on the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ stack at all is that otherwise we would have to change ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `anychar's code to do something besides goto fail in this ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case; that seems worse than this. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case on_failure_keep_string_jump: ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT1 ("EXECUTING on_failure_keep_string_jump"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ EXTRACT_NUMBER_AND_INCR (mcnt, p); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT3 (" %d (to 0x%zx):\n", mcnt, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (Bytecount) (p + mcnt)); ~~~~~~~~~~~~~~~~~~~~~~~~ PUSH_FAILURE_POINT (p + mcnt, (unsigned char *) 0, -2); ~~~~~~~~~~~~~~~~~~~~~~~~~ regex.c:774:55: note: in definition of macro 'DEBUG_FAIL_PRINT2' if (debug_regexps & RE_DEBUG_FAILURE_POINT) printf (x1, x2) ^~ regex.c:6537:11: note: in expansion of macro 'PUSH_FAILURE_POINT' PUSH_FAILURE_POINT (p + mcnt, (unsigned char *) 0, -2); ^~~~~~~~~~~~~~~~~~ regex.c:1817:26: warning: format '%zx' expects argument of type 'size_t', but argument 2 has type 'long int' [-Wformat=] DEBUG_FAIL_PRINT2 (" Pushing string 0x%zx: `", \ ^ regex.c:774:55: note: in definition of macro 'DEBUG_FAIL_PRINT2' if (debug_regexps & RE_DEBUG_FAILURE_POINT) printf (x1, x2) ^~ regex.c:6537:11: note: in expansion of macro 'PUSH_FAILURE_POINT' PUSH_FAILURE_POINT (p + mcnt, (unsigned char *) 0, -2); ^~~~~~~~~~~~~~~~~~ regex.c:6558:31: warning: format '%zx' expects argument of type 'size_t', but argument 3 has type 'long int' [-Wformat=] DEBUG_MATCH_PRINT3 (" %d (to 0x%zx)", mcnt, (Bytecount) (p + mcnt)); ^ ~~~~~~~~~~~~~~~~~~~~~~ regex.c:791:50: note: in definition of macro 'DEBUG_MATCH_PRINT3' if (debug_regexps & RE_DEBUG_MATCHING) printf (x1, x2, x3) ^~ regex.c:1731:26: warning: format '%zd' expects argument of type 'signed size_t', but argument 2 has type 'long int' [-Wformat=] DEBUG_FAIL_PRINT2 (" Before push, next avail: %zd\n", \ ^ (Bytecount) (fail_stack).avail); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~ regex.c:774:55: note: in definition of macro 'DEBUG_FAIL_PRINT2' if (debug_regexps & RE_DEBUG_FAILURE_POINT) printf (x1, x2) ^~ regex.c:6590:11: note: in expansion of macro 'PUSH_FAILURE_POINT' PUSH_FAILURE_POINT (p + mcnt, d, -2); ^~~~~~~~~~~~~~~~~~ regex.c:1733:26: warning: format '%zd' expects argument of type 'signed size_t', but argument 2 has type 'long int' [-Wformat=] DEBUG_FAIL_PRINT2 (" size: %zd\n", \ ^ (Bytecount) (fail_stack).size); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~ regex.c:774:55: note: in definition of macro 'DEBUG_FAIL_PRINT2' if (debug_regexps & RE_DEBUG_FAILURE_POINT) printf (x1, x2) ^~ regex.c:6590:11: note: in expansion of macro 'PUSH_FAILURE_POINT' PUSH_FAILURE_POINT (p + mcnt, d, -2); ^~~~~~~~~~~~~~~~~~ regex.c:1737:26: warning: format '%zd' expects argument of type 'signed size_t', but argument 2 has type 'long int' [-Wformat=] DEBUG_FAIL_PRINT2 (" available: %zd\n", \ ^ regex.c:774:55: note: in definition of macro 'DEBUG_FAIL_PRINT2' if (debug_regexps & RE_DEBUG_FAILURE_POINT) printf (x1, x2) ^~ regex.c:6590:11: note: in expansion of macro 'PUSH_FAILURE_POINT' PUSH_FAILURE_POINT (p + mcnt, d, -2); ^~~~~~~~~~~~~~~~~~ regex.c:1756:23: warning: format '%zd' expects argument of type 'signed size_t', but argument 2 has type 'long int' [-Wformat=] DEBUG_FAIL_PRINT2 ("\n Doubled stack; size now: %zd\n", \ ^ (Bytecount) (fail_stack).size); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~ regex.c:774:55: note: in definition of macro 'DEBUG_FAIL_PRINT2' if (debug_regexps & RE_DEBUG_FAILURE_POINT) printf (x1, x2) ^~ regex.c:6590:11: note: in expansion of macro 'PUSH_FAILURE_POINT' PUSH_FAILURE_POINT (p + mcnt, d, -2); ^~~~~~~~~~~~~~~~~~ regex.c:1758:23: warning: format '%zd' expects argument of type 'signed size_t', but argument 2 has type 'long int' [-Wformat=] DEBUG_FAIL_PRINT2 (" slots available: %zd\n", \ ^ regex.c:774:55: note: in definition of macro 'DEBUG_FAIL_PRINT2' if (debug_regexps & RE_DEBUG_FAILURE_POINT) printf (x1, x2) ^~ regex.c:6590:11: note: in expansion of macro 'PUSH_FAILURE_POINT' PUSH_FAILURE_POINT (p + mcnt, d, -2); ^~~~~~~~~~~~~~~~~~ regex.c:1777:23: warning: format '%zx' expects argument of type 'size_t', but argument 2 has type 'long int' [-Wformat=] DEBUG_FAIL_PRINT2 (" start: 0x%zx\n", \ ^ (Bytecount) regstart[this_reg]); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regex.c:774:55: note: in definition of macro 'DEBUG_FAIL_PRINT2' if (debug_regexps & RE_DEBUG_FAILURE_POINT) printf (x1, x2) ^~ regex.c:6590:11: note: in expansion of macro 'PUSH_FAILURE_POINT' PUSH_FAILURE_POINT (p + mcnt, d, -2); ^~~~~~~~~~~~~~~~~~ regex.c:1779:23: warning: format '%zx' expects argument of type 'size_t', but argument 2 has type 'long int' [-Wformat=] DEBUG_FAIL_PRINT2 (" end: 0x%zx\n", \ ^ (Bytecount) regend[this_reg]); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regex.c:774:55: note: in definition of macro 'DEBUG_FAIL_PRINT2' if (debug_regexps & RE_DEBUG_FAILURE_POINT) printf (x1, x2) ^~ regex.c:6590:11: note: in expansion of macro 'PUSH_FAILURE_POINT' PUSH_FAILURE_POINT (p + mcnt, d, -2); ^~~~~~~~~~~~~~~~~~ regex.c:1781:23: warning: format '%zx' expects argument of type 'size_t', but argument 2 has type 'long int' [-Wformat=] DEBUG_FAIL_PRINT2 (" info: 0x%zx\n ", \ ^ * (long *) (®_info[this_reg])); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regex.c:774:55: note: in definition of macro 'DEBUG_FAIL_PRINT2' if (debug_regexps & RE_DEBUG_FAILURE_POINT) printf (x1, x2) ^~ regex.c:6590:11: note: in expansion of macro 'PUSH_FAILURE_POINT' PUSH_FAILURE_POINT (p + mcnt, d, -2); ^~~~~~~~~~~~~~~~~~ regex.c:1814:26: warning: format '%zx' expects argument of type 'size_t', but argument 2 has type 'long int' [-Wformat=] DEBUG_FAIL_PRINT2 (" Pushing pattern 0x%zx: \n", \ ^ (Bytecount) pattern_place); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_FAIL_PRINT_COMPILED_PATTERN (bufp, pattern_place, pend); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_FAIL_PRINT2 (" Pushing string 0x%zx: `", \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (Bytecount) string_place); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_FAIL_PRINT_DOUBLE_STRING (string_place, string1, size1, \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ string2, size2); \ ~~~~~~~~~~~~~~~~~~~~ DEBUG_FAIL_PRINT1 ("'\n"); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_FAIL_PRINT2 (" Pushing failure id: %u\n", failure_id); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } \ ~~~~~~~~~~~ } while (0) ~~~~~~~~~~~ /* This is the number of items that are pushed and popped on the stack ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for each register. */ ~~~~~~~~~~~~~~~~~~~~~~ #define NUM_REG_ITEMS 3 ~~~~~~~~~~~~~~~~~~~~~~~~ /* Individual items aside from the registers. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef DEBUG ~~~~~~~~~~~~ #define NUM_NONREG_ITEMS 5 /* Includes failure point id. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #else ~~~~~ #define NUM_NONREG_ITEMS 4 ~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ /* We push at most this many items on the stack. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* We used to use (num_regs - 1), which is the number of registers ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ this regexp will save; but that was changed to 5 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ to avoid stack overflow for a regexp with lots of parens. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define MAX_FAILURE_ITEMS (5 * NUM_REG_ITEMS + NUM_NONREG_ITEMS) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* We actually push this many items. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define NUM_FAILURE_ITEMS \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ((highest_active_reg - lowest_active_reg + 1) * NUM_REG_ITEMS \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + NUM_NONREG_ITEMS) ~~~~~~~~~~~~~~~~~~~ /* How many items can still be added to the stack without overflowing it. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define REMAINING_AVAIL_SLOTS ((fail_stack).size - (fail_stack).avail) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Pops what PUSH_FAIL_STACK pushes. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ We restore into the following parameters, all of which should be lvalues: ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ STR -- the saved data position. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PAT -- the saved pattern position. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ LOW_REG, HIGH_REG -- the highest and lowest active registers. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ REGSTART, REGEND -- arrays of string positions. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ REG_INFO -- array of information about each subexpression. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Also assumes the variables `fail_stack' and (if debugging), `bufp', ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `pend', `string1', `size1', `string2', and `size2'. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define POP_FAILURE_POINT(str, pat, low_reg, high_reg, \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regstart, regend, reg_info) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ do { \ ~~~~~~~~~~~~~~ DEBUG_STATEMENT (int ffailure_id;) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int this_reg; \ ~~~~~~~~~~~~~~~~~~~~~~ const unsigned char *string_temp; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ \ ~ /* Remove failure points and point to how many regs pushed. */ \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ assert (fail_stack.avail >= NUM_NONREG_ITEMS); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ \ ~ if (DEBUG_RUNTIME_FLAGS & RE_DEBUG_FAILURE_POINT) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { \ ~~~~~~~~~~~ DEBUG_FAIL_PRINT1 ("POP_FAILURE_POINT:\n"); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_FAIL_PRINT2 (" Before pop, next avail: %zd\n", \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (Bytecount) fail_stack.avail); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_FAIL_PRINT2 (" size: %zd\n", \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (Bytecount) fail_stack.size); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } \ ~~~~~~~~~~~ \ ~ DEBUG_STATEMENT (ffailure_id = POP_FAILURE_INT()); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ \ ~ /* If the saved string location is NULL, it came from an \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ on_failure_keep_string_jump opcode, and we want to throw away the \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ saved NULL, thus retaining our current position in the string. */ \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ string_temp = POP_FAILURE_RELOCATABLE (); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (string_temp != NULL) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ str = string_temp; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~ \ ~ pat = (unsigned char *) POP_FAILURE_POINTER (); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ \ ~ /* Restore register info. */ \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ high_reg = POP_FAILURE_INT (); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ low_reg = POP_FAILURE_INT (); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ \ ~ if (DEBUG_RUNTIME_FLAGS & RE_DEBUG_FAILURE_POINT) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { \ ~~~~~~~~~~~ DEBUG_FAIL_PRINT2 (" Popping failure id: %d\n", ffailure_id); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_FAIL_PRINT2 (" Popping string 0x%zx: `", (Bytecount) str); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_FAIL_PRINT_DOUBLE_STRING (str, string1, size1, \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ string2, size2); \ ~~~~~~~~~~~~~~~~~~~~ DEBUG_FAIL_PRINT1 ("'\n"); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_FAIL_PRINT2 (" Popping pattern 0x%zx: ", (Bytecount) pat); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_FAIL_PRINT_COMPILED_PATTERN (bufp, pat, pend); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_FAIL_PRINT2 (" Popping high active reg: %d\n", high_reg); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_FAIL_PRINT2 (" Popping low active reg: %d\n", low_reg); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } \ ~~~~~~~~~~~ \ ~ for (this_reg = high_reg; this_reg >= low_reg; this_reg--) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { \ ~~~~~~~~~~~ reg_info[this_reg].word = POP_FAILURE_ELT (); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regend[this_reg] = POP_FAILURE_RELOCATABLE (); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regstart[this_reg] = POP_FAILURE_RELOCATABLE (); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ \ ~ if (DEBUG_RUNTIME_FLAGS & RE_DEBUG_FAILURE_POINT) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { \ ~~~~~~~~~~ DEBUG_FAIL_PRINT2 (" Popping reg: %d\n", this_reg); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_FAIL_PRINT2 (" info: 0x%zx\n", \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ * (Bytecount *) ®_info[this_reg]); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_FAIL_PRINT2 (" end: 0x%zx\n", \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (Bytecount) regend[this_reg]); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_FAIL_PRINT2 (" start: 0x%zx\n", \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (Bytecount) regstart[this_reg]); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } \ ~~~~~~~~~~ } \ ~~~~~~~~~~~ \ ~ set_regs_matched_done = 0; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_STATEMENT (nfailure_points_popped++); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } while (0) /* POP_FAILURE_POINT */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ~ /* Structure for per-register (a.k.a. per-group) information. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Other register information, such as the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ starting and ending positions (which are addresses), and the list of ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ inner groups (which is a bits list) are maintained in separate ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ variables. ~~~~~~~~~~ We are making a (strictly speaking) nonportable assumption here: that ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the compiler will pack our bit fields into something that fits into ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the type of `word', i.e., is something that fits into one item on the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ failure stack. */ ~~~~~~~~~~~~~~~~~~ typedef union ~~~~~~~~~~~~~ { ~ fail_stack_elt_t word; ~~~~~~~~~~~~~~~~~~~~~~ struct ~~~~~~ { ~ /* This field is one if this group can match the empty string, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ zero if not. If not yet determined, `MATCH_NULL_UNSET_VALUE'. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define MATCH_NULL_UNSET_VALUE 3 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ unsigned int match_null_string_p : 2; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ unsigned int is_active : 1; ~~~~~~~~~~~~~~~~~~~~~~~~~~~ unsigned int matched_something : 1; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ unsigned int ever_matched_something : 1; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } bits; ~~~~~~~ } register_info_type; ~~~~~~~~~~~~~~~~~~~~~ #define REG_MATCH_NULL_STRING_P(R) ((R).bits.match_null_string_p) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define IS_ACTIVE(R) ((R).bits.is_active) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define MATCHED_SOMETHING(R) ((R).bits.matched_something) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define EVER_MATCHED_SOMETHING(R) ((R).bits.ever_matched_something) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Call this when have matched a real character; it sets `matched' flags ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for the subexpressions which we are currently inside. Also records ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ that those subexprs have matched. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define SET_REGS_MATCHED() \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ do \ ~~~~~~~~~~~~ { \ ~~~~~~~~~~~ if (!set_regs_matched_done) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { \ ~~~~~~~~~~ int r; \ ~~~~~~~~~~~~~~ set_regs_matched_done = 1; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (r = lowest_active_reg; r <= highest_active_reg; r++) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { \ ~~~~~~~~~~ MATCHED_SOMETHING (reg_info[r]) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ = EVER_MATCHED_SOMETHING (reg_info[r]) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ = 1; \ ~~~~~~~~~~~~ } \ ~~~~~~~~~~ } \ ~~~~~~~~~~ } \ ~~~~~~~~~~~ while (0) ~~~~~~~~~ ~ /* Subroutine declarations and macros for regex_compile. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Fetch the next character in the uncompiled pattern---translating it ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if necessary. */ ~~~~~~~~~~~~~~~~~ #define PATFETCH(c) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~ do { \ ~~~~~~~~~~~~~~ PATFETCH_RAW (c); \ ~~~~~~~~~~~~~~~~~~~~~~~~~ c = RE_TRANSLATE (c); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } while (0) ~~~~~~~~~~~ /* Fetch the next character in the uncompiled pattern, with no ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ translation. */ ~~~~~~~~~~~~~~~~ #define PATFETCH_RAW(c) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ do {if (p == pend) return REG_EEND; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ assert (p < pend); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~ c = itext_ichar (p); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ INC_IBYTEPTR (p); \ ~~~~~~~~~~~~~~~~~~~~~~~~~ } while (0) ~~~~~~~~~~~ /* Go backwards one character in the pattern. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define PATUNFETCH DEC_IBYTEPTR (p) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* If `translate' is non-null, return translate[D], else just D. We ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ cast the subscript to translate because some data is declared as ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `char *', to avoid warnings when a string constant is passed. But ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ when we use a character as a subscript we must make it unsigned. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define RE_TRANSLATE(d) \ ~~~~~~~~~~~~~~~~~~~~~~~~~ (TRANSLATE_P (translate) ? RE_TRANSLATE_1 (d) : (d)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Macros for outputting the compiled pattern into `buffer'. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* If the buffer isn't allocated when it comes in, use this. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define INIT_BUF_SIZE 32 ~~~~~~~~~~~~~~~~~~~~~~~~~ /* Make sure we have at least N more bytes of space in buffer. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define GET_BUFFER_SPACE(n) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ while (buf_end - bufp->buffer + (n) > (ptrdiff_t) bufp->allocated) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ EXTEND_BUFFER () ~~~~~~~~~~~~~~~~ /* Make sure we have one more byte of buffer space and then add C to it. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define BUF_PUSH(c) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~ do { \ ~~~~~~~~~~~~~~ GET_BUFFER_SPACE (1); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ --- search.o --- In file included from search.c:27:0: ./config.h:868:16: warning: "__builtin_alloca" is not defined, evaluates to 0 [-Wundef] #define alloca __builtin_alloca ^ --- regex.o --- *buf_end++ = (unsigned char) (c); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } while (0) ~~~~~~~~~~~ /* Ensure we have two more bytes of buffer space and then append C1 and C2. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define BUF_PUSH_2(c1, c2) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ do { \ ~~~~~~~~~~~~~~ GET_BUFFER_SPACE (2); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ *buf_end++ = (unsigned char) (c1); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ *buf_end++ = (unsigned char) (c2); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } while (0) ~~~~~~~~~~~ /* As with BUF_PUSH_2, except for three bytes. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define BUF_PUSH_3(c1, c2, c3) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ do { \ ~~~~~~~~~~~~~~ GET_BUFFER_SPACE (3); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ *buf_end++ = (unsigned char) (c1); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ *buf_end++ = (unsigned char) (c2); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ *buf_end++ = (unsigned char) (c3); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } while (0) ~~~~~~~~~~~ /* Store a jump with opcode OP at LOC to location TO. We store a ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ relative address offset by the three bytes the jump itself occupies. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define STORE_JUMP(op, loc, to) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ store_op1 (op, loc, (to) - (loc) - 3) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Likewise, for a two-argument jump. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define STORE_JUMP2(op, loc, to, arg) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ store_op2 (op, loc, (to) - (loc) - 3, arg) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Like `STORE_JUMP', but for inserting. Assume `buf_end' is the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ buffer end. */ ~~~~~~~~~~~~~~~ #define INSERT_JUMP(op, loc, to) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ insert_op1 (op, loc, (to) - (loc) - 3, buf_end) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Like `STORE_JUMP2', but for inserting. Assume `buf_end' is the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ buffer end. */ ~~~~~~~~~~~~~~~ #define INSERT_JUMP2(op, loc, to, arg) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ insert_op2 (op, loc, (to) - (loc) - 3, arg, buf_end) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Extend the buffer by twice its current size via realloc and ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ reset the pointers that pointed into the old block to point to the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ correct places in the new one. If extending the buffer results in it ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ being larger than RE_MAX_BUF_SIZE, then flag memory exhausted. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define EXTEND_BUFFER() \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ do { \ ~~~~~~~~~~~~~~~ re_char *old_buffer = bufp->buffer; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (bufp->allocated == RE_MAX_BUF_SIZE) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return REG_ESIZE; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->allocated <<= 1; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (bufp->allocated > RE_MAX_BUF_SIZE) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->allocated = RE_MAX_BUF_SIZE; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->buffer = \ ~~~~~~~~~~~~~~~~~~~~~~~ (unsigned char *) xrealloc (bufp->buffer, bufp->allocated); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (bufp->buffer == NULL) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return REG_ESPACE; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~ /* If the buffer moved, move all the pointers into it. */ \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (old_buffer != bufp->buffer) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { \ ~~~~~~~~~~~~ buf_end = (buf_end - old_buffer) + bufp->buffer; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ begalt = (begalt - old_buffer) + bufp->buffer; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (fixup_alt_jump) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~ fixup_alt_jump = (fixup_alt_jump - old_buffer) + bufp->buffer; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (laststart) \ ~~~~~~~~~~~~~~~~~~~~~~~ laststart = (laststart - old_buffer) + bufp->buffer; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (pending_exact) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~ pending_exact = (pending_exact - old_buffer) + bufp->buffer; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } \ ~~~~~~~~~~~~ } while (0) ~~~~~~~~~~~ #define INIT_REG_TRANSLATE_SIZE 5 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Macros for the compile stack. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Since offsets can go either forwards or backwards, this type needs to be ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ able to hold values from -(RE_MAX_BUF_SIZE - 1) to RE_MAX_BUF_SIZE - 1. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ typedef int pattern_offset_t; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ typedef struct ~~~~~~~~~~~~~~ { ~ pattern_offset_t begalt_offset; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ pattern_offset_t fixup_alt_jump; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ pattern_offset_t inner_group_offset; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ pattern_offset_t laststart_offset; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regnum_t regnum; ~~~~~~~~~~~~~~~~ } compile_stack_elt_t; ~~~~~~~~~~~~~~~~~~~~~~ typedef struct ~~~~~~~~~~~~~~ { ~ compile_stack_elt_t *stack; ~~~~~~~~~~~~~~~~~~~~~~~~~~~ int size; ~~~~~~~~~ int avail; /* Offset of next open position. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } compile_stack_type; ~~~~~~~~~~~~~~~~~~~~~ #define INIT_COMPILE_STACK_SIZE 32 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define COMPILE_STACK_EMPTY (compile_stack.avail == 0) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define COMPILE_STACK_FULL (compile_stack.avail == compile_stack.size) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* The next available element. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define COMPILE_STACK_TOP (compile_stack.stack[compile_stack.avail]) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Set the bit for character C in a bit vector. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define SET_LIST_BIT(c) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (buf_end[((unsigned char) (c)) / BYTEWIDTH] \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ |= 1 << (((unsigned char) c) % BYTEWIDTH)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef MULE ~~~~~~~~~~~ /* Set the "bit" for character C in a range table. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define SET_RANGETAB_BIT(c) put_range_table (rtab, c, c, Qt) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ #ifdef emacs ~~~~~~~~~~~~ /* Parse the longest number we can, but don't produce a bignum, that can't ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ correspond to anything we're interested in and would needlessly complicate ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ code. Also avoid the silent overflow issues of the non-emacs code below. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ If the string at P is not exhausted, leave P pointing at the next ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (probable-)non-digit byte encountered. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define GET_UNSIGNED_NUMBER(num) do \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Ibyte *_gus_numend = NULL; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Lisp_Object _gus_numno; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* most-positive-fixnum on 32 bit XEmacs is 10 decimal digits, \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ nine will keep us in fixnum territory no matter our \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ architecture */ \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Bytecount limit = min (pend - p, 9); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ \ ~ /* Require that any digits are ASCII. We already require that \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the user type ASCII in order to type {,(,|, etc, and there is \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the potential for security holes in the future if we allow \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ non-ASCII digits to specify groups in regexps and other \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ code that parses regexps is not aware of this. */ \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ _gus_numno = parse_integer (p, &_gus_numend, limit, 10, 1, \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Vdigit_fixnum_ascii); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (FIXNUMP (_gus_numno) && XREALFIXNUM (_gus_numno) >= 0) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ num = XREALFIXNUM (_gus_numno); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ p = _gus_numend; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } while (0) ~~~~~~~~~~~ #else ~~~~~ /* Get the next unsigned number in the uncompiled pattern. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define GET_UNSIGNED_NUMBER(num) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { if (p != pend) \ ~~~~~~~~~~~~~~~~~~~~~~~~ { \ ~~~~~~~~~~~ int _gun_do_unfetch = 1; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PATFETCH (c); \ ~~~~~~~~~~~~~~~~~~~~~~ while (ISDIGIT (c)) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~ { \ ~~~~~~~~~~~ if (num < 0) \ ~~~~~~~~~~~~~~~~~~~~ num = 0; \ ~~~~~~~~~~~~~~~~ num = num * 10 + c - '0'; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (p == pend) \ ~~~~~~~~~~~~~~~~~~~~~~ { \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ _gun_do_unfetch = 0; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; \ ~~~~~~~~~~~~~~~ } \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PATFETCH (c); \ ~~~~~~~~~~~~~~~~~~~~ } \ ~~~~~~~~~~~ if (_gun_do_unfetch) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Make sure P points to the next non-digit character. */ \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PATUNFETCH; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ #endif ~~~~~~ /* Map a string to the char class it names (if any). BEG points to the string ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ to be parsed and LIMIT is the length, in bytes, of that string. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ XEmacs; this only handles the NAME part of the [:NAME:] specification of a ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ character class name. The GNU emacs version of this function attempts to ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ handle the string from [: onwards, and is called re_wctype_parse. Our ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ approach means the function doesn't need to be called with every character ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ class encountered. ~~~~~~~~~~~~~~~~~~ LENGTH would be a Bytecount if this function didn't need to be compiled ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ also for executables that don't include lisp.h ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Return RECC_ERROR if STRP doesn't match a known character class. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_wctype_t ~~~~~~~~~~~ re_wctype (const unsigned char *beg, int limit) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ /* Sort tests in the length=five case by frequency the classes to minimize ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ number of times we fail the comparison. The frequencies of character class ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ names used in Emacs sources as of 2016-07-27: ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ $ find \( -name \*.c -o -name \*.el \) -exec grep -h '\[:[a-z]*:]' {} + | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ sed 's/]/]\n/g' |grep -o '\[:[a-z]*:]' |sort |uniq -c |sort -nr ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 213 [:alnum:] ~~~~~~~~~~~~~ 104 [:alpha:] ~~~~~~~~~~~~~ 62 [:space:] ~~~~~~~~~~~~ 39 [:digit:] ~~~~~~~~~~~~ 36 [:blank:] ~~~~~~~~~~~~ 26 [:word:] ~~~~~~~~~~~ 26 [:upper:] ~~~~~~~~~~~~ 21 [:lower:] ~~~~~~~~~~~~ 10 [:xdigit:] ~~~~~~~~~~~~~ 10 [:punct:] ~~~~~~~~~~~~ 10 [:ascii:] ~~~~~~~~~~~~ 4 [:nonascii:] ~~~~~~~~~~~~~~ 4 [:graph:] ~~~~~~~~~~~ 2 [:print:] ~~~~~~~~~~~ 2 [:cntrl:] ~~~~~~~~~~~ 1 [:ff:] ~~~~~~~~ If you update this list, consider also updating chain of or'ed conditions ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ in execute_charset function. XEmacs; our equivalent is the condition ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ checking class_bits in the charset_mule and charset_mule_not opcodes. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ ~~ switch (limit) { ~~~~~~~~~~~~~~~~ case 4: ~~~~~~~ if (!memcmp (beg, "word", 4)) return RECC_WORD; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case 5: ~~~~~~~ if (!memcmp (beg, "alnum", 5)) return RECC_ALNUM; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!memcmp (beg, "alpha", 5)) return RECC_ALPHA; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!memcmp (beg, "space", 5)) return RECC_SPACE; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!memcmp (beg, "digit", 5)) return RECC_DIGIT; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!memcmp (beg, "blank", 5)) return RECC_BLANK; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!memcmp (beg, "upper", 5)) return RECC_UPPER; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!memcmp (beg, "lower", 5)) return RECC_LOWER; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!memcmp (beg, "punct", 5)) return RECC_PUNCT; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!memcmp (beg, "ascii", 5)) return RECC_ASCII; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!memcmp (beg, "graph", 5)) return RECC_GRAPH; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!memcmp (beg, "print", 5)) return RECC_PRINT; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!memcmp (beg, "cntrl", 5)) return RECC_CNTRL; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case 6: ~~~~~~~ if (!memcmp (beg, "xdigit", 6)) return RECC_XDIGIT; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case 7: ~~~~~~~ if (!memcmp (beg, "unibyte", 7)) return RECC_UNIBYTE; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case 8: ~~~~~~~ if (!memcmp (beg, "nonascii", 8)) return RECC_NONASCII; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case 9: ~~~~~~~ if (!memcmp (beg, "multibyte", 9)) return RECC_MULTIBYTE; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ } ~ return RECC_ERROR; ~~~~~~~~~~~~~~~~~~ } ~ /* True if CH is in the char class CC. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int ~~~ re_iswctype (int ch, re_wctype_t cc ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RE_ISWCTYPE_ARG_DECL) ~~~~~~~~~~~~~~~~~~~~~ { ~ switch (cc) ~~~~~~~~~~~ { ~ case RECC_ALNUM: return ISALNUM (ch) != 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_ALPHA: return ISALPHA (ch) != 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_BLANK: return ISBLANK (ch) != 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_CNTRL: return ISCNTRL (ch) != 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_DIGIT: return ISDIGIT (ch) != 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_GRAPH: return ISGRAPH (ch) != 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_PRINT: return ISPRINT (ch) != 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_PUNCT: return ISPUNCT (ch) != 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_SPACE: return ISSPACE (ch) != 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef emacs ~~~~~~~~~~~~ case RECC_UPPER: ~~~~~~~~~~~~~~~~ return NILP (lispbuf->case_fold_search) ? ISUPPER (ch) != 0 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ : !NOCASEP (lispbuf, ch); ~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_LOWER: ~~~~~~~~~~~~~~~~ return NILP (lispbuf->case_fold_search) ? ISLOWER (ch) != 0 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ : !NOCASEP (lispbuf, ch); ~~~~~~~~~~~~~~~~~~~~~~~~~ #else ~~~~~ case RECC_UPPER: return ISUPPER (ch) != 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_LOWER: return ISLOWER (ch) != 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ case RECC_XDIGIT: return ISXDIGIT (ch) != 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_ASCII: return ISASCII (ch) != 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_NONASCII: case RECC_MULTIBYTE: return !ISASCII (ch); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_UNIBYTE: return ISUNIBYTE (ch) != 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_WORD: return ISWORD (ch) != 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_ERROR: return false; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ assert (0); ~~~~~~~~~~~ return -1; ~~~~~~~~~~ } ~ #ifdef MULE ~~~~~~~~~~~ static re_bool ~~~~~~~~~~~~~~ re_wctype_can_match_non_ascii (re_wctype_t cc) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ switch (cc) ~~~~~~~~~~~ { ~ case RECC_ASCII: ~~~~~~~~~~~~~~~~ case RECC_UNIBYTE: ~~~~~~~~~~~~~~~~~~ case RECC_CNTRL: ~~~~~~~~~~~~~~~~ case RECC_DIGIT: ~~~~~~~~~~~~~~~~ case RECC_XDIGIT: ~~~~~~~~~~~~~~~~~ case RECC_BLANK: ~~~~~~~~~~~~~~~~ return false; ~~~~~~~~~~~~~ default: ~~~~~~~~ return true; ~~~~~~~~~~~~ } ~ } ~ #endif /* MULE */ ~~~~~~~~~~~~~~~~~ #ifdef emacs ~~~~~~~~~~~~ /* Return a bit-pattern to use in the range-table bits to match multibyte ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ chars of class CC. */ ~~~~~~~~~~~~~~~~~~~~~~ static unsigned char ~~~~~~~~~~~~~~~~~~~~ re_wctype_to_bit (re_wctype_t cc) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ switch (cc) ~~~~~~~~~~~ { ~ case RECC_PRINT: case RECC_GRAPH: ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_ALPHA: return BIT_ALPHA; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_ALNUM: case RECC_WORD: return BIT_WORD; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_LOWER: return BIT_LOWER; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_UPPER: return BIT_UPPER; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_PUNCT: return BIT_PUNCT; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_SPACE: return BIT_SPACE; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_MULTIBYTE: case RECC_NONASCII: ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_ASCII: case RECC_DIGIT: case RECC_XDIGIT: case RECC_CNTRL: ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_BLANK: case RECC_UNIBYTE: case RECC_ERROR: return 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ default: ~~~~~~~~ ABORT (); ~~~~~~~~~ return 0; ~~~~~~~~~ } ~ } ~ #endif /* emacs */ ~~~~~~~~~~~~~~~~~~ ~ static void store_op1 (re_opcode_t op, unsigned char *loc, int arg); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ static void store_op2 (re_opcode_t op, unsigned char *loc, int arg1, int arg2); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ static void insert_op1 (re_opcode_t op, unsigned char *loc, int arg, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ unsigned char *end); ~~~~~~~~~~~~~~~~~~~~ static void insert_op2 (re_opcode_t op, unsigned char *loc, int arg1, int arg2, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ unsigned char *end); ~~~~~~~~~~~~~~~~~~~~ static re_bool at_begline_loc_p (re_char *pattern, re_char *p, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ reg_syntax_t syntax); ~~~~~~~~~~~~~~~~~~~~~ static re_bool at_endline_loc_p (re_char *p, re_char *pend, int syntax); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ static re_bool group_in_compile_stack (compile_stack_type compile_stack, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regnum_t regnum); ~~~~~~~~~~~~~~~~~ static reg_errcode_t compile_range (re_char **p_ptr, re_char *pend, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RE_TRANSLATE_TYPE translate, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ reg_syntax_t syntax, ~~~~~~~~~~~~~~~~~~~~ unsigned char *b); ~~~~~~~~~~~~~~~~~~ #ifdef MULE ~~~~~~~~~~~ static reg_errcode_t compile_extended_range (re_char **p_ptr, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_char *pend, ~~~~~~~~~~~~~~ RE_TRANSLATE_TYPE translate, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ reg_syntax_t syntax, ~~~~~~~~~~~~~~~~~~~~ Lisp_Object rtab); ~~~~~~~~~~~~~~~~~~ #endif /* MULE */ ~~~~~~~~~~~~~~~~~ #ifdef emacs ~~~~~~~~~~~~ reg_errcode_t compile_char_class (re_wctype_t cc, Lisp_Object rtab, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Bitbyte *flags_out); ~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ static re_bool group_match_null_string_p (re_char **p, re_char *end, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ register_info_type *reg_info); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ static re_bool alt_match_null_string_p (re_char *p, re_char *end, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ register_info_type *reg_info); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ static re_bool common_op_match_null_string_p (re_char **p, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_char *end, ~~~~~~~~~~~~~ register_info_type *reg_info); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ static int bcmp_translate (re_char *s1, re_char *s2, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ REGISTER int len, RE_TRANSLATE_TYPE translate ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef emacs ~~~~~~~~~~~~ , Internal_Format fmt, Lisp_Object lispobj ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ ); ~~ static int re_match_2_internal (struct re_pattern_buffer *bufp, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_char *string1, int size1, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_char *string2, int size2, int pos, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ struct re_registers *regs, int stop ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RE_LISP_CONTEXT_ARGS_DECL); ~~~~~~~~~~~~~~~~~~~~~~~~~~~ ~ #ifndef MATCH_MAY_ALLOCATE ~~~~~~~~~~~~~~~~~~~~~~~~~~ /* If we cannot allocate large objects within re_match_2_internal, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ we make the fail stack and register vectors global. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ The fail stack, we grow to the maximum size when a regexp ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ is compiled. ~~~~~~~~~~~~ The register vectors, we adjust in size each time we ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ compile a regexp, according to the number of registers it needs. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ static fail_stack_type fail_stack; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Size with which the following vectors are currently allocated. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ That is so we can make them bigger as needed, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ but never make them smaller. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ static int regs_allocated_size; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ static re_char ** regstart, ** regend; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ static re_char ** old_regstart, ** old_regend; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ static re_char **best_regstart, **best_regend; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ static register_info_type *reg_info; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ static re_char **reg_dummy; ~~~~~~~~~~~~~~~~~~~~~~~~~~~ static register_info_type *reg_info_dummy; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Make the register vectors big enough for NUM_REGS registers, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ but don't make them smaller. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ static ~~~~~~ regex_grow_registers (int num_regs) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ if (num_regs > regs_allocated_size) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ RETALLOC (regstart, num_regs, re_char *); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RETALLOC (regend, num_regs, re_char *); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RETALLOC (old_regstart, num_regs, re_char *); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RETALLOC (old_regend, num_regs, re_char *); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RETALLOC (best_regstart, num_regs, re_char *); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RETALLOC (best_regend, num_regs, re_char *); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RETALLOC (reg_info, num_regs, register_info_type); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RETALLOC (reg_dummy, num_regs, re_char *); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RETALLOC (reg_info_dummy, num_regs, register_info_type); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regs_allocated_size = num_regs; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ } ~ #endif /* not MATCH_MAY_ALLOCATE */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ~ /* `regex_compile' compiles PATTERN (of length SIZE) according to SYNTAX. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Returns one of error codes defined in `regex.h', or zero for success. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Assumes the `allocated' (and perhaps `buffer') and `translate' ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fields are set in BUFP on entry. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ If it succeeds, results are put in BUFP (if it returns an error, the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ contents of BUFP are undefined): ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `buffer' is the compiled pattern; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `syntax' is set to SYNTAX; ~~~~~~~~~~~~~~~~~~~~~~~~~~ `used' is set to the length of the compiled pattern; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `fastmap_accurate' is zero; ~~~~~~~~~~~~~~~~~~~~~~~~~~~ `re_ngroups' is the number of groups/subexpressions (including shy ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ groups) in PATTERN; ~~~~~~~~~~~~~~~~~~~ `re_nsub' is the number of non-shy groups in PATTERN; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `not_bol' and `not_eol' are zero; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ The `fastmap' and `newline_anchor' fields are neither ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ examined nor set. */ ~~~~~~~~~~~~~~~~~~~~~ /* Return, freeing storage we allocated. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define FREE_STACK_RETURN(value) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ do \ ~~~~~~~~~~ { \ ~~~~~~~~~ xfree (compile_stack.stack); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return value; \ ~~~~~~~~~~~~~~~~~~~~ } while (0) ~~~~~~~~~~~ static reg_errcode_t ~~~~~~~~~~~~~~~~~~~~ regex_compile (re_char *pattern, int size, reg_syntax_t syntax, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ struct re_pattern_buffer *bufp) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ /* We fetch characters from PATTERN here. We declare these as int ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (or possibly long) so that chars above 127 can be used as ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ array indices. The macros that fetch a character from the pattern ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ make sure to coerce to unsigned char before assigning, so we won't ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ get bitten by negative numbers here. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* XEmacs change: used to be unsigned char. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ REGISTER EMACS_INT c, c1; ~~~~~~~~~~~~~~~~~~~~~~~~~ /* A random temporary spot in PATTERN. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_char *p1; ~~~~~~~~~~~~ /* Points to the end of the buffer, where we should append. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ REGISTER unsigned char *buf_end; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Keeps track of unclosed groups. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ compile_stack_type compile_stack; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Points to the current (ending) position in the pattern. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_char *p = pattern; ~~~~~~~~~~~~~~~~~~~~~ re_char *pend = pattern + size; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* How to translate the characters in the pattern. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RE_TRANSLATE_TYPE translate = bufp->translate; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Address of the count-byte of the most recently inserted `exactn' ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ command. This makes it possible to tell if a new exact-match ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ character can be added to that command or if the character requires ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ a new `exactn' command. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~ unsigned char *pending_exact = 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Address of start of the most recently finished expression. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ This tells, e.g., postfix * where to find the start of its ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ operand. Reset at the beginning of groups and alternatives. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ unsigned char *laststart = 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Address of beginning of regexp, or inside of last group. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ unsigned char *begalt; ~~~~~~~~~~~~~~~~~~~~~~ /* Place in the uncompiled pattern (i.e., the {) to ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ which to go back if the interval is invalid. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_char *beg_interval; ~~~~~~~~~~~~~~~~~~~~~~ /* Address of the place where a forward jump should go to the end of ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the containing expression. Each alternative of an `or' -- except the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ last -- ends with a forward jump of this sort. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ unsigned char *fixup_alt_jump = 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Counts open-groups as they are encountered. Remembered for the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ matching close-group on the compile stack, so the same register ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ number is put in the stop_memory as the start_memory. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regnum_t regnum = 0; ~~~~~~~~~~~~~~~~~~~~ #ifdef DEBUG ~~~~~~~~~~~~ if (debug_regexps & RE_DEBUG_COMPILATION) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ int debug_count; ~~~~~~~~~~~~~~~~ DEBUG_PRINT1 ("\nCompiling pattern: "); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (debug_count = 0; debug_count < size; debug_count++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ putchar (pattern[debug_count]); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ putchar ('\n'); ~~~~~~~~~~~~~~~ } ~ #endif /* DEBUG */ ~~~~~~~~~~~~~~~~~~ /* Initialize the compile stack. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ compile_stack.stack = TALLOC (INIT_COMPILE_STACK_SIZE, compile_stack_elt_t); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (compile_stack.stack == NULL) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return REG_ESPACE; ~~~~~~~~~~~~~~~~~~ compile_stack.size = INIT_COMPILE_STACK_SIZE; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ compile_stack.avail = 0; ~~~~~~~~~~~~~~~~~~~~~~~~ /* Initialize the pattern buffer. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->syntax = syntax; ~~~~~~~~~~~~~~~~~~~~~~ bufp->fastmap_accurate = 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->not_bol = bufp->not_eol = 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Set `used' to zero, so that if we return an error, the pattern ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ printer (for debugging) will think there's no pattern. We reset it ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ at the end. */ ~~~~~~~~~~~~~~~ bufp->used = 0; ~~~~~~~~~~~~~~~ /* Always count groups, whether or not bufp->no_sub is set. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->re_nsub = 0; ~~~~~~~~~~~~~~~~~~ bufp->re_ngroups = 0; ~~~~~~~~~~~~~~~~~~~~~ bufp->warned_about_incompatible_back_references = 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (bufp->external_to_internal_register == 0) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ bufp->external_to_internal_register_size = INIT_REG_TRANSLATE_SIZE; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RETALLOC (bufp->external_to_internal_register, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->external_to_internal_register_size, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int); ~~~~~ } ~ { ~ int i; ~~~~~~ bufp->external_to_internal_register[0] = 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (i = 1; i < bufp->external_to_internal_register_size; i++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->external_to_internal_register[i] = (int) 0xDEADBEEF; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ #if !defined (emacs) && !defined (SYNTAX_TABLE) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Initialize the syntax table. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ init_syntax_once (); ~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ if (bufp->allocated == 0) ~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ if (bufp->buffer) ~~~~~~~~~~~~~~~~~ { /* If zero allocated, but buffer is non-null, try to realloc ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ enough space. This loses if buffer's address is bogus, but ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ that is the user's responsibility. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RETALLOC (bufp->buffer, INIT_BUF_SIZE, unsigned char); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ else ~~~~ { /* Caller did not allocate a buffer. Do it for them. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->buffer = TALLOC (INIT_BUF_SIZE, unsigned char); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ if (!bufp->buffer) FREE_STACK_RETURN (REG_ESPACE); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->allocated = INIT_BUF_SIZE; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ begalt = buf_end = bufp->buffer; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Loop through the uncompiled pattern until we're at the end. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ while (p != pend) ~~~~~~~~~~~~~~~~~ { ~ PATFETCH (c); ~~~~~~~~~~~~~ switch (c) ~~~~~~~~~~ { ~ case '^': ~~~~~~~~~ { ~ if ( /* If at start of pattern, it's an operator. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ p == pattern + 1 ~~~~~~~~~~~~~~~~ /* If context independent, it's an operator. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || syntax & RE_CONTEXT_INDEP_ANCHORS ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Otherwise, depends on what's come before. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || at_begline_loc_p (pattern, p, syntax)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ BUF_PUSH (begline); ~~~~~~~~~~~~~~~~~~~ else ~~~~ goto normal_char; ~~~~~~~~~~~~~~~~~ } ~ break; ~~~~~~ case '$': ~~~~~~~~~ { ~ if ( /* If at end of pattern, it's an operator. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ p == pend ~~~~~~~~~ /* If context independent, it's an operator. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || syntax & RE_CONTEXT_INDEP_ANCHORS ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Otherwise, depends on what's next. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || at_endline_loc_p (p, pend, syntax)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ BUF_PUSH (endline); ~~~~~~~~~~~~~~~~~~~ else ~~~~ goto normal_char; ~~~~~~~~~~~~~~~~~ } ~ break; ~~~~~~ case '+': ~~~~~~~~~ case '?': ~~~~~~~~~ if ((syntax & RE_BK_PLUS_QM) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || (syntax & RE_LIMITED_OPS)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto normal_char; ~~~~~~~~~~~~~~~~~ handle_plus: ~~~~~~~~~~~~ case '*': ~~~~~~~~~ /* If there is no previous pattern... */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!laststart) ~~~~~~~~~~~~~~~ { ~ if (syntax & RE_CONTEXT_INVALID_OPS) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ FREE_STACK_RETURN (REG_BADRPT); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ else if (!(syntax & RE_CONTEXT_INDEP_OPS)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto normal_char; ~~~~~~~~~~~~~~~~~ } ~ { ~ /* true means zero/many matches are allowed. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_bool zero_times_ok = c != '+'; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_bool many_times_ok = c != '?'; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* true means match shortest string possible. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_bool minimal = false; ~~~~~~~~~~~~~~~~~~~~~~~~ /* If there is a sequence of repetition chars, collapse it ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ down to just one (the right one). We can't combine ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ interval operators with these because of, e.g., `a{2}*', ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ which should only match an even number of `a's. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ while (p != pend) ~~~~~~~~~~~~~~~~~ { ~ PATFETCH (c); ~~~~~~~~~~~~~ if (c == '*' || (!(syntax & RE_BK_PLUS_QM) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ && (c == '+' || c == '?'))) ~~~~~~~~~~~~~~~~~~~~~~~~~~~ ; ~ else if (syntax & RE_BK_PLUS_QM && c == '\\') ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ if (p == pend) FREE_STACK_RETURN (REG_EESCAPE); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PATFETCH (c1); ~~~~~~~~~~~~~~ if (!(c1 == '+' || c1 == '?')) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ PATUNFETCH; ~~~~~~~~~~~ PATUNFETCH; ~~~~~~~~~~~ break; ~~~~~~ } ~ c = c1; ~~~~~~~ } ~ else ~~~~ { ~ PATUNFETCH; ~~~~~~~~~~~ break; ~~~~~~ } ~ /* If we get here, we found another repeat character. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!(syntax & RE_NO_MINIMAL_MATCHING)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ /* "*?" and "+?" and "??" are okay (and mean match ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ minimally), but other sequences (such as "*??" and ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ "+++") are rejected (reserved for future use). */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (minimal || c != '?') ~~~~~~~~~~~~~~~~~~~~~~~~ FREE_STACK_RETURN (REG_BADRPT); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ minimal = true; ~~~~~~~~~~~~~~~ } ~ else ~~~~ { ~ zero_times_ok |= c != '+'; ~~~~~~~~~~~~~~~~~~~~~~~~~~ many_times_ok |= c != '?'; ~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ } ~ /* Star, etc. applied to an empty pattern is equivalent ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ to an empty pattern. */ ~~~~~~~~~~~~~~~~~~~~~~~~ if (!laststart) ~~~~~~~~~~~~~~~ break; ~~~~~~ /* Now we know whether zero matches is allowed ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ and whether two or more matches is allowed ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ and whether we want minimal or maximal matching. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (minimal) ~~~~~~~~~~~~ { ~ if (!many_times_ok) ~~~~~~~~~~~~~~~~~~~ { ~ /* "a??" becomes: ~~~~~~~~~~~~~~~~~ 0: /on_failure_jump to 6 ~~~~~~~~~~~~~~~~~~~~~~~~ 3: /jump to 9 ~~~~~~~~~~~~~ 6: /exactn/1/A ~~~~~~~~~~~~~~ 9: end of pattern. ~~~~~~~~~~~~~~~~~~ */ ~~ GET_BUFFER_SPACE (6); ~~~~~~~~~~~~~~~~~~~~~ INSERT_JUMP (jump, laststart, buf_end + 3); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ buf_end += 3; ~~~~~~~~~~~~~ INSERT_JUMP (on_failure_jump, laststart, laststart + 6); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ buf_end += 3; ~~~~~~~~~~~~~ } ~ else if (zero_times_ok) ~~~~~~~~~~~~~~~~~~~~~~~ { ~ /* "a*?" becomes: ~~~~~~~~~~~~~~~~~ 0: /jump to 6 ~~~~~~~~~~~~~ 3: /exactn/1/A ~~~~~~~~~~~~~~ 6: /on_failure_jump to 3 ~~~~~~~~~~~~~~~~~~~~~~~~ 9: end of pattern. ~~~~~~~~~~~~~~~~~~ */ ~~ GET_BUFFER_SPACE (6); ~~~~~~~~~~~~~~~~~~~~~ INSERT_JUMP (jump, laststart, buf_end + 3); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ buf_end += 3; ~~~~~~~~~~~~~ STORE_JUMP (on_failure_jump, buf_end, laststart + 3); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ buf_end += 3; ~~~~~~~~~~~~~ } ~ else ~~~~ { ~ /* "a+?" becomes: ~~~~~~~~~~~~~~~~~ 0: /exactn/1/A ~~~~~~~~~~~~~~ 3: /on_failure_jump to 0 ~~~~~~~~~~~~~~~~~~~~~~~~ 6: end of pattern. ~~~~~~~~~~~~~~~~~~ */ ~~ GET_BUFFER_SPACE (3); ~~~~~~~~~~~~~~~~~~~~~ STORE_JUMP (on_failure_jump, buf_end, laststart); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ buf_end += 3; ~~~~~~~~~~~~~ } ~ } ~ else ~~~~ { ~ /* Are we optimizing this jump? */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_bool keep_string_p = false; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (many_times_ok) ~~~~~~~~~~~~~~~~~~ { /* More than one repetition is allowed, so put in ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ at the end a backward relative jump from ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `buf_end' to before the next jump we're going ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ to put in below (which jumps from laststart to ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ after this jump). ~~~~~~~~~~~~~~~~~ But if we are at the `*' in the exact sequence `.*\n', ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ insert an unconditional jump backwards to the ., ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ instead of the beginning of the loop. This way we only ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ push a failure point once, instead of every time ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ through the loop. */ ~~~~~~~~~~~~~~~~~~~~~ assert (p - 1 > pattern); ~~~~~~~~~~~~~~~~~~~~~~~~~ /* Allocate the space for the jump. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ GET_BUFFER_SPACE (3); ~~~~~~~~~~~~~~~~~~~~~ /* We know we are not at the first character of the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ pattern, because laststart was nonzero. And we've ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ already incremented `p', by the way, to be the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ character after the `*'. Do we have to do something ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ analogous here for null bytes, because of ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RE_DOT_NOT_NULL? */ ~~~~~~~~~~~~~~~~~~~ if (*(p - 2) == '.' ~~~~~~~~~~~~~~~~~~~ && zero_times_ok ~~~~~~~~~~~~~~~~ && p < pend && *p == '\n' ~~~~~~~~~~~~~~~~~~~~~~~~~ && !(syntax & RE_DOT_NEWLINE)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { /* We have .*\n. */ ~~~~~~~~~~~~~~~~~~~~~~ STORE_JUMP (jump, buf_end, laststart); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ keep_string_p = true; ~~~~~~~~~~~~~~~~~~~~~ } ~ else ~~~~ /* Anything else. */ ~~~~~~~~~~~~~~~~~~~~~ STORE_JUMP (maybe_pop_jump, buf_end, laststart - 3); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* We've added more stuff to the buffer. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ buf_end += 3; ~~~~~~~~~~~~~ } ~ /* On failure, jump from laststart to buf_end + 3, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ which will be the end of the buffer after this jump ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ is inserted. */ ~~~~~~~~~~~~~~~~ GET_BUFFER_SPACE (3); ~~~~~~~~~~~~~~~~~~~~~ INSERT_JUMP (keep_string_p ? on_failure_keep_string_jump ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ : on_failure_jump, ~~~~~~~~~~~~~~~~~~ laststart, buf_end + 3); ~~~~~~~~~~~~~~~~~~~~~~~~ buf_end += 3; ~~~~~~~~~~~~~ if (!zero_times_ok) ~~~~~~~~~~~~~~~~~~~ { ~ /* At least one repetition is required, so insert a ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `dummy_failure_jump' before the initial ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `on_failure_jump' instruction of the loop. This ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ effects a skip over that instruction the first time ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ we hit that loop. */ ~~~~~~~~~~~~~~~~~~~~~ GET_BUFFER_SPACE (3); ~~~~~~~~~~~~~~~~~~~~~ INSERT_JUMP (dummy_failure_jump, laststart, laststart + 6); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ buf_end += 3; ~~~~~~~~~~~~~ } ~ } ~ pending_exact = 0; ~~~~~~~~~~~~~~~~~~ } ~ break; ~~~~~~ case '.': ~~~~~~~~~ laststart = buf_end; ~~~~~~~~~~~~~~~~~~~~ BUF_PUSH (anychar); ~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ #ifdef MULE ~~~~~~~~~~~ #define MAYBE_START_OVER_WITH_EXTENDED(ch) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (ch >= 0x80) do \ ~~~~~~~~~~~~~~~~~~~~~~ { \ ~~~~~~~ goto start_over_with_extended; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } while (0) ~~~~~~~~~~~ #else ~~~~~ #define MAYBE_START_OVER_WITH_EXTENDED(ch) (void)(ch) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ case '[': ~~~~~~~~~ { ~ /* XEmacs change: this whole section */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_bool had_char_class = false; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (p == pend) FREE_STACK_RETURN (REG_EBRACK); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Ensure that we have enough space to push a charset: the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ opcode, the length count, and the bitset; 34 bytes in all. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ GET_BUFFER_SPACE (34); ~~~~~~~~~~~~~~~~~~~~~~ laststart = buf_end; ~~~~~~~~~~~~~~~~~~~~ /* We test `*p == '^' twice, instead of using an if ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ statement, so we only need one BUF_PUSH. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ BUF_PUSH (*p == '^' ? charset_not : charset); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (*p == '^') ~~~~~~~~~~~~~~ p++; ~~~~ /* Remember the first position in the bracket expression. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ p1 = p; ~~~~~~~ /* Push the number of bytes in the bitmap. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ BUF_PUSH ((1 << BYTEWIDTH) / BYTEWIDTH); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Clear the whole map. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~ memset (buf_end, 0, (1 << BYTEWIDTH) / BYTEWIDTH); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* charset_not matches newline according to a syntax bit. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if ((re_opcode_t) buf_end[-2] == charset_not ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ && (syntax & RE_HAT_LISTS_NOT_NEWLINE)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ SET_LIST_BIT ('\n'); ~~~~~~~~~~~~~~~~~~~~ /* Read in characters and ranges, setting map bits. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (;;) ~~~~~~~~ { ~ if (p == pend) FREE_STACK_RETURN (REG_EBRACK); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PATFETCH (c); ~~~~~~~~~~~~~ /* Frumble-bumble, we may have found some extended chars. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Need to start over, process everything using the general ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ extended-char mechanism, and need to use charset_mule and ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ charset_mule_not instead of charset and charset_not. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ MAYBE_START_OVER_WITH_EXTENDED (c); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* \ might escape characters inside [...] and [^...]. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if ((syntax & RE_BACKSLASH_ESCAPE_IN_LISTS) && c == '\\') ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ if (p == pend) FREE_STACK_RETURN (REG_EESCAPE); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PATFETCH (c1); ~~~~~~~~~~~~~~ MAYBE_START_OVER_WITH_EXTENDED (c1); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ SET_LIST_BIT (c1); ~~~~~~~~~~~~~~~~~~ continue; ~~~~~~~~~ } ~ /* Could be the end of the bracket expression. If it's ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ not (i.e., when the bracket expression is `[]' so ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ far), the ']' character bit gets set way below. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (c == ']' && p != p1 + 1) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ /* Look ahead to see if it's a range when the last thing ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ was a character class. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~ if (had_char_class && c == '-' && *p != ']') ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ FREE_STACK_RETURN (REG_ERANGE); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Look ahead to see if it's a range when the last thing ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ was a character: if this is a hyphen not at the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ beginning or the end of a list, then it's the range ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ operator. */ ~~~~~~~~~~~~~ if (c == '-' ~~~~~~~~~~~~ && !(p - 2 >= pattern && p[-2] == '[') ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ && !(p - 3 >= pattern && p[-3] == '[' && p[-2] == '^') ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ && *p != ']') ~~~~~~~~~~~~~ { ~ reg_errcode_t ret; ~~~~~~~~~~~~~~~~~~ MAYBE_START_OVER_WITH_EXTENDED (*(unsigned char *)p); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ret = compile_range (&p, pend, translate, syntax, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ buf_end); ~~~~~~~~~ if (ret != REG_NOERROR) FREE_STACK_RETURN (ret); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ else if (p[0] == '-' && p[1] != ']') ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { /* This handles ranges made up of characters only. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ reg_errcode_t ret; ~~~~~~~~~~~~~~~~~~ /* Move past the `-'. */ ~~~~~~~~~~~~~~~~~~~~~~~~~ PATFETCH (c1); ~~~~~~~~~~~~~~ MAYBE_START_OVER_WITH_EXTENDED (*(unsigned char *)p); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ret = compile_range (&p, pend, translate, syntax, buf_end); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (ret != REG_NOERROR) FREE_STACK_RETURN (ret); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* See if we're at the beginning of a possible character ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ class. */ ~~~~~~~~~~ else if (syntax & RE_CHAR_CLASSES && c == '[' && *p == ':') ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ re_char *str = p + 1; ~~~~~~~~~~~~~~~~~~~~~ PATFETCH (c); ~~~~~~~~~~~~~ c1 = 0; ~~~~~~~ /* If pattern is `[[:'. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (p == pend) FREE_STACK_RETURN (REG_EBRACK); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (;;) ~~~~~~~~ { ~ PATFETCH (c); ~~~~~~~~~~~~~ if ((c == ':' && *p == ']') || p == pend) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ break; ~~~~~~ } ~ c1++; ~~~~~ } ~ /* If isn't a word bracketed by `[:' and `:]': ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ undo the ending character, the letters, and leave ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the leading `:' and `[' (but set bits for them). */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (c == ':' && *p == ']') ~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ re_wctype_t cc = re_wctype (str, c1); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int ch; ~~~~~~~ if (cc == RECC_ERROR) ~~~~~~~~~~~~~~~~~~~~~ FREE_STACK_RETURN (REG_ECTYPE); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Throw away the ] at the end of the character ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ class. */ ~~~~~~~~~~ PATFETCH (c); ~~~~~~~~~~~~~ if (p == pend) FREE_STACK_RETURN (REG_EBRACK); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef MULE ~~~~~~~~~~~ if (re_wctype_can_match_non_ascii (cc)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ goto start_over_with_extended; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ #endif /* MULE */ ~~~~~~~~~~~~~~~~~ for (ch = 0; ch < (1 << BYTEWIDTH); ++ch) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ if (re_iswctype (ch, cc ~~~~~~~~~~~~~~~~~~~~~~~ RE_ISWCTYPE_ARG (current_buffer))) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ SET_LIST_BIT (ch); ~~~~~~~~~~~~~~~~~~ } ~ } ~ had_char_class = true; ~~~~~~~~~~~~~~~~~~~~~~ } ~ else ~~~~ { ~ c1++; ~~~~~ while (c1--) ~~~~~~~~~~~~ PATUNFETCH; ~~~~~~~~~~~ SET_LIST_BIT ('['); ~~~~~~~~~~~~~~~~~~~ SET_LIST_BIT (':'); ~~~~~~~~~~~~~~~~~~~ had_char_class = false; ~~~~~~~~~~~~~~~~~~~~~~~ } ~ } ~ else ~~~~ { ~ had_char_class = false; ~~~~~~~~~~~~~~~~~~~~~~~ SET_LIST_BIT (c); ~~~~~~~~~~~~~~~~~ } ~ } ~ /* Discard any (non)matching list bytes that are all 0 at the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ end of the map. Decrease the map-length byte too. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ while ((int) buf_end[-1] > 0 && buf_end[buf_end[-1] - 1] == 0) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ buf_end[-1]--; ~~~~~~~~~~~~~~ buf_end += buf_end[-1]; ~~~~~~~~~~~~~~~~~~~~~~~ } ~ break; ~~~~~~ #ifdef MULE ~~~~~~~~~~~ start_over_with_extended: ~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ REGISTER Lisp_Object rtab = Qnil; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Bitbyte flags = 0; ~~~~~~~~~~~~~~~~~~ int bytes_needed = sizeof (flags); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_bool had_char_class = false; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* There are extended chars here, which means we need to use the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ unified range-table format. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (buf_end[-2] == charset) ~~~~~~~~~~~~~~~~~~~~~~~~~~~ buf_end[-2] = charset_mule; ~~~~~~~~~~~~~~~~~~~~~~~~~~~ else ~~~~ buf_end[-2] = charset_mule_not; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ buf_end--; ~~~~~~~~~~ p = p1; /* go back to the beginning of the charset, after ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ a possible ^. */ ~~~~~~~~~~~~~~~~ rtab = Vthe_lisp_rangetab; ~~~~~~~~~~~~~~~~~~~~~~~~~~ Fclear_range_table (rtab); ~~~~~~~~~~~~~~~~~~~~~~~~~~ /* charset_not matches newline according to a syntax bit. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if ((re_opcode_t) buf_end[-1] == charset_mule_not ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ && (syntax & RE_HAT_LISTS_NOT_NEWLINE)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ SET_RANGETAB_BIT ('\n'); ~~~~~~~~~~~~~~~~~~~~~~~~ /* Read in characters and ranges, setting map bits. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (;;) ~~~~~~~~ { ~ if (p == pend) FREE_STACK_RETURN (REG_EBRACK); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PATFETCH (c); ~~~~~~~~~~~~~ /* \ might escape characters inside [...] and [^...]. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if ((syntax & RE_BACKSLASH_ESCAPE_IN_LISTS) && c == '\\') ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ if (p == pend) FREE_STACK_RETURN (REG_EESCAPE); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PATFETCH (c1); ~~~~~~~~~~~~~~ SET_RANGETAB_BIT (c1); ~~~~~~~~~~~~~~~~~~~~~~ continue; ~~~~~~~~~ } ~ /* Could be the end of the bracket expression. If it's ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ not (i.e., when the bracket expression is `[]' so ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ far), the ']' character bit gets set way below. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (c == ']' && p != p1 + 1) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ /* Look ahead to see if it's a range when the last thing ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ was a character class. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~ if (had_char_class && c == '-' && *p != ']') ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ FREE_STACK_RETURN (REG_ERANGE); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Look ahead to see if it's a range when the last thing ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ was a character: if this is a hyphen not at the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ beginning or the end of a list, then it's the range ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ operator. */ ~~~~~~~~~~~~~ if (c == '-' ~~~~~~~~~~~~ && !(p - 2 >= pattern && p[-2] == '[') ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ && !(p - 3 >= pattern && p[-3] == '[' && p[-2] == '^') ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ && *p != ']') ~~~~~~~~~~~~~ { ~ reg_errcode_t ret; ~~~~~~~~~~~~~~~~~~ ret = compile_extended_range (&p, pend, translate, syntax, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ rtab); ~~~~~~ if (ret != REG_NOERROR) FREE_STACK_RETURN (ret); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ else if (p[0] == '-' && p[1] != ']') ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { /* This handles ranges made up of characters only. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ reg_errcode_t ret; ~~~~~~~~~~~~~~~~~~ /* Move past the `-'. */ ~~~~~~~~~~~~~~~~~~~~~~~~~ PATFETCH (c1); ~~~~~~~~~~~~~~ ret = compile_extended_range (&p, pend, translate, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ syntax, rtab); ~~~~~~~~~~~~~~ if (ret != REG_NOERROR) FREE_STACK_RETURN (ret); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* See if we're at the beginning of a possible character ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ class. */ ~~~~~~~~~~ else if (syntax & RE_CHAR_CLASSES && c == '[' && *p == ':') ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ re_char *str = p + 1; ~~~~~~~~~~~~~~~~~~~~~ PATFETCH (c); ~~~~~~~~~~~~~ c1 = 0; ~~~~~~~ /* If pattern is `[[:'. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (p == pend) FREE_STACK_RETURN (REG_EBRACK); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (;;) ~~~~~~~~ { ~ PATFETCH (c); ~~~~~~~~~~~~~ if ((c == ':' && *p == ']') || p == pend) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ break; ~~~~~~ } ~ c1++; ~~~~~ } ~ /* If isn't a word bracketed by `[:' and `:]': ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ undo the ending character, the letters, and leave ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the leading `:' and `[' (but set bits for them). */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (c == ':' && *p == ']') ~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ re_wctype_t cc = re_wctype (str, c1); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ reg_errcode_t ret = REG_NOERROR; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (cc == RECC_ERROR) ~~~~~~~~~~~~~~~~~~~~~ FREE_STACK_RETURN (REG_ECTYPE); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Throw away the ] at the end of the character ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ class. */ ~~~~~~~~~~ PATFETCH (c); ~~~~~~~~~~~~~ if (p == pend) FREE_STACK_RETURN (REG_EBRACK); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ret = compile_char_class (cc, rtab, &flags); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (ret != REG_NOERROR) FREE_STACK_RETURN (ret); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ had_char_class = true; ~~~~~~~~~~~~~~~~~~~~~~ } ~ else ~~~~ { ~ c1++; ~~~~~ while (c1--) ~~~~~~~~~~~~ PATUNFETCH; ~~~~~~~~~~~ SET_RANGETAB_BIT ('['); ~~~~~~~~~~~~~~~~~~~~~~~ SET_RANGETAB_BIT (':'); ~~~~~~~~~~~~~~~~~~~~~~~ had_char_class = false; ~~~~~~~~~~~~~~~~~~~~~~~ } ~ } ~ else ~~~~ { ~ had_char_class = false; ~~~~~~~~~~~~~~~~~~~~~~~ SET_RANGETAB_BIT (c); ~~~~~~~~~~~~~~~~~~~~~ } ~ } ~ bytes_needed += unified_range_table_bytes_needed (rtab); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ GET_BUFFER_SPACE (bytes_needed); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ *buf_end++ = flags; ~~~~~~~~~~~~~~~~~~~ unified_range_table_copy_data (rtab, buf_end); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ buf_end += unified_range_table_bytes_used (buf_end); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ } ~ #endif /* MULE */ ~~~~~~~~~~~~~~~~~ case '(': ~~~~~~~~~ if (syntax & RE_NO_BK_PARENS) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto handle_open; ~~~~~~~~~~~~~~~~~ else ~~~~ goto normal_char; ~~~~~~~~~~~~~~~~~ case ')': ~~~~~~~~~ if (syntax & RE_NO_BK_PARENS) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto handle_close; ~~~~~~~~~~~~~~~~~~ else ~~~~ goto normal_char; ~~~~~~~~~~~~~~~~~ case '\n': ~~~~~~~~~~ if (syntax & RE_NEWLINE_ALT) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto handle_alt; ~~~~~~~~~~~~~~~~ else ~~~~ goto normal_char; ~~~~~~~~~~~~~~~~~ case '|': ~~~~~~~~~ if (syntax & RE_NO_BK_VBAR) ~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto handle_alt; ~~~~~~~~~~~~~~~~ else ~~~~ goto normal_char; ~~~~~~~~~~~~~~~~~ case '{': ~~~~~~~~~ if (syntax & RE_INTERVALS && syntax & RE_NO_BK_BRACES) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto handle_interval; ~~~~~~~~~~~~~~~~~~~~~ else ~~~~ goto normal_char; ~~~~~~~~~~~~~~~~~ case '\\': ~~~~~~~~~~ if (p == pend) FREE_STACK_RETURN (REG_EESCAPE); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Do not translate the character after the \, so that we can ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ distinguish, e.g., \B from \b, even if we normally would ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ translate, e.g., B to b. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PATFETCH_RAW (c); ~~~~~~~~~~~~~~~~~ switch (c) ~~~~~~~~~~ { ~ case '(': ~~~~~~~~~ if (syntax & RE_NO_BK_PARENS) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto normal_backslash; ~~~~~~~~~~~~~~~~~~~~~~ handle_open: ~~~~~~~~~~~~ { ~ regnum_t r = 0; ~~~~~~~~~~~~~~~ re_bool shy = 0, named_nonshy = 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!(syntax & RE_NO_SHY_GROUPS) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ && p != pend && itext_ichar_eql (p, '?')) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ INC_IBYTEPTR (p); /* Gobble up the '?'. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PATFETCH (c); /* Fetch the next character, which may be a ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ digit. */ ~~~~~~~~~ switch (c) ~~~~~~~~~~ { ~ case ':': /* shy groups */ ~~~~~~~~~~~~~~~~~~~~~~~~~~ shy = 1; ~~~~~~~~ break; ~~~~~~ case '1': case '2': case '3': case '4': ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case '5': case '6': case '7': case '8': case '9': ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PATUNFETCH; ~~~~~~~~~~~ GET_UNSIGNED_NUMBER (r); ~~~~~~~~~~~~~~~~~~~~~~~~ if (itext_ichar_eql (p, ':')) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ named_nonshy = 1; ~~~~~~~~~~~~~~~~~ INC_IBYTEPTR (p); /* Gobble up the ':'. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ /* Otherwise, fall through and error. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* An explicitly specified regnum must start with ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ non-0. */ ~~~~~~~~~ case '0': ~~~~~~~~~ default: ~~~~~~~~ FREE_STACK_RETURN (REG_BADPAT); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ } ~ ++regnum; ~~~~~~~~~ bufp->re_ngroups++; ~~~~~~~~~~~~~~~~~~~ if (bufp->re_ngroups > MAX_REGNUM) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ FREE_STACK_RETURN (REG_ESUBREG); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ if (!shy) ~~~~~~~~~ { ~ if (named_nonshy) ~~~~~~~~~~~~~~~~~ { ~ if (r < bufp->external_to_internal_register_size) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ if (group_in_compile_stack ~~~~~~~~~~~~~~~~~~~~~~~~~~ (compile_stack, ~~~~~~~~~~~~~~~ bufp->external_to_internal_register[r])) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ /* GNU errors in this context, which is ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ inconsistent; it otherwise has no problem ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ with named non-shy groups overriding ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ previously-assigned group numbers. I choose ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ to error here for consistency with GNU for ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ those writing code that should target ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ both. */ ~~~~~~~~ FREE_STACK_RETURN (REG_ESUBREG); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ } ~ if (r > bufp->re_nsub) ~~~~~~~~~~~~~~~~~~~~~~ { ~ bufp->re_nsub = r; ~~~~~~~~~~~~~~~~~~ } ~ } ~ else ~~~~ { ~ r = ++(bufp->re_nsub); ~~~~~~~~~~~~~~~~~~~~~~ } ~ while (bufp->external_to_internal_register_size <= ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->re_nsub) ~~~~~~~~~~~~~~ { ~ int i; ~~~~~~ int old_size = ~~~~~~~~~~~~~~ bufp->external_to_internal_register_size; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->external_to_internal_register_size ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ += max (old_size + 5, bufp->re_nsub + 5); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RETALLOC (bufp->external_to_internal_register, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->external_to_internal_register_size, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int); ~~~~~ for (i = old_size; ~~~~~~~~~~~~~~~~~~ i < bufp->external_to_internal_register_size; i++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->external_to_internal_register[i] = ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (int) 0xDEADBEEF; ~~~~~~~~~~~~~~~~~ } ~ /* This is explicitly [r] rather than [bufp->re_nsub] for ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the case that the named nonshy group references an ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ unused register number less than bufp->re_nsub. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->external_to_internal_register[r] = ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->re_ngroups; ~~~~~~~~~~~~~~~~~ } ~ if (COMPILE_STACK_FULL) ~~~~~~~~~~~~~~~~~~~~~~~ { ~ RETALLOC (compile_stack.stack, compile_stack.size << 1, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ compile_stack_elt_t); ~~~~~~~~~~~~~~~~~~~~~ if (compile_stack.stack == NULL) return REG_ESPACE; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ compile_stack.size <<= 1; ~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* These are the values to restore when we hit end of this ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ group. They are all relative offsets, so that if the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ whole pattern moves because of realloc, they will still ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ be valid. */ ~~~~~~~~~~~~~ COMPILE_STACK_TOP.begalt_offset = begalt - bufp->buffer; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ COMPILE_STACK_TOP.fixup_alt_jump ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ = fixup_alt_jump ? fixup_alt_jump - bufp->buffer + 1 : 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ COMPILE_STACK_TOP.laststart_offset = buf_end - bufp->buffer; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ COMPILE_STACK_TOP.regnum = bufp->re_ngroups; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ COMPILE_STACK_TOP.inner_group_offset ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ = buf_end - bufp->buffer + 3; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* We will eventually replace the 0 with the number of ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ groups inner to this one, using inner_group_offset, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ above. */ ~~~~~~~~~ GET_BUFFER_SPACE (5); ~~~~~~~~~~~~~~~~~~~~~ store_op2 (start_memory, buf_end, bufp->re_ngroups, 0); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ buf_end += 5; ~~~~~~~~~~~~~ compile_stack.avail++; ~~~~~~~~~~~~~~~~~~~~~~ fixup_alt_jump = 0; ~~~~~~~~~~~~~~~~~~~ laststart = 0; ~~~~~~~~~~~~~~ begalt = buf_end; ~~~~~~~~~~~~~~~~~ pending_exact = 0; ~~~~~~~~~~~~~~~~~~ } ~ break; ~~~~~~ case ')': ~~~~~~~~~ if (syntax & RE_NO_BK_PARENS) goto normal_backslash; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (COMPILE_STACK_EMPTY) ~~~~~~~~~~~~~~~~~~~~~~~~ { ~ if (syntax & RE_UNMATCHED_RIGHT_PAREN_ORD) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto normal_backslash; ~~~~~~~~~~~~~~~~~~~~~~ else ~~~~ FREE_STACK_RETURN (REG_ERPAREN); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ handle_close: ~~~~~~~~~~~~~ if (fixup_alt_jump) ~~~~~~~~~~~~~~~~~~~ { /* Push a dummy failure point at the end of the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ alternative for a possible future ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `pop_failure_jump' to pop. See comments at ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `push_dummy_failure' in `re_match_2'. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ BUF_PUSH (push_dummy_failure); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* We allocated space for this jump when we assigned ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ to `fixup_alt_jump', in the `handle_alt' case below. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ STORE_JUMP (jump_past_alt, fixup_alt_jump, buf_end - 1); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* See similar code for backslashed left paren above. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (COMPILE_STACK_EMPTY) ~~~~~~~~~~~~~~~~~~~~~~~~ { ~ if (syntax & RE_UNMATCHED_RIGHT_PAREN_ORD) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto normal_char; ~~~~~~~~~~~~~~~~~ else ~~~~ FREE_STACK_RETURN (REG_ERPAREN); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* Since we just checked for an empty stack above, this ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ``can't happen''. */ ~~~~~~~~~~~~~~~~~~~~~ assert (compile_stack.avail != 0); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ /* We don't just want to restore into `regnum', because ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ later groups should continue to be numbered higher, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ as in `(ab)c(de)' -- the second group is #2. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regnum_t this_group_regnum; ~~~~~~~~~~~~~~~~~~~~~~~~~~~ unsigned char *inner_group_loc; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ compile_stack.avail--; ~~~~~~~~~~~~~~~~~~~~~~ begalt = bufp->buffer + COMPILE_STACK_TOP.begalt_offset; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fixup_alt_jump ~~~~~~~~~~~~~~ = COMPILE_STACK_TOP.fixup_alt_jump ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ? bufp->buffer + COMPILE_STACK_TOP.fixup_alt_jump - 1 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ : 0; ~~~~ laststart = bufp->buffer + COMPILE_STACK_TOP.laststart_offset; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ this_group_regnum = COMPILE_STACK_TOP.regnum; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ pending_exact = 0; ~~~~~~~~~~~~~~~~~~ /* We're at the end of the group, so now we know how many ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ groups were inside this one. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ inner_group_loc ~~~~~~~~~~~~~~~ = bufp->buffer + COMPILE_STACK_TOP.inner_group_offset; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ STORE_NUMBER (inner_group_loc, regnum - this_group_regnum); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ GET_BUFFER_SPACE (5); ~~~~~~~~~~~~~~~~~~~~~ store_op2 (stop_memory, buf_end, this_group_regnum, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regnum - this_group_regnum); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ buf_end += 5; ~~~~~~~~~~~~~ } ~ break; ~~~~~~ case '|': /* `\|'. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~ if (syntax & RE_LIMITED_OPS || syntax & RE_NO_BK_VBAR) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto normal_backslash; ~~~~~~~~~~~~~~~~~~~~~~ handle_alt: ~~~~~~~~~~~ if (syntax & RE_LIMITED_OPS) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto normal_char; ~~~~~~~~~~~~~~~~~ /* Insert before the previous alternative a jump which ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ jumps to this alternative if the former fails. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ GET_BUFFER_SPACE (3); ~~~~~~~~~~~~~~~~~~~~~ INSERT_JUMP (on_failure_jump, begalt, buf_end + 6); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ pending_exact = 0; ~~~~~~~~~~~~~~~~~~ buf_end += 3; ~~~~~~~~~~~~~ /* The alternative before this one has a jump after it ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ which gets executed if it gets matched. Adjust that ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ jump so it will jump to this alternative's analogous ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ jump (put in below, which in turn will jump to the next ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (if any) alternative's such jump, etc.). The last such ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ jump jumps to the correct final destination. A picture: ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ _____ _____ ~~~~~~~~~~~ | | | | ~~~~~~~~~~~ | v | v ~~~~~~~~~~~ a | b | c ~~~~~~~~~~~ If we are at `b', then fixup_alt_jump right now points to a ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ three-byte space after `a'. We'll put in the jump, set ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fixup_alt_jump to right after `b', and leave behind three ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bytes which we'll fill in when we get to after `c'. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (fixup_alt_jump) ~~~~~~~~~~~~~~~~~~~ STORE_JUMP (jump_past_alt, fixup_alt_jump, buf_end); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Mark and leave space for a jump after this alternative, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ to be filled in later either by next alternative or ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ when know we're at the end of a series of alternatives. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fixup_alt_jump = buf_end; ~~~~~~~~~~~~~~~~~~~~~~~~~ GET_BUFFER_SPACE (3); ~~~~~~~~~~~~~~~~~~~~~ buf_end += 3; ~~~~~~~~~~~~~ laststart = 0; ~~~~~~~~~~~~~~ begalt = buf_end; ~~~~~~~~~~~~~~~~~ break; ~~~~~~ case '{': ~~~~~~~~~ /* If \{ is a literal. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!(syntax & RE_INTERVALS) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* If we're at `\{' and it's not the open-interval ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ operator. */ ~~~~~~~~~~~~~ || ((syntax & RE_INTERVALS) && (syntax & RE_NO_BK_BRACES)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || (p - 2 == pattern && p == pend)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto normal_backslash; ~~~~~~~~~~~~~~~~~~~~~~ #define BAD_INTERVAL(errnum) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ do \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (syntax & RE_NO_BK_BRACES) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto unfetch_interval; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ else \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ FREE_STACK_RETURN (errnum); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } while (0) ~~~~~~~~~~~ handle_interval: ~~~~~~~~~~~~~~~~ { ~ /* If got here, then the syntax allows intervals. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* At least (most) this many matches must be made. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int lower_bound = 0, upper_bound = -1; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ beg_interval = p - 1; ~~~~~~~~~~~~~~~~~~~~~ if (p == pend || itext_ichar_eql (p, '+')) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ BAD_INTERVAL (REG_EBRACE); ~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ GET_UNSIGNED_NUMBER (lower_bound); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (p == pend) BAD_INTERVAL (REG_EBRACE); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PATFETCH (c); ~~~~~~~~~~~~~ if (c == ',') ~~~~~~~~~~~~~ { ~ if (p == pend || itext_ichar_eql (p, '+')) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ BAD_INTERVAL (REG_EBRACE); ~~~~~~~~~~~~~~~~~~~~~~~~~~ GET_UNSIGNED_NUMBER (upper_bound); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (upper_bound < 0) upper_bound = RE_DUP_MAX; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (p == pend) BAD_INTERVAL (REG_EBRACE); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PATFETCH (c); ~~~~~~~~~~~~~ } ~ else ~~~~ { ~ /* Interval such as `{1}' => match exactly once. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ upper_bound = lower_bound; ~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ if (lower_bound > upper_bound) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ BAD_INTERVAL (REG_EBRACE); ~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ if (upper_bound > RE_DUP_MAX) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ BAD_INTERVAL (REG_ESIZEBR); ~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ if (!(syntax & RE_NO_BK_BRACES)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ if (c != '\\') ~~~~~~~~~~~~~~ FREE_STACK_RETURN (REG_BADBR); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (p == pend) ~~~~~~~~~~~~~~ FREE_STACK_RETURN (REG_EESCAPE); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PATFETCH (c); ~~~~~~~~~~~~~ } ~ if (c != '}') ~~~~~~~~~~~~~ { ~ BAD_INTERVAL (REG_EBRACE); ~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* We just parsed a valid interval. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* It's invalid to have no preceding RE. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!laststart) ~~~~~~~~~~~~~~~ { ~ if (syntax & RE_CONTEXT_INVALID_OPS) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ FREE_STACK_RETURN (REG_BADRPT); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ else if (syntax & RE_CONTEXT_INDEP_OPS) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ laststart = buf_end; ~~~~~~~~~~~~~~~~~~~~ else ~~~~ goto unfetch_interval; ~~~~~~~~~~~~~~~~~~~~~~ } ~ /* If the upper bound is zero, don't want to succeed at ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ all; jump from `laststart' to `b + 3', which will be ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the end of the buffer after we insert the jump. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (upper_bound == 0) ~~~~~~~~~~~~~~~~~~~~~ { ~ GET_BUFFER_SPACE (3); ~~~~~~~~~~~~~~~~~~~~~ INSERT_JUMP (jump, laststart, buf_end + 3); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ buf_end += 3; ~~~~~~~~~~~~~ } ~ /* Otherwise, we have a nontrivial interval. When ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ we're all done, the pattern will look like: ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ set_number_at ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ set_number_at ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ succeed_n ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ~~~~~~~~~~~~~~ jump_n ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (The upper bound and `jump_n' are omitted if ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `upper_bound' is 1, though.) */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ else ~~~~ { /* If the upper bound is > 1, we need to insert ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ more at the end of the loop. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int nbytes = 10 + (upper_bound > 1) * 10; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ GET_BUFFER_SPACE (nbytes); ~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Initialize lower bound of the `succeed_n', even ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ though it will be set during matching by its ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ attendant `set_number_at' (inserted next), ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ because `re_compile_fastmap' needs to know. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Jump to the `jump_n' we might insert below. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ INSERT_JUMP2 (succeed_n, laststart, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ buf_end + 5 + (upper_bound > 1) * 5, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ lower_bound); ~~~~~~~~~~~~~ buf_end += 5; ~~~~~~~~~~~~~ /* Code to initialize the lower bound. Insert ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ before the `succeed_n'. The `5' is the last two ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bytes of this `set_number_at', plus 3 bytes of ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the following `succeed_n'. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ insert_op2 (set_number_at, laststart, 5, lower_bound, buf_end); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ buf_end += 5; ~~~~~~~~~~~~~ if (upper_bound > 1) ~~~~~~~~~~~~~~~~~~~~ { /* More than one repetition is allowed, so ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ append a backward jump to the `succeed_n' ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ that starts this interval. ~~~~~~~~~~~~~~~~~~~~~~~~~~ When we've reached this during matching, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ we'll have matched the interval once, so ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ jump back only `upper_bound - 1' times. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ STORE_JUMP2 (jump_n, buf_end, laststart + 5, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ upper_bound - 1); ~~~~~~~~~~~~~~~~~ buf_end += 5; ~~~~~~~~~~~~~ /* The location we want to set is the second ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ parameter of the `jump_n'; that is `b-2' as ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ an absolute address. `laststart' will be ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the `set_number_at' we're about to insert; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `laststart+3' the number to set, the source ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for the relative address. But we are ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ inserting into the middle of the pattern -- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ so everything is getting moved up by 5. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Conclusion: (b - 2) - (laststart + 3) + 5, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ i.e., b - laststart. ~~~~~~~~~~~~~~~~~~~~ We insert this at the beginning of the loop ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ so that if we fail during matching, we'll ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ reinitialize the bounds. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ insert_op2 (set_number_at, laststart, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ buf_end - laststart, ~~~~~~~~~~~~~~~~~~~~ upper_bound - 1, buf_end); ~~~~~~~~~~~~~~~~~~~~~~~~~~ buf_end += 5; ~~~~~~~~~~~~~ } ~ } ~ pending_exact = 0; ~~~~~~~~~~~~~~~~~~ beg_interval = NULL; ~~~~~~~~~~~~~~~~~~~~ } ~ break; ~~~~~~ #undef BAD_INTERVAL ~~~~~~~~~~~~~~~~~~~ unfetch_interval: ~~~~~~~~~~~~~~~~~ /* If an invalid interval, match the characters as literals. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ assert (beg_interval); ~~~~~~~~~~~~~~~~~~~~~~ p = beg_interval; ~~~~~~~~~~~~~~~~~ beg_interval = NULL; ~~~~~~~~~~~~~~~~~~~~ /* normal_char and normal_backslash need `c'. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PATFETCH (c); ~~~~~~~~~~~~~ if (!(syntax & RE_NO_BK_BRACES)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ if (p > pattern && p[-1] == '\\') ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto normal_backslash; ~~~~~~~~~~~~~~~~~~~~~~ } ~ goto normal_char; ~~~~~~~~~~~~~~~~~ #ifdef emacs ~~~~~~~~~~~~ /* There is no way to specify the before_dot and after_dot ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ operators. rms says this is ok. --karl */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case '=': ~~~~~~~~~ BUF_PUSH (at_dot); ~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case 's': ~~~~~~~~~ laststart = buf_end; ~~~~~~~~~~~~~~~~~~~~ PATFETCH (c); ~~~~~~~~~~~~~ /* XEmacs addition */ ~~~~~~~~~~~~~~~~~~~~~ if (c >= 0x80 || syntax_spec_code[c] == 0377) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ FREE_STACK_RETURN (REG_ESYNTAX); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ BUF_PUSH_2 (syntaxspec, syntax_spec_code[c]); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case 'S': ~~~~~~~~~ laststart = buf_end; ~~~~~~~~~~~~~~~~~~~~ PATFETCH (c); ~~~~~~~~~~~~~ /* XEmacs addition */ ~~~~~~~~~~~~~~~~~~~~~ if (c >= 0x80 || syntax_spec_code[c] == 0377) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ FREE_STACK_RETURN (REG_ESYNTAX); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ BUF_PUSH_2 (notsyntaxspec, syntax_spec_code[c]); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ #ifdef MULE ~~~~~~~~~~~ /* 97.2.17 jhod merged in to XEmacs from mule-2.3 */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case 'c': ~~~~~~~~~ laststart = buf_end; ~~~~~~~~~~~~~~~~~~~~ PATFETCH_RAW (c); ~~~~~~~~~~~~~~~~~ if (c < 32 || c > 127) ~~~~~~~~~~~~~~~~~~~~~~ FREE_STACK_RETURN (REG_ECATEGORY); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ BUF_PUSH_2 (categoryspec, c); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case 'C': ~~~~~~~~~ laststart = buf_end; ~~~~~~~~~~~~~~~~~~~~ PATFETCH_RAW (c); ~~~~~~~~~~~~~~~~~ if (c < 32 || c > 127) ~~~~~~~~~~~~~~~~~~~~~~ FREE_STACK_RETURN (REG_ECATEGORY); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ BUF_PUSH_2 (notcategoryspec, c); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ /* end of category patch */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif /* MULE */ ~~~~~~~~~~~~~~~~~ #endif /* emacs */ ~~~~~~~~~~~~~~~~~~ case 'w': ~~~~~~~~~ laststart = buf_end; ~~~~~~~~~~~~~~~~~~~~ BUF_PUSH (wordchar); ~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case 'W': ~~~~~~~~~ laststart = buf_end; ~~~~~~~~~~~~~~~~~~~~ BUF_PUSH (notwordchar); ~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case '<': ~~~~~~~~~ BUF_PUSH (wordbeg); ~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case '>': ~~~~~~~~~ BUF_PUSH (wordend); ~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case 'b': ~~~~~~~~~ BUF_PUSH (wordbound); ~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case 'B': ~~~~~~~~~ BUF_PUSH (notwordbound); ~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case '`': ~~~~~~~~~ BUF_PUSH (begbuf); ~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case '\'': ~~~~~~~~~~ BUF_PUSH (endbuf); ~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case '1': case '2': case '3': case '4': case '5': ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case '6': case '7': case '8': case '9': ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ regnum_t reg = -1, regint; ~~~~~~~~~~~~~~~~~~~~~~~~~~ if (syntax & RE_NO_BK_REFS) ~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto normal_char; ~~~~~~~~~~~~~~~~~ PATUNFETCH; ~~~~~~~~~~~ GET_UNSIGNED_NUMBER (reg); ~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Progressively divide down the backreference until we find ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ one that corresponds to an existing register. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ while (reg > 10 && ~~~~~~~~~~~~~~~~~~ (syntax & RE_NO_MULTI_DIGIT_BK_REFS ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || reg > bufp->re_nsub ~~~~~~~~~~~~~~~~~~~~~~ || (bufp->external_to_internal_register[reg] ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ == (int) 0xDEADBEEF))) ~~~~~~~~~~~~~~~~~~~~~~ { ~ PATUNFETCH; ~~~~~~~~~~~ reg /= 10; ~~~~~~~~~~ } ~ if (reg > bufp->re_nsub ~~~~~~~~~~~~~~~~~~~~~~~ || (bufp->external_to_internal_register[reg] ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ == (int) 0xDEADBEEF)) ~~~~~~~~~~~~~~~~~~~~~ { ~ /* \N with one digit with a non-existing group has always ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ been a syntax error. ~~~~~~~~~~~~~~~~~~~~ GNU as of Fr 27 Mär 2020 16:24:07 GMT do not accept ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ multidigit backreferences; if they did there would be ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ an argument for this not being an error for those ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ backreferences that are less than some known named ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ backreference. As it is currently we should error, this ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ will give those writing code for XEmacs better ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ feedback. */ ~~~~~~~~~~~~ FREE_STACK_RETURN (REG_ESUBREG); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ regint = bufp->external_to_internal_register[reg]; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Can't back reference to a subexpression if inside of it. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (group_in_compile_stack (compile_stack, regint)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ /* Check REG, not REGINT. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ while (reg > 10) ~~~~~~~~~~~~~~~~ { ~ PATUNFETCH; ~~~~~~~~~~~ reg = reg / 10; ~~~~~~~~~~~~~~~ } ~ goto normal_char; ~~~~~~~~~~~~~~~~~ } ~ #ifdef emacs ~~~~~~~~~~~~ if (reg > 9 && ~~~~~~~~~~~~~~ bufp->warned_about_incompatible_back_references == 0) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ bufp->warned_about_incompatible_back_references = 1; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ warn_when_safe (intern ("regex"), Qinfo, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ "Back reference \\%d now has new " ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ "semantics in %s", reg, pattern); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ #endif ~~~~~~ laststart = buf_end; ~~~~~~~~~~~~~~~~~~~~ GET_BUFFER_SPACE (3); ~~~~~~~~~~~~~~~~~~~~~ store_op1 (duplicate, buf_end, regint); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ buf_end += 3; ~~~~~~~~~~~~~ } ~ break; ~~~~~~ case '+': ~~~~~~~~~ case '?': ~~~~~~~~~ if (syntax & RE_BK_PLUS_QM) ~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto handle_plus; ~~~~~~~~~~~~~~~~~ else ~~~~ goto normal_backslash; ~~~~~~~~~~~~~~~~~~~~~~ default: ~~~~~~~~ normal_backslash: ~~~~~~~~~~~~~~~~~ /* You might think it would be useful for \ to mean ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ not to translate; but if we don't translate it, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ it will never match anything. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ c = RE_TRANSLATE (c); ~~~~~~~~~~~~~~~~~~~~~ goto normal_char; ~~~~~~~~~~~~~~~~~ } ~ break; ~~~~~~ default: ~~~~~~~~ /* Expects the character in `c'. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* `p' points to the location after where `c' came from. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ normal_char: ~~~~~~~~~~~~ { ~ /* The following conditional synced to GNU Emacs 22.1. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* If no exactn currently being built. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!pending_exact ~~~~~~~~~~~~~~~~~~ /* If last exactn not at current position. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || pending_exact + *pending_exact + 1 != buf_end ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* We have only one byte following the exactn for the count. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || *pending_exact >= (1 << BYTEWIDTH) - MAX_ICHAR_LEN ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* If followed by a repetition operator. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ If the lookahead fails because of end of pattern, any ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ trailing backslash will get caught later. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || (p != pend && (*p == '*' || *p == '^')) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || ((syntax & RE_BK_PLUS_QM) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ? p + 1 < pend && *p == '\\' && (p[1] == '+' || p[1] == '?') ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ : p != pend && (*p == '+' || *p == '?')) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || ((syntax & RE_INTERVALS) ~~~~~~~~~~~~~~~~~~~~~~~~~~~ && ((syntax & RE_NO_BK_BRACES) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ? p != pend && *p == '{' ~~~~~~~~~~~~~~~~~~~~~~~~ : p + 1 < pend && (p[0] == '\\' && p[1] == '{')))) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ /* Start building a new exactn. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ laststart = buf_end; ~~~~~~~~~~~~~~~~~~~~ BUF_PUSH_2 (exactn, 0); ~~~~~~~~~~~~~~~~~~~~~~~ pending_exact = buf_end - 1; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ #ifndef MULE ~~~~~~~~~~~~ BUF_PUSH (c); ~~~~~~~~~~~~~ (*pending_exact)++; ~~~~~~~~~~~~~~~~~~~ #else ~~~~~ { ~ Bytecount bt_count; ~~~~~~~~~~~~~~~~~~~ Ibyte tmp_buf[MAX_ICHAR_LEN]; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int i; ~~~~~~ bt_count = set_itext_ichar (tmp_buf, c); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (i = 0; i < bt_count; i++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ BUF_PUSH (tmp_buf[i]); ~~~~~~~~~~~~~~~~~~~~~~ (*pending_exact)++; ~~~~~~~~~~~~~~~~~~~ } ~ } ~ #endif ~~~~~~ break; ~~~~~~ } ~ } /* switch (c) */ ~~~~~~~~~~~~~~~~~~ } /* while p != pend */ ~~~~~~~~~~~~~~~~~~~~~~~ /* Through the pattern now. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (fixup_alt_jump) ~~~~~~~~~~~~~~~~~~~ STORE_JUMP (jump_past_alt, fixup_alt_jump, buf_end); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!COMPILE_STACK_EMPTY) ~~~~~~~~~~~~~~~~~~~~~~~~~ FREE_STACK_RETURN (REG_EPAREN); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* If we don't want backtracking, force success ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the first time we reach the end of the compiled pattern. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (syntax & RE_NO_POSIX_BACKTRACKING) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ BUF_PUSH (succeed); ~~~~~~~~~~~~~~~~~~~ xfree (compile_stack.stack); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* We have succeeded; set the length of the buffer. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->used = buf_end - bufp->buffer; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef DEBUG ~~~~~~~~~~~~ if (debug_regexps & RE_DEBUG_COMPILATION) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ DEBUG_PRINT1 ("\nCompiled pattern: \n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ print_compiled_pattern (bufp); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ #endif /* DEBUG */ ~~~~~~~~~~~~~~~~~~ #ifndef MATCH_MAY_ALLOCATE ~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Initialize the failure stack to the largest possible stack. This ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ isn't necessary unless we're trying to avoid calling alloca in ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the search and match routines. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ int num_regs = bufp->re_ngroups + 1; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Since DOUBLE_FAIL_STACK refuses to double only if the current size ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ is strictly greater than re_max_failures, the largest possible stack ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ is 2 * re_max_failures failure points. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (fail_stack.size < (2 * re_max_failures * MAX_FAILURE_ITEMS)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ fail_stack.size = (2 * re_max_failures * MAX_FAILURE_ITEMS); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (! fail_stack.stack) ~~~~~~~~~~~~~~~~~~~~~~~ fail_stack.stack ~~~~~~~~~~~~~~~~ = (fail_stack_elt_t *) xmalloc (fail_stack.size ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ * sizeof (fail_stack_elt_t)); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ else ~~~~ fail_stack.stack ~~~~~~~~~~~~~~~~ = (fail_stack_elt_t *) xrealloc (fail_stack.stack, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (fail_stack.size ~~~~~~~~~~~~~~~~ * sizeof (fail_stack_elt_t))); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ regex_grow_registers (num_regs); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ #endif /* not MATCH_MAY_ALLOCATE */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return REG_NOERROR; ~~~~~~~~~~~~~~~~~~~ } /* regex_compile */ ~~~~~~~~~~~~~~~~~~~~~ ~ /* Subroutines for `regex_compile'. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Store OP at LOC followed by two-byte integer parameter ARG. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ static void ~~~~~~~~~~~ store_op1 (re_opcode_t op, unsigned char *loc, int arg) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ *loc = (unsigned char) op; ~~~~~~~~~~~~~~~~~~~~~~~~~~ STORE_NUMBER (loc + 1, arg); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* Like `store_op1', but for two two-byte parameters ARG1 and ARG2. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ static void ~~~~~~~~~~~ store_op2 (re_opcode_t op, unsigned char *loc, int arg1, int arg2) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ *loc = (unsigned char) op; ~~~~~~~~~~~~~~~~~~~~~~~~~~ STORE_NUMBER (loc + 1, arg1); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ STORE_NUMBER (loc + 3, arg2); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* Copy the bytes from LOC to END to open up three bytes of space at LOC ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for OP followed by two-byte integer parameter ARG. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ static void ~~~~~~~~~~~ insert_op1 (re_opcode_t op, unsigned char *loc, int arg, unsigned char *end) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ REGISTER unsigned char *pfrom = end; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ REGISTER unsigned char *pto = end + 3; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ while (pfrom != loc) ~~~~~~~~~~~~~~~~~~~~ *--pto = *--pfrom; ~~~~~~~~~~~~~~~~~~ store_op1 (op, loc, arg); ~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* Like `insert_op1', but for two two-byte parameters ARG1 and ARG2. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ static void ~~~~~~~~~~~ insert_op2 (re_opcode_t op, unsigned char *loc, int arg1, int arg2, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ unsigned char *end) ~~~~~~~~~~~~~~~~~~~ { ~ REGISTER unsigned char *pfrom = end; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ REGISTER unsigned char *pto = end + 5; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ while (pfrom != loc) ~~~~~~~~~~~~~~~~~~~~ *--pto = *--pfrom; ~~~~~~~~~~~~~~~~~~ store_op2 (op, loc, arg1, arg2); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* P points to just after a ^ in PATTERN. Return true if that ^ comes ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ after an alternative or a begin-subexpression. We assume there is at ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ least one character before the ^. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ static re_bool ~~~~~~~~~~~~~~ at_begline_loc_p (re_char *pattern, re_char *p, reg_syntax_t syntax) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ re_char *prev = p - 2; ~~~~~~~~~~~~~~~~~~~~~~ re_bool prev_prev_backslash = prev > pattern && prev[-1] == '\\'; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return ~~~~~~ /* After a subexpression? */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (*prev == '(' && (syntax & RE_NO_BK_PARENS || prev_prev_backslash)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* After an alternative? */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || (*prev == '|' && (syntax & RE_NO_BK_VBAR || prev_prev_backslash)); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* The dual of at_begline_loc_p. This one is for $. We assume there is ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ at least one character after the $, i.e., `P < PEND'. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ static re_bool ~~~~~~~~~~~~~~ at_endline_loc_p (re_char *p, re_char *pend, int syntax) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ re_char *next = p; ~~~~~~~~~~~~~~~~~~ re_bool next_backslash = *next == '\\'; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_char *next_next = p + 1 < pend ? p + 1 : 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return ~~~~~~ /* Before a subexpression? */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (syntax & RE_NO_BK_PARENS ? *next == ')' ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ : next_backslash && next_next && *next_next == ')') ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Before an alternative? */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || (syntax & RE_NO_BK_VBAR ? *next == '|' ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ : next_backslash && next_next && *next_next == '|'); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* Returns true if REGNUM is in one of COMPILE_STACK's elements and ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ false if it's not. */ ~~~~~~~~~~~~~~~~~~~~~~ static re_bool ~~~~~~~~~~~~~~ group_in_compile_stack (compile_stack_type compile_stack, regnum_t regnum) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ int this_element; ~~~~~~~~~~~~~~~~~ for (this_element = compile_stack.avail - 1; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ this_element >= 0; ~~~~~~~~~~~~~~~~~~ this_element--) ~~~~~~~~~~~~~~~ if (compile_stack.stack[this_element].regnum == regnum) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return true; ~~~~~~~~~~~~ return false; ~~~~~~~~~~~~~ } ~ /* Read the ending character of a range (in a bracket expression) from the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ uncompiled pattern *P_PTR (which ends at PEND). We assume the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ starting character is in `P[-2]'. (`P[-1]' is the character `-'.) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Then we set the translation of all bits between the starting and ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ending characters (inclusive) in the compiled pattern B. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Return an error code. ~~~~~~~~~~~~~~~~~~~~~ We use these short variable names so we can use the same macros as ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `regex_compile' itself. ~~~~~~~~~~~~~~~~~~~~~~~ Under Mule, this is only called when both chars of the range are ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ASCII. */ ~~~~~~~~~ static reg_errcode_t ~~~~~~~~~~~~~~~~~~~~ compile_range (re_char **p_ptr, re_char *pend, RE_TRANSLATE_TYPE translate, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ reg_syntax_t syntax, unsigned char *buf_end) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ Ichar this_char; ~~~~~~~~~~~~~~~~ re_char *p = *p_ptr; ~~~~~~~~~~~~~~~~~~~~ int range_start, range_end; ~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (p == pend) ~~~~~~~~~~~~~~ return REG_ERANGE; ~~~~~~~~~~~~~~~~~~ /* Even though the pattern is a signed `char *', we need to fetch ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ with unsigned char *'s; if the high bit of the pattern character ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ is set, the range endpoints will be negative if we fetch using a ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ signed char *. ~~~~~~~~~~~~~~ We also want to fetch the endpoints without translating them; the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ appropriate translation is done in the bit-setting loop below. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* The SVR4 compiler on the 3B2 had trouble with unsigned const char *. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ range_start = ((const unsigned char *) p)[-2]; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ range_end = ((const unsigned char *) p)[0]; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Have to increment the pointer into the pattern string, so the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ caller isn't still at the ending character. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (*p_ptr)++; ~~~~~~~~~~~ /* If the start is after the end, the range is empty. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (range_start > range_end) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return syntax & RE_NO_EMPTY_RANGES ? REG_ERANGE : REG_NOERROR; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Here we see why `this_char' has to be larger than an `unsigned ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ char' -- the range is inclusive, so if `range_end' == 0xff ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (assuming 8-bit characters), we would otherwise go into an infinite ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ loop, since all characters <= 0xff. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (this_char = range_start; this_char <= range_end; this_char++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ SET_LIST_BIT (RE_TRANSLATE (this_char)); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ return REG_NOERROR; ~~~~~~~~~~~~~~~~~~~ } ~ #ifdef MULE ~~~~~~~~~~~ static reg_errcode_t ~~~~~~~~~~~~~~~~~~~~ compile_extended_range (re_char **p_ptr, re_char *pend, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RE_TRANSLATE_TYPE translate, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ reg_syntax_t syntax, Lisp_Object rtab) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ Ichar this_char, range_start, range_end; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ const Ibyte *p; ~~~~~~~~~~~~~~~ if (*p_ptr == pend) ~~~~~~~~~~~~~~~~~~~ return REG_ERANGE; ~~~~~~~~~~~~~~~~~~ p = (const Ibyte *) *p_ptr; ~~~~~~~~~~~~~~~~~~~~~~~~~~~ range_end = itext_ichar (p); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ p--; /* back to '-' */ ~~~~~~~~~~~~~~~~~~~~~~ DEC_IBYTEPTR (p); /* back to start of range */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* We also want to fetch the endpoints without translating them; the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ appropriate translation is done in the bit-setting loop below. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ range_start = itext_ichar (p); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ INC_IBYTEPTR (*p_ptr); ~~~~~~~~~~~~~~~~~~~~~~ /* If the start is after the end, the range is empty. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (range_start > range_end) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return syntax & RE_NO_EMPTY_RANGES ? REG_ERANGE : REG_NOERROR; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifndef UNICODE_INTERNAL ~~~~~~~~~~~~~~~~~~~~~~~~ /* Can't have ranges spanning different charsets, except maybe for ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ranges entirely within the first 256 chars. (The intent of this is that ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the effect of such a range would be unpredictable, since there is no ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ well-defined ordering over charsets and the particular assignment of ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ charset ID's is arbitrary.) This does not apply to Unicode, with ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ well-defined character values. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if ((range_start >= 0x100 || range_end >= 0x100) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ && !EQ (old_mule_ichar_charset (range_start), ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ old_mule_ichar_charset (range_end))) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return REG_ERANGESPAN; ~~~~~~~~~~~~~~~~~~~~~~ #endif /* not UNICODE_INTERNAL */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (TRANSLATE_P (translate)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ /* #### This might be way inefficient if the range encompasses 10,000 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ chars or something. To be efficient, you'd have to do something like ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ this: ~~~~~ range_table a ~~~~~~~~~~~~~ range_table b; ~~~~~~~~~~~~~~ map_char_table (translation table, [range_start, range_end]) of ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ lambda (ch, translation): ~~~~~~~~~~~~~~~~~~~~~~~~~ put (ch, Qt) in a ~~~~~~~~~~~~~~~~~ put (translation, Qt) in b ~~~~~~~~~~~~~~~~~~~~~~~~~~ invert the range in a and truncate to [range_start, range_end] ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ put the union of a, b in rtab ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ This is to say, we want to map every character that has a translation ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ to its translation, and other characters to themselves. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ This assumes, as is reasonable in practice, that a translation ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ table maps individual characters to their translation, and does ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ not generally map multiple characters to the same translation. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ ~~ for (this_char = range_start; this_char <= range_end; this_char++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ SET_RANGETAB_BIT (RE_TRANSLATE (this_char)); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ } ~ else ~~~~ put_range_table (rtab, range_start, range_end, Qt); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return REG_NOERROR; ~~~~~~~~~~~~~~~~~~~ } ~ #endif /* MULE */ ~~~~~~~~~~~~~~~~~ #ifdef emacs ~~~~~~~~~~~~ reg_errcode_t ~~~~~~~~~~~~~ compile_char_class (re_wctype_t cc, Lisp_Object rtab, Bitbyte *flags_out) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ *flags_out |= re_wctype_to_bit (cc); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ switch (cc) ~~~~~~~~~~~ { ~ case RECC_ASCII: ~~~~~~~~~~~~~~~~ put_range_table (rtab, 0, 0x7f, Qt); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case RECC_XDIGIT: ~~~~~~~~~~~~~~~~~ put_range_table (rtab, 'a', 'f', Qt); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ put_range_table (rtab, 'A', 'f', Qt); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* fallthrough */ ~~~~~~~~~~~~~~~~~ case RECC_DIGIT: ~~~~~~~~~~~~~~~~ put_range_table (rtab, '0', '9', Qt); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case RECC_BLANK: ~~~~~~~~~~~~~~~~ put_range_table (rtab, ' ', ' ', Qt); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ put_range_table (rtab, '\t', '\t', Qt); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case RECC_PRINT: ~~~~~~~~~~~~~~~~ put_range_table (rtab, ' ', 0x7e, Qt); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ put_range_table (rtab, 0x80, CHAR_CODE_LIMIT, Qt); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case RECC_GRAPH: ~~~~~~~~~~~~~~~~ put_range_table (rtab, '!', 0x7e, Qt); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ put_range_table (rtab, 0x80, CHAR_CODE_LIMIT, Qt); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case RECC_NONASCII: ~~~~~~~~~~~~~~~~~~~ case RECC_MULTIBYTE: ~~~~~~~~~~~~~~~~~~~~ put_range_table (rtab, 0x80, CHAR_CODE_LIMIT, Qt); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case RECC_CNTRL: ~~~~~~~~~~~~~~~~ put_range_table (rtab, 0x00, 0x1f, Qt); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case RECC_UNIBYTE: ~~~~~~~~~~~~~~~~~~ /* Never true in XEmacs. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ /* The following all have their own bits in the class_bits argument to ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ charset_mule and charset_mule_not, they don't use the range table ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ information. */ ~~~~~~~~~~~~~~~ case RECC_ALPHA: ~~~~~~~~~~~~~~~~ case RECC_WORD: ~~~~~~~~~~~~~~~ case RECC_ALNUM: /* Equivalent to RECC_WORD */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_LOWER: ~~~~~~~~~~~~~~~~ case RECC_PUNCT: ~~~~~~~~~~~~~~~~ case RECC_SPACE: ~~~~~~~~~~~~~~~~ case RECC_UPPER: ~~~~~~~~~~~~~~~~ break; ~~~~~~ } ~ return REG_NOERROR; ~~~~~~~~~~~~~~~~~~~ } ~ #endif /* MULE */ ~~~~~~~~~~~~~~~~~ ~ /* re_compile_fastmap computes a ``fastmap'' for the compiled pattern in ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ BUFP. A fastmap records which of the (1 << BYTEWIDTH) possible ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ characters can start a string that matches the pattern. This fastmap ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ is used by re_search to skip quickly over impossible starting points. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ The caller must supply the address of a (1 << BYTEWIDTH)-byte data ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ area as BUFP->fastmap. ~~~~~~~~~~~~~~~~~~~~~~ We set the `fastmap', `fastmap_accurate', and `can_be_null' fields in ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the pattern buffer. ~~~~~~~~~~~~~~~~~~~ Returns 0 if we succeed, -2 if an internal error. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int ~~~ re_compile_fastmap (struct re_pattern_buffer *bufp ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RE_LISP_SHORT_CONTEXT_ARGS_DECL) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ int j, k; ~~~~~~~~~ #ifdef MATCH_MAY_ALLOCATE ~~~~~~~~~~~~~~~~~~~~~~~~~ fail_stack_type fail_stack; ~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ /* We don't push any register information onto the failure stack. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* &&#### this should be changed for 8-bit-fixed, for efficiency. see ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ comment marked with &&#### in re_search_2. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ REGISTER char *fastmap = bufp->fastmap; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ unsigned char *pattern = bufp->buffer; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ long size = bufp->used; ~~~~~~~~~~~~~~~~~~~~~~~ re_char *p = pattern; ~~~~~~~~~~~~~~~~~~~~~ REGISTER re_char *pend = pattern + size; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef REGEX_REL_ALLOC ~~~~~~~~~~~~~~~~~~~~~~ /* This holds the pointer to the failure stack, when ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ it is allocated relocatably. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fail_stack_elt_t *failure_stack_ptr; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ /* Assume that each path through the pattern can be null until ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ proven otherwise. We set this false at the bottom of switch ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ statement, to which we get only if a particular path doesn't ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ match the empty string. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_bool path_can_be_null = true; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* We aren't doing a `succeed_n' to begin with. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_bool succeed_n_p = false; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef ERROR_CHECK_MALLOC ~~~~~~~~~~~~~~~~~~~~~~~~~ /* The pattern comes from string data, not buffer data. We don't access ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ any buffer data, so we don't have to worry about malloc() (but the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ disallowed flag may have been set by a caller). */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int depth = bind_regex_malloc_disallowed (0); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ assert (fastmap != NULL && p != NULL); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ INIT_FAIL_STACK (); ~~~~~~~~~~~~~~~~~~~ memset (fastmap, 0, 1 << BYTEWIDTH); /* Assume nothing's valid. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->fastmap_accurate = 1; /* It will be when we're done. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->can_be_null = 0; ~~~~~~~~~~~~~~~~~~~~~~ while (1) ~~~~~~~~~ { ~ if (p == pend || *p == succeed) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ /* We have reached the (effective) end of pattern. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!FAIL_STACK_EMPTY ()) ~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ bufp->can_be_null |= path_can_be_null; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Reset for next path. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~ path_can_be_null = true; ~~~~~~~~~~~~~~~~~~~~~~~~ p = (unsigned char *) fail_stack.stack[--fail_stack.avail].pointer; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ continue; ~~~~~~~~~ } ~ else ~~~~ break; ~~~~~~ } ~ /* We should never be about to go beyond the end of the pattern. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ assert (p < pend); ~~~~~~~~~~~~~~~~~~ switch ((re_opcode_t) *p++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ /* I guess the idea here is to simply not bother with a fastmap ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if a backreference is used, since it's too hard to figure out ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the fastmap for the corresponding group. Setting ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `can_be_null' stops `re_search_2' from using the fastmap, so ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ that is all we do. */ ~~~~~~~~~~~~~~~~~~~~~~ case duplicate: ~~~~~~~~~~~~~~~ bufp->can_be_null = 1; ~~~~~~~~~~~~~~~~~~~~~~ goto done; ~~~~~~~~~~ /* Following are the cases which match a character. These end ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ with `break'. */ ~~~~~~~~~~~~~~~~~ case exactn: ~~~~~~~~~~~~ fastmap[p[1]] = 1; ~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case charset: ~~~~~~~~~~~~~ /* XEmacs: Under Mule, these bit vectors will ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ only contain values for characters below 0x80. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH))) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[j] = 1; ~~~~~~~~~~~~~~~ break; ~~~~~~ case charset_not: ~~~~~~~~~~~~~~~~~ /* Chars beyond end of map must be allowed. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef MULE ~~~~~~~~~~~ for (j = *p * BYTEWIDTH; j < 0x80; j++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[j] = 1; ~~~~~~~~~~~~~~~ /* And all extended characters must be allowed, too. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (j = 0x80; j < 0x100; j++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (ibyte_first_byte_p (j)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[j] = 1; ~~~~~~~~~~~~~~~ #else /* not MULE */ ~~~~~~~~~~~~~~~~~~~~ for (j = *p * BYTEWIDTH; j < (1 << BYTEWIDTH); j++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[j] = 1; ~~~~~~~~~~~~~~~ #endif /* MULE */ ~~~~~~~~~~~~~~~~~ for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!(p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH)))) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[j] = 1; ~~~~~~~~~~~~~~~ break; ~~~~~~ #ifdef MULE ~~~~~~~~~~~ case charset_mule: ~~~~~~~~~~~~~~~~~~ { ~ int nentries; ~~~~~~~~~~~~~ Bitbyte flags = *p++; ~~~~~~~~~~~~~~~~~~~~~ if (flags) ~~~~~~~~~~ { ~ /* We need to consult the syntax table, fastmap won't ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ work. */ ~~~~~~~~ bufp->can_be_null = 1; ~~~~~~~~~~~~~~~~~~~~~~ goto done; ~~~~~~~~~~ } ~ nentries = unified_range_table_nentries ((void *) p); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (j = 0; j < nentries; j++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ EMACS_INT first, last; ~~~~~~~~~~~~~~~~~~~~~~ Lisp_Object dummy_val; ~~~~~~~~~~~~~~~~~~~~~~ int jj; ~~~~~~~ Ibyte strr[MAX_ICHAR_LEN]; ~~~~~~~~~~~~~~~~~~~~~~~~~~ unified_range_table_get_range ((void *) p, j, &first, &last, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ &dummy_val); ~~~~~~~~~~~~ #ifndef UNICODE_INTERNAL ~~~~~~~~~~~~~~~~~~~~~~~~ for (jj = first; jj <= last && jj < 0x80; jj++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[jj] = 1; ~~~~~~~~~~~~~~~~ /* Ranges below 0x100 can span charsets, but there ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ are only two (Control-1 and Latin-1), and ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ either first or last has to be in them. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ set_itext_ichar (strr, first); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[*strr] = 1; ~~~~~~~~~~~~~~~~~~~ if (last < 0x100) ~~~~~~~~~~~~~~~~~ { ~ set_itext_ichar (strr, last); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[*strr] = 1; ~~~~~~~~~~~~~~~~~~~ } ~ else if (CHAR_CODE_LIMIT == last) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ /* This is RECC_MULTIBYTE or RECC_NONASCII; true for all ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ non-ASCII characters. */ ~~~~~~~~~~~~~~~~~~~~~~~~ jj = 0x80; ~~~~~~~~~~ while (jj < 0xA0) ~~~~~~~~~~~~~~~~~ { ~ fastmap[jj++] = 1; ~~~~~~~~~~~~~~~~~~ } ~ } ~ #else ~~~~~ /* Ranges can span charsets. We depend on the fact that ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ lead bytes are monotonically non-decreasing as ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ character values increase. @@#### This is a fairly ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ reasonable assumption in general (but DOES NOT WORK in ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ old Mule due to the ordering of private dimension-1 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ chars before official dimension-2 chars), and introduces ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ a dependency on the particular representation. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ Ibyte strrlast[MAX_ICHAR_LEN]; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ set_itext_ichar (strr, first); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ set_itext_ichar (strrlast, min (last, CHAR_CODE_LIMIT - 1)); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (jj = *strr; jj <= *strrlast; jj++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[jj] = 1; ~~~~~~~~~~~~~~~~ } ~ #endif /* not UNICODE_INTERNAL */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* If it's not a possible first byte, it can't be in the fastmap. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ In UTF-8, lead bytes are not contiguous with ASCII, so a ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ range spanning the ASCII/non-ASCII boundary will put ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ extraneous bytes in the range [0x80 - 0xBF] in the fastmap. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (j = 0x80; j < 0x100; j++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!ibyte_first_byte_p (j)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[j] = 0; ~~~~~~~~~~~~~~~ } ~ break; ~~~~~~ case charset_mule_not: ~~~~~~~~~~~~~~~~~~~~~~ { ~ int nentries; ~~~~~~~~~~~~~ int smallest_prev = 0; ~~~~~~~~~~~~~~~~~~~~~~ Bitbyte flags = *p++; ~~~~~~~~~~~~~~~~~~~~~ if (flags) ~~~~~~~~~~ { ~ /* We need to consult the syntax table, fastmap won't ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ work. */ ~~~~~~~~ bufp->can_be_null = 1; ~~~~~~~~~~~~~~~~~~~~~~ goto done; ~~~~~~~~~~ } ~ nentries = unified_range_table_nentries ((void *) p); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifndef UNICODE_INTERNAL ~~~~~~~~~~~~~~~~~~~~~~~~ for (j = 0; j < nentries; j++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ EMACS_INT first, last; ~~~~~~~~~~~~~~~~~~~~~~ Lisp_Object dummy_val; ~~~~~~~~~~~~~~~~~~~~~~ int jj; ~~~~~~~ unified_range_table_get_range ((void *) p, j, &first, &last, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ &dummy_val); ~~~~~~~~~~~~ for (jj = smallest_prev; jj < first && jj < 0x80; jj++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[jj] = 1; ~~~~~~~~~~~~~~~~ smallest_prev = last + 1; ~~~~~~~~~~~~~~~~~~~~~~~~~ if (smallest_prev >= 0x80) ~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ } ~ /* Also set lead bytes after the end */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (j = smallest_prev; j < 0x80; j++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[j] = 1; ~~~~~~~~~~~~~~~ /* Calculating which lead bytes are actually allowed ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ here is rather difficult, so we just punt and allow ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ all of them. ~~~~~~~~~~~~ */ ~~ for (j = 0x80; j < 0x100; j++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (ibyte_first_byte_p (j)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[j] = 1; ~~~~~~~~~~~~~~~ #else ~~~~~ for (j = 0; j < nentries; j++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ EMACS_INT first, last; ~~~~~~~~~~~~~~~~~~~~~~ /* This denotes a range of lead bytes that are not ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ in the fastmap. */ ~~~~~~~~~~~~~~~~~~ int firstlead, lastlead; ~~~~~~~~~~~~~~~~~~~~~~~~ Lisp_Object dummy_val; ~~~~~~~~~~~~~~~~~~~~~~ int jj; ~~~~~~~ unified_range_table_get_range ((void *) p, j, &first, &last, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ &dummy_val); ~~~~~~~~~~~~ /* With Unicode-internal, lead bytes that are entirely ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ within the range and not including the beginning or end ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ are definitely not in the fastmap. Leading bytes that ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ include the beginning or ending characters will be in ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the fastmap unless the beginning or ending characters ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ are the first or last character, respectively, that uses ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ this lead byte. ~~~~~~~~~~~~~~~ @@#### WARNING! In order to determine whether we are the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ first or last character using a lead byte we use and ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ embed in the code some knowledge of how UTF-8 works -- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ at least, the fact that the the first character using a ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ particular lead byte has the minimum-numbered trailing ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ byte in all its trailing bytes, and the last character ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ using a particular lead byte has the maximum-numbered ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ trailing byte in all its trailing bytes. We abstract ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ away the actual minimum/maximum trailing byte numbers, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ at least. We could perhaps do this more portably by ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ just looking at the representation of the character one ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ higher or lower and seeing if the lead byte changes, but ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ you'd run into the problem of invalid characters, e.g. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if you're at the edge of the range of surrogates or are ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the top-most allowed character. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ ~~ if (first < 0x80) ~~~~~~~~~~~~~~~~~ firstlead = first; ~~~~~~~~~~~~~~~~~~ else ~~~~ { ~ Ibyte strr[MAX_ICHAR_LEN]; ~~~~~~~~~~~~~~~~~~~~~~~~~~ Bytecount slen = set_itext_ichar (strr, first); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int kk; ~~~~~~~ /* Determine if we're the first character using our ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ leading byte. */ ~~~~~~~~~~~~~~~~ for (kk = 1; kk < slen; kk++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (strr[kk] != FIRST_TRAILING_BYTE) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ /* If not, this leading byte might occur, so ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ make sure it gets added to the fastmap. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ firstlead = *strr + 1; ~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ } ~ /* Otherwise, we're the first character using our ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ leading byte, and we don't need to add the leading ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ byte to the fastmap. (If our range doesn't ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ completely cover the leading byte, it will get added ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ anyway by the code handling the other end of the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ range.) */ ~~~~~~~~~~ firstlead = *strr; ~~~~~~~~~~~~~~~~~~ } ~ if (last < 0x80) ~~~~~~~~~~~~~~~~ lastlead = last; ~~~~~~~~~~~~~~~~ else ~~~~ { ~ Ibyte strr[MAX_ICHAR_LEN]; ~~~~~~~~~~~~~~~~~~~~~~~~~~ Bytecount slen ~~~~~~~~~~~~~~ = set_itext_ichar (strr, ~~~~~~~~~~~~~~~~~~~~~~~~ min (last, ~~~~~~~~~~ CHAR_CODE_LIMIT - 1)); ~~~~~~~~~~~~~~~~~~~~~~ int kk; ~~~~~~~ /* Same as above but for the last character using ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ our leading byte. */ ~~~~~~~~~~~~~~~~~~~~ for (kk = 1; kk < slen; kk++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (strr[kk] != LAST_TRAILING_BYTE) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ lastlead = *strr - 1; ~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ } ~ lastlead = *strr; ~~~~~~~~~~~~~~~~~ } ~ /* Now, FIRSTLEAD and LASTLEAD are set to the beginning and ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ end, inclusive, of a range of lead bytes that cannot be ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ in the fastmap. Essentially, we want to set all the other ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bytes to be in the fastmap. Here we handle those after ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the previous range and before this one. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (jj = smallest_prev; jj < firstlead; jj++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[jj] = 1; ~~~~~~~~~~~~~~~~ smallest_prev = lastlead + 1; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* Also set lead bytes after the end of the final range. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (j = smallest_prev; j < 0x100; j++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[j] = 1; ~~~~~~~~~~~~~~~ /* If it's not a possible first byte, it can't be in the fastmap. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ In UTF-8, lead bytes are not contiguous with ASCII, so a ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ range spanning the ASCII/non-ASCII boundary will put ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ extraneous bytes in the range [0x80 - 0xBF] in the fastmap. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (j = 0x80; j < 0x100; j++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!ibyte_first_byte_p (j)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[j] = 0; ~~~~~~~~~~~~~~~ #endif /* UNICODE_INTERNAL */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ break; ~~~~~~ #endif /* MULE */ ~~~~~~~~~~~~~~~~~ case anychar: ~~~~~~~~~~~~~ { ~ int fastmap_newline = fastmap['\n']; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* `.' matches anything ... */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef MULE ~~~~~~~~~~~ /* "anything" only includes bytes that can be the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ first byte of a character. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (j = 0; j < 0x100; j++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (ibyte_first_byte_p (j)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[j] = 1; ~~~~~~~~~~~~~~~ #else ~~~~~ for (j = 0; j < (1 << BYTEWIDTH); j++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[j] = 1; ~~~~~~~~~~~~~~~ #endif ~~~~~~ /* ... except perhaps newline. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!(bufp->syntax & RE_DOT_NEWLINE)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap['\n'] = fastmap_newline; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Return if we have already set `can_be_null'; if we have, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ then the fastmap is irrelevant. Something's wrong here. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ else if (bufp->can_be_null) ~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto done; ~~~~~~~~~~ /* Otherwise, have to check alternative paths. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ } ~ #ifndef emacs ~~~~~~~~~~~~~ case wordchar: ~~~~~~~~~~~~~~ for (j = 0; j < (1 << BYTEWIDTH); j++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (SYNTAX (ignored, j) == Sword) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[j] = 1; ~~~~~~~~~~~~~~~ break; ~~~~~~ case notwordchar: ~~~~~~~~~~~~~~~~~ for (j = 0; j < (1 << BYTEWIDTH); j++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (SYNTAX (ignored, j) != Sword) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[j] = 1; ~~~~~~~~~~~~~~~ break; ~~~~~~ #else /* emacs */ ~~~~~~~~~~~~~~~~~ case wordchar: ~~~~~~~~~~~~~~ case notwordchar: ~~~~~~~~~~~~~~~~~ case wordbound: ~~~~~~~~~~~~~~~ case notwordbound: ~~~~~~~~~~~~~~~~~~ case wordbeg: ~~~~~~~~~~~~~ case wordend: ~~~~~~~~~~~~~ case notsyntaxspec: ~~~~~~~~~~~~~~~~~~~ case syntaxspec: ~~~~~~~~~~~~~~~~ /* This match depends on text properties. These end with ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ aborting optimizations. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->can_be_null = 1; ~~~~~~~~~~~~~~~~~~~~~~ goto done; ~~~~~~~~~~ #if 0 /* all of the following code is unused now that the `syntax-table' ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ property exists -- it's trickier to do this than just look in ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the buffer. &&#### but we could just use the syntax-cache stuff ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ instead; why don't we? --ben */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case wordchar: ~~~~~~~~~~~~~~ k = (int) Sword; ~~~~~~~~~~~~~~~~ goto matchsyntax; ~~~~~~~~~~~~~~~~~ case notwordchar: ~~~~~~~~~~~~~~~~~ k = (int) Sword; ~~~~~~~~~~~~~~~~ goto matchnotsyntax; ~~~~~~~~~~~~~~~~~~~~ case syntaxspec: ~~~~~~~~~~~~~~~~ k = *p++; ~~~~~~~~~ matchsyntax: ~~~~~~~~~~~~ #ifdef MULE ~~~~~~~~~~~ for (j = 0; j < 0x80; j++) ~~~~~~~~~~~~~~~~~~~~~~~~~~ if (SYNTAX ~~~~~~~~~~ (XCHAR_TABLE (BUFFER_MIRROR_SYNTAX_TABLE (lispbuf)), j) == ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (enum syntaxcode) k) ~~~~~~~~~~~~~~~~~~~~ fastmap[j] = 1; ~~~~~~~~~~~~~~~ /* @@#### To be correct, we need to set the fastmap for any ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ lead byte any of whose characters can have this syntax code. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ This is hard to calculate so we just punt for now. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (j = 0x80; j < 0x100; j++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (ibyte_first_byte_p (j)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[j] = 1; ~~~~~~~~~~~~~~~ #else /* not MULE */ ~~~~~~~~~~~~~~~~~~~~ for (j = 0; j < (1 << BYTEWIDTH); j++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (SYNTAX ~~~~~~~~~~ (XCHAR_TABLE (BUFFER_MIRROR_SYNTAX_TABLE (lispbuf)), j) == ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (enum syntaxcode) k) ~~~~~~~~~~~~~~~~~~~~ fastmap[j] = 1; ~~~~~~~~~~~~~~~ #endif /* MULE */ ~~~~~~~~~~~~~~~~~ break; ~~~~~~ case notsyntaxspec: ~~~~~~~~~~~~~~~~~~~ k = *p++; ~~~~~~~~~ matchnotsyntax: ~~~~~~~~~~~~~~~ #ifdef MULE ~~~~~~~~~~~ for (j = 0; j < 0x80; j++) ~~~~~~~~~~~~~~~~~~~~~~~~~~ if (SYNTAX ~~~~~~~~~~ (XCHAR_TABLE ~~~~~~~~~~~~ (BUFFER_MIRROR_SYNTAX_TABLE (lispbuf)), j) != ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (enum syntaxcode) k) ~~~~~~~~~~~~~~~~~~~~ fastmap[j] = 1; ~~~~~~~~~~~~~~~ /* @@#### To be correct, we need to set the fastmap for any ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ lead byte all of whose characters do not have this syntax code. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ This is hard to calculate so we just punt for now. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (j = 0x80; j < 0x100; j++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (ibyte_first_byte_p (j)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[j] = 1; ~~~~~~~~~~~~~~~ #else /* not MULE */ ~~~~~~~~~~~~~~~~~~~~ for (j = 0; j < (1 << BYTEWIDTH); j++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (SYNTAX ~~~~~~~~~~ (XCHAR_TABLE ~~~~~~~~~~~~ (BUFFER_MIRROR_SYNTAX_TABLE (lispbuf)), j) != ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (enum syntaxcode) k) ~~~~~~~~~~~~~~~~~~~~ fastmap[j] = 1; ~~~~~~~~~~~~~~~ #endif /* MULE */ ~~~~~~~~~~~~~~~~~ break; ~~~~~~ #endif /* 0 */ ~~~~~~~~~~~~~~ #ifdef MULE ~~~~~~~~~~~ /* 97/2/17 jhod category patch */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case categoryspec: ~~~~~~~~~~~~~~~~~~ case notcategoryspec: ~~~~~~~~~~~~~~~~~~~~~ bufp->can_be_null = 1; ~~~~~~~~~~~~~~~~~~~~~~ UNBIND_REGEX_MALLOC_CHECK (); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return 0; ~~~~~~~~~ /* end if category patch */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif /* MULE */ ~~~~~~~~~~~~~~~~~ /* All cases after this match the empty string. These end with ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `continue'. */ ~~~~~~~~~~~~~~~ case before_dot: ~~~~~~~~~~~~~~~~ case at_dot: ~~~~~~~~~~~~ case after_dot: ~~~~~~~~~~~~~~~ continue; ~~~~~~~~~ #endif /* emacs */ ~~~~~~~~~~~~~~~~~~ case no_op: ~~~~~~~~~~~ case begline: ~~~~~~~~~~~~~ case endline: ~~~~~~~~~~~~~ case begbuf: ~~~~~~~~~~~~ case endbuf: ~~~~~~~~~~~~ #ifndef emacs ~~~~~~~~~~~~~ case wordbound: ~~~~~~~~~~~~~~~ case notwordbound: ~~~~~~~~~~~~~~~~~~ case wordbeg: ~~~~~~~~~~~~~ case wordend: ~~~~~~~~~~~~~ #endif ~~~~~~ case push_dummy_failure: ~~~~~~~~~~~~~~~~~~~~~~~~ continue; ~~~~~~~~~ case jump_n: ~~~~~~~~~~~~ case pop_failure_jump: ~~~~~~~~~~~~~~~~~~~~~~ case maybe_pop_jump: ~~~~~~~~~~~~~~~~~~~~ case jump: ~~~~~~~~~~ case jump_past_alt: ~~~~~~~~~~~~~~~~~~~ case dummy_failure_jump: ~~~~~~~~~~~~~~~~~~~~~~~~ EXTRACT_NUMBER_AND_INCR (j, p); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ p += j; ~~~~~~~ if (j > 0) ~~~~~~~~~~ continue; ~~~~~~~~~ /* Jump backward implies we just went through the body of a ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ loop and matched nothing. Opcode jumped to should be ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `on_failure_jump' or `succeed_n'. Just treat it like an ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ordinary jump. For a * loop, it has pushed its failure ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ point already; if so, discard that as redundant. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if ((re_opcode_t) *p != on_failure_jump ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ && (re_opcode_t) *p != succeed_n) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ continue; ~~~~~~~~~ p++; ~~~~ EXTRACT_NUMBER_AND_INCR (j, p); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ p += j; ~~~~~~~ /* If what's on the stack is where we are now, pop it. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!FAIL_STACK_EMPTY () ~~~~~~~~~~~~~~~~~~~~~~~~ && fail_stack.stack[fail_stack.avail - 1].pointer == p) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fail_stack.avail--; ~~~~~~~~~~~~~~~~~~~ continue; ~~~~~~~~~ case on_failure_jump: ~~~~~~~~~~~~~~~~~~~~~ case on_failure_keep_string_jump: ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ handle_on_failure_jump: ~~~~~~~~~~~~~~~~~~~~~~~ EXTRACT_NUMBER_AND_INCR (j, p); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* For some patterns, e.g., `(a?)?', `p+j' here points to the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ end of the pattern. We don't want to push such a point, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ since when we restore it above, entering the switch will ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ increment `p' past the end of the pattern. We don't need ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ to push such a point since we obviously won't find any more ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap entries beyond `pend'. Such a pattern can match ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the null string, though. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (p + j < pend) ~~~~~~~~~~~~~~~~~ { ~ if (!PUSH_PATTERN_OP (p + j, fail_stack)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ RESET_FAIL_STACK (); ~~~~~~~~~~~~~~~~~~~~ UNBIND_REGEX_MALLOC_CHECK (); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return -2; ~~~~~~~~~~ } ~ } ~ else ~~~~ bufp->can_be_null = 1; ~~~~~~~~~~~~~~~~~~~~~~ if (succeed_n_p) ~~~~~~~~~~~~~~~~ { ~ EXTRACT_NUMBER_AND_INCR (k, p); /* Skip the n. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ succeed_n_p = false; ~~~~~~~~~~~~~~~~~~~~ } ~ continue; ~~~~~~~~~ case succeed_n: ~~~~~~~~~~~~~~~ /* Get to the number of times to succeed. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ p += 2; ~~~~~~~ /* Increment p past the n for when k != 0. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ EXTRACT_NONNEGATIVE_AND_INCR (k, p); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (k == 0) ~~~~~~~~~~~ { ~ p -= 4; ~~~~~~~ succeed_n_p = true; /* Spaghetti code alert. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto handle_on_failure_jump; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ continue; ~~~~~~~~~ case set_number_at: ~~~~~~~~~~~~~~~~~~~ p += 4; ~~~~~~~ continue; ~~~~~~~~~ case start_memory: ~~~~~~~~~~~~~~~~~~ case stop_memory: ~~~~~~~~~~~~~~~~~ p += 4; ~~~~~~~ continue; ~~~~~~~~~ default: ~~~~~~~~ ABORT (); /* We have listed all the cases. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } /* switch *p++ */ ~~~~~~~~~~~~~~~~~~~ /* Getting here means we have found the possible starting ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ characters for one path of the pattern -- and that the empty ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ string does not match. We need not follow this path further. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Instead, look at the next alternative (remembered on the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ stack), or quit if no more. The test at the top of the loop ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ does these things. */ ~~~~~~~~~~~~~~~~~~~~~~ path_can_be_null = false; ~~~~~~~~~~~~~~~~~~~~~~~~~ p = pend; ~~~~~~~~~ } /* while p */ ~~~~~~~~~~~~~~~ /* Set `can_be_null' for the last path (also the first path, if the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ pattern is empty). */ ~~~~~~~~~~~~~~~~~~~~~~ bufp->can_be_null |= path_can_be_null; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ done: ~~~~~ RESET_FAIL_STACK (); ~~~~~~~~~~~~~~~~~~~~ UNBIND_REGEX_MALLOC_CHECK (); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return 0; ~~~~~~~~~ } /* re_compile_fastmap */ ~~~~~~~~~~~~~~~~~~~~~~~~~~ ~ /* Set REGS to hold NUM_REGS registers, storing them in STARTS and ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ENDS. Subsequent matches using PATTERN_BUFFER and REGS will use ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ this memory for recording register information. STARTS and ENDS ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ must be allocated using the malloc library routine, and must each ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ be at least NUM_REGS * sizeof (regoff_t) bytes long. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ If NUM_REGS == 0, then subsequent matches should allocate their own ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ register data. ~~~~~~~~~~~~~~ Unless this function is called, the first search or match using ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PATTERN_BUFFER will allocate its own register data, without ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ freeing the old data. */ ~~~~~~~~~~~~~~~~~~~~~~~~~ void ~~~~ re_set_registers (struct re_pattern_buffer *bufp, struct re_registers *regs, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int num_regs, regoff_t *starts, regoff_t *ends) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ if (num_regs) ~~~~~~~~~~~~~ { ~ bufp->regs_allocated = REGS_REALLOCATE; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regs->num_regs = num_regs; ~~~~~~~~~~~~~~~~~~~~~~~~~~ regs->start = starts; ~~~~~~~~~~~~~~~~~~~~~ regs->end = ends; ~~~~~~~~~~~~~~~~~ } ~ else ~~~~ { ~ bufp->regs_allocated = REGS_UNALLOCATED; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regs->num_regs = 0; ~~~~~~~~~~~~~~~~~~~ regs->start = regs->end = (regoff_t *) 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ } ~ ~ /* Searching routines. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Like re_search_2, below, but only one string is specified, and ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ doesn't let you say where to stop matching. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int ~~~ re_search (struct re_pattern_buffer *bufp, const char *string, int size, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int startpos, int range, struct re_registers *regs ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RE_LISP_CONTEXT_ARGS_DECL) ~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ return re_search_2 (bufp, NULL, 0, string, size, startpos, range, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regs, size RE_LISP_CONTEXT_ARGS); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* Using the compiled pattern in BUFP->buffer, first tries to match the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ virtual concatenation of STRING1 and STRING2, starting first at index ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ STARTPOS, then at STARTPOS + 1, and so on. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ STRING1 and STRING2 have length SIZE1 and SIZE2, respectively. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RANGE is how far to scan while trying to match. RANGE = 0 means try ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ only at STARTPOS; in general, the last start tried is STARTPOS + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RANGE. ~~~~~~ All sizes and positions refer to bytes (not chars); under Mule, the code ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ knows about the format of the text and will only check at positions ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ where a character starts. ~~~~~~~~~~~~~~~~~~~~~~~~~ With MULE, RANGE is a byte position, not a char position. The last ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ start tried is the character starting <= STARTPOS + RANGE. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ In REGS, return the indices of the virtual concatenation of STRING1 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ and STRING2 that matched the entire BUFP->buffer and its contained ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ subexpressions. ~~~~~~~~~~~~~~~ Do not consider matching one past the index STOP in the virtual ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ concatenation of STRING1 and STRING2. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ We return either the position in the strings at which the match was ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ found, -1 if no match, or -2 if error (such as failure ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ stack overflow). */ ~~~~~~~~~~~~~~~~~~~~ int ~~~ re_search_2 (struct re_pattern_buffer *bufp, const char *str1, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int size1, const char *str2, int size2, int startpos, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int range, struct re_registers *regs, int stop ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RE_LISP_CONTEXT_ARGS_DECL) ~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ int val; ~~~~~~~~ re_char *string1 = (re_char *) str1; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_char *string2 = (re_char *) str2; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ REGISTER char *fastmap = bufp->fastmap; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ REGISTER RE_TRANSLATE_TYPE translate = bufp->translate; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int total_size = size1 + size2; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int endpos = startpos + range; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef REGEX_BEGLINE_CHECK ~~~~~~~~~~~~~~~~~~~~~~~~~~ int anchored_at_begline = 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ re_char *d; ~~~~~~~~~~~ #ifdef emacs ~~~~~~~~~~~~ Internal_Format fmt = buffer_or_other_internal_format (lispobj); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef REL_ALLOC ~~~~~~~~~~~~~~~~ const Ibyte *orig_buftext = ~~~~~~~~~~~~~~~~~~~~~~~~~~~ BUFFERP (lispobj) ? ~~~~~~~~~~~~~~~~~~~ BYTE_BUF_BYTE_ADDRESS (XBUFFER (lispobj), ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ BYTE_BUF_BEG (XBUFFER (lispobj))) : ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 0; ~~ #endif ~~~~~~ #ifdef ERROR_CHECK_MALLOC ~~~~~~~~~~~~~~~~~~~~~~~~~ int depth; ~~~~~~~~~~ #endif ~~~~~~ #endif /* emacs */ ~~~~~~~~~~~~~~~~~~ int forward_search_p; ~~~~~~~~~~~~~~~~~~~~~ /* Check for out-of-range STARTPOS. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (startpos < 0 || startpos > total_size) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return -1; ~~~~~~~~~~ /* Fix up RANGE if it might eventually take us outside ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the virtual concatenation of STRING1 and STRING2. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (endpos < 0) ~~~~~~~~~~~~~~~ range = 0 - startpos; ~~~~~~~~~~~~~~~~~~~~~ else if (endpos > total_size) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ range = total_size - startpos; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ forward_search_p = range > 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (void) (forward_search_p); /* This is only used with assertions, silence the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ compiler warning when they're turned off. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* If the search isn't to be a backwards one, don't waste time in a ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ search for a pattern that must be anchored. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (bufp->used > 0 && (re_opcode_t) bufp->buffer[0] == begbuf && range > 0) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ if (startpos > 0) ~~~~~~~~~~~~~~~~~ return -1; ~~~~~~~~~~ else ~~~~ { ~ d = ((const unsigned char *) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (startpos >= size1 ? string2 - size1 : string1) + startpos); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ range = itext_ichar_len_fmt (d, fmt); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ } ~ #ifdef emacs ~~~~~~~~~~~~ /* In a forward search for something that starts with \=. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ don't keep searching past point. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (bufp->used > 0 && (re_opcode_t) bufp->buffer[0] == at_dot && range > 0) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ if (!BUFFERP (lispobj)) ~~~~~~~~~~~~~~~~~~~~~~~ return -1; ~~~~~~~~~~ range = (BYTE_BUF_PT (XBUFFER (lispobj)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - BYTE_BUF_BEGV (XBUFFER (lispobj)) - startpos); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (range < 0) ~~~~~~~~~~~~~~ return -1; ~~~~~~~~~~ } ~ #endif /* emacs */ ~~~~~~~~~~~~~~~~~~ #ifdef ERROR_CHECK_MALLOC ~~~~~~~~~~~~~~~~~~~~~~~~~ /* Do this after the above return()s. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ depth = bind_regex_malloc_disallowed (1); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ /* Update the fastmap now if not correct already. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ BEGIN_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~~~ if (fastmap && !bufp->fastmap_accurate) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (re_compile_fastmap (bufp RE_LISP_SHORT_CONTEXT_ARGS) == -2) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ END_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~ UNBIND_REGEX_MALLOC_CHECK (); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return -2; ~~~~~~~~~~ } ~ END_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~ RE_SEARCH_RELOCATE_MOVEABLE_DATA_POINTERS (); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef REGEX_BEGLINE_CHECK ~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ long i = 0; ~~~~~~~~~~~ while (i < bufp->used) ~~~~~~~~~~~~~~~~~~~~~~ { ~ if (bufp->buffer[i] == start_memory || ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->buffer[i] == stop_memory) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ i += 4; ~~~~~~~ else ~~~~ break; ~~~~~~ } ~ anchored_at_begline = i < bufp->used && bufp->buffer[i] == begline; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ #endif ~~~~~~ #ifdef emacs ~~~~~~~~~~~~ BEGIN_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~~~ /* Update the mirror syntax table if it's used and dirty. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ SYNTAX_CODE (BUFFER_MIRROR_SYNTAX_TABLE (lispbuf), 'a'); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ scache = setup_syntax_cache (scache, lispobj, lispbuf, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ offset_to_bytexpos (lispobj, startpos), ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 1); ~~~ END_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~ RE_SEARCH_RELOCATE_MOVEABLE_DATA_POINTERS (); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ /* Loop through the string, looking for a place to start matching. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (;;) ~~~~~~~~ { ~ #ifdef REGEX_BEGLINE_CHECK ~~~~~~~~~~~~~~~~~~~~~~~~~~ /* If the regex is anchored at the beginning of a line (i.e. with a ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ^), then we can speed things up by skipping to the next ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ beginning-of-line. However, to determine "beginning of line" we ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ need to look at the previous char, so can't do this check if at ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ beginning of either string. (Well, we could if at the beginning of ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the second string, but it would require additional code, and this ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ is just an optimization.) */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (anchored_at_begline && startpos > 0 && startpos != size1) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ if (range > 0) ~~~~~~~~~~~~~~ { ~ /* whose stupid idea was it anyway to make this ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ function take two strings to match?? */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int lim = 0; ~~~~~~~~~~~~ re_char *orig_d; ~~~~~~~~~~~~~~~~ re_char *stop_d; ~~~~~~~~~~~~~~~~ /* Compute limit as below in fastmap code, so we are guaranteed ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ to remain within a single string. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (startpos < size1 && startpos + range >= size1) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ lim = range - (size1 - startpos); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ d = ((const unsigned char *) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (startpos >= size1 ? string2 - size1 : string1) + startpos); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ orig_d = d; ~~~~~~~~~~~ stop_d = d + range - lim; ~~~~~~~~~~~~~~~~~~~~~~~~~ /* We want to find the next location (including the current ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ one) where the previous char is a newline, so back up one ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ and search forward for a newline. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEC_IBYTEPTR_FMT (d, fmt); /* Ok, since startpos != size1. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Written out as an if-else to avoid testing `translate' ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ inside the loop. */ ~~~~~~~~~~~~~~~~~~~~ if (TRANSLATE_P (translate)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ while (d < stop_d && ~~~~~~~~~~~~~~~~~~~~ RE_TRANSLATE_1 (itext_ichar_fmt (d, fmt, lispobj)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ != '\n') ~~~~~~~~ INC_IBYTEPTR_FMT (d, fmt); ~~~~~~~~~~~~~~~~~~~~~~~~~~ else ~~~~ while (d < stop_d && ~~~~~~~~~~~~~~~~~~~~ itext_ichar_ascii_fmt (d, fmt, lispobj) != '\n') ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ INC_IBYTEPTR_FMT (d, fmt); ~~~~~~~~~~~~~~~~~~~~~~~~~~ /* If we were stopped by a newline, skip forward over it. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Otherwise we will get in an infloop when our start position ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ was at begline. */ ~~~~~~~~~~~~~~~~~~ if (d < stop_d) ~~~~~~~~~~~~~~~ INC_IBYTEPTR_FMT (d, fmt); ~~~~~~~~~~~~~~~~~~~~~~~~~~ range -= d - orig_d; ~~~~~~~~~~~~~~~~~~~~ startpos += d - orig_d; ~~~~~~~~~~~~~~~~~~~~~~~ assert (!forward_search_p || range >= 0); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ else if (range < 0) ~~~~~~~~~~~~~~~~~~~ { ~ /* We're lazy, like in the fastmap code below */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Ichar c; ~~~~~~~~ d = ((const unsigned char *) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (startpos >= size1 ? string2 - size1 : string1) + startpos); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEC_IBYTEPTR_FMT (d, fmt); ~~~~~~~~~~~~~~~~~~~~~~~~~~ c = itext_ichar_fmt (d, fmt, lispobj); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ c = RE_TRANSLATE (c); ~~~~~~~~~~~~~~~~~~~~~ if (c != '\n') ~~~~~~~~~~~~~~ goto advance; ~~~~~~~~~~~~~ } ~ } ~ #endif /* REGEX_BEGLINE_CHECK */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* If a fastmap is supplied, skip quickly over characters that ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ cannot be the start of a match. If the pattern can match the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ null string, however, we don't need to skip characters; we want ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the first null string. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~ if (fastmap && startpos < total_size && !bufp->can_be_null) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ /* For the moment, fastmap always works as if buffer ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ is in default format, so convert chars in the search strings ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ into default format as we go along, if necessary. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ &&#### fastmap needs rethinking for 8-bit-fixed so ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ it's faster. We need it to reflect the raw ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 8-bit-fixed values. That isn't so hard if we assume ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ that the top 96 bytes represent a single 1-byte ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ charset. For 16-bit/32-bit stuff it's probably not ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ worth it to make the fastmap represent the raw, due to ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ its nature -- we'd have to use the LSB for the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap, and that causes lots of problems with Mule ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ chars, where it essentially wipes out the usefulness ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ of the fastmap entirely. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (range > 0) /* Searching forwards. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ int lim = 0; ~~~~~~~~~~~~ int irange = range; ~~~~~~~~~~~~~~~~~~~ if (startpos < size1 && startpos + range >= size1) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ lim = range - (size1 - startpos); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ d = ((const unsigned char *) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (startpos >= size1 ? string2 - size1 : string1) + startpos); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Written out as an if-else to avoid testing `translate' ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ inside the loop. */ ~~~~~~~~~~~~~~~~~~~~ if (TRANSLATE_P (translate)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ while (range > lim) ~~~~~~~~~~~~~~~~~~~ { ~ re_char *old_d = d; ~~~~~~~~~~~~~~~~~~~ #ifdef MULE ~~~~~~~~~~~ Ibyte tempch[MAX_ICHAR_LEN]; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Ichar buf_ch = ~~~~~~~~~~~~~~ RE_TRANSLATE_1 (itext_ichar_fmt (d, fmt, lispobj)); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ set_itext_ichar (tempch, buf_ch); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (fastmap[*tempch]) ~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ #else ~~~~~ if (fastmap[(unsigned char) RE_TRANSLATE_1 (*d)]) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ #endif /* MULE */ ~~~~~~~~~~~~~~~~~ INC_IBYTEPTR_FMT (d, fmt); ~~~~~~~~~~~~~~~~~~~~~~~~~~ range -= (d - old_d); ~~~~~~~~~~~~~~~~~~~~~ assert (!forward_search_p || range >= 0); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ } ~ #ifdef MULE ~~~~~~~~~~~ else if (fmt != FORMAT_DEFAULT) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ while (range > lim) ~~~~~~~~~~~~~~~~~~~ { ~ re_char *old_d = d; ~~~~~~~~~~~~~~~~~~~ Ibyte tempch[MAX_ICHAR_LEN]; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Ichar buf_ch = itext_ichar_fmt (d, fmt, lispobj); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ set_itext_ichar (tempch, buf_ch); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (fastmap[*tempch]) ~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ INC_IBYTEPTR_FMT (d, fmt); ~~~~~~~~~~~~~~~~~~~~~~~~~~ range -= (d - old_d); ~~~~~~~~~~~~~~~~~~~~~ assert (!forward_search_p || range >= 0); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ } ~ #endif /* MULE */ ~~~~~~~~~~~~~~~~~ else ~~~~ { ~ while (range > lim && !fastmap[*d]) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ re_char *old_d = d; ~~~~~~~~~~~~~~~~~~~ INC_IBYTEPTR (d); ~~~~~~~~~~~~~~~~~ range -= (d - old_d); ~~~~~~~~~~~~~~~~~~~~~ assert (!forward_search_p || range >= 0); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ } ~ startpos += irange - range; ~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ else /* Searching backwards. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ /* #### It's not clear why we don't just write a loop, like ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for the moving-forward case. Perhaps the writer got lazy, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ since backward searches aren't so common. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ d = ((const unsigned char *) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (startpos >= size1 ? string2 - size1 : string1) + startpos); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef MULE ~~~~~~~~~~~ { ~ Ibyte tempch[MAX_ICHAR_LEN]; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Ichar buf_ch = ~~~~~~~~~~~~~~ RE_TRANSLATE (itext_ichar_fmt (d, fmt, lispobj)); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ set_itext_ichar (tempch, buf_ch); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!fastmap[*tempch]) ~~~~~~~~~~~~~~~~~~~~~~ goto advance; ~~~~~~~~~~~~~ } ~ #else ~~~~~ if (!fastmap[(unsigned char) RE_TRANSLATE (*d)]) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto advance; ~~~~~~~~~~~~~ #endif /* MULE */ ~~~~~~~~~~~~~~~~~ } ~ } ~ /* If can't match the null string, and that's all we have left, fail. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (range >= 0 && startpos == total_size && fastmap ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ && !bufp->can_be_null) ~~~~~~~~~~~~~~~~~~~~~~ { ~ UNBIND_REGEX_MALLOC_CHECK (); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return -1; ~~~~~~~~~~ } ~ #ifdef emacs /* XEmacs added, w/removal of immediate_quit */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!no_quit_in_re_search) ~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ BEGIN_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~~~ QUIT; ~~~~~ END_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~ RE_SEARCH_RELOCATE_MOVEABLE_DATA_POINTERS (); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ #endif ~~~~~~ BEGIN_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~~~ val = re_match_2_internal (bufp, string1, size1, string2, size2, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ startpos, regs, stop ~~~~~~~~~~~~~~~~~~~~ RE_LISP_CONTEXT_ARGS); ~~~~~~~~~~~~~~~~~~~~~~ #ifndef REGEX_MALLOC ~~~~~~~~~~~~~~~~~~~~ ALLOCA_GARBAGE_COLLECT (); ~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ END_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~ RE_SEARCH_RELOCATE_MOVEABLE_DATA_POINTERS (); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (val >= 0) ~~~~~~~~~~~~~ { ~ UNBIND_REGEX_MALLOC_CHECK (); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return startpos; ~~~~~~~~~~~~~~~~ } ~ if (val == -2) ~~~~~~~~~~~~~~ { ~ UNBIND_REGEX_MALLOC_CHECK (); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return -2; ~~~~~~~~~~ } ~ RE_SEARCH_RELOCATE_MOVEABLE_DATA_POINTERS (); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ advance: ~~~~~~~~ if (!range) ~~~~~~~~~~~ break; ~~~~~~ else if (range > 0) ~~~~~~~~~~~~~~~~~~~ { ~ Bytecount d_size; ~~~~~~~~~~~~~~~~~ d = ((const unsigned char *) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (startpos >= size1 ? string2 - size1 : string1) + startpos); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ d_size = itext_ichar_len_fmt (d, fmt); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ range -= d_size; ~~~~~~~~~~~~~~~~ assert (!forward_search_p || range >= 0); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ startpos += d_size; ~~~~~~~~~~~~~~~~~~~ } ~ else ~~~~ { ~ Bytecount d_size; ~~~~~~~~~~~~~~~~~ /* Note startpos > size1 not >=. If we are on the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ string1/string2 boundary, we want to backup into string1. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ d = ((const unsigned char *) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (startpos > size1 ? string2 - size1 : string1) + startpos); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEC_IBYTEPTR_FMT (d, fmt); ~~~~~~~~~~~~~~~~~~~~~~~~~~ d_size = itext_ichar_len_fmt (d, fmt); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ range += d_size; ~~~~~~~~~~~~~~~~ assert (!forward_search_p || range >= 0); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ startpos -= d_size; ~~~~~~~~~~~~~~~~~~~ } ~ } ~ UNBIND_REGEX_MALLOC_CHECK (); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return -1; ~~~~~~~~~~ } /* re_search_2 */ ~~~~~~~~~~~~~~~~~~~ ~ /* Declarations and macros for re_match_2. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* This converts PTR, a pointer into one of the search strings `string1' ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ and `string2' into an offset from the beginning of that string. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define POINTER_TO_OFFSET(ptr) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (FIRST_STRING_P (ptr) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~ ? ((regoff_t) ((ptr) - string1)) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ : ((regoff_t) ((ptr) - string2 + size1))) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Macros for dealing with the split strings in re_match_2. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define MATCHING_IN_FIRST_STRING (dend == end_match_1) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Call before fetching a character with *d. This switches over to ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ string2 if necessary. */ ~~~~~~~~~~~~~~~~~~~~~~~~~ #define REGEX_PREFETCH() \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ while (d == dend) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { \ ~~~~~~~~~~~ /* End of string2 => fail. */ \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (dend == end_match_2) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto fail; \ ~~~~~~~~~~~~~~~~~~ /* End of string1 => advance to string2. */ \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ d = string2; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~ dend = end_match_2; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* Test if at very beginning or at very end of the virtual concatenation ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ of `string1' and `string2'. If only one string, it's `string2'. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define AT_STRINGS_BEG(d) ((d) == (size1 ? string1 : string2) || !size2) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define AT_STRINGS_END(d) ((d) == end2) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* XEmacs change: ~~~~~~~~~~~~~~~~~ If the given position straddles the string gap, return the equivalent ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ position that is before or after the gap, respectively; otherwise, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return the same position. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define POS_BEFORE_GAP_UNSAFE(d) ((d) == string2 ? end1 : (d)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define POS_AFTER_GAP_UNSAFE(d) ((d) == end1 ? string2 : (d)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Test if CH is a word-constituent character. (XEmacs change) */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define WORDCHAR_P(ch) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (SYNTAX (BUFFER_MIRROR_SYNTAX_TABLE (lispbuf), ch) == Sword) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Free everything we malloc. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef MATCH_MAY_ALLOCATE ~~~~~~~~~~~~~~~~~~~~~~~~~ #define FREE_VAR(var,type) if (var) REGEX_FREE (var, type); var = NULL ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define FREE_VARIABLES() \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ do { \ ~~~~~~~~~~~~~~ UNBIND_REGEX_MALLOC_CHECK (); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ REGEX_FREE_STACK (fail_stack.stack); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ FREE_VAR (regstart, re_char **); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ FREE_VAR (regend, re_char **); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ FREE_VAR (old_regstart, re_char **); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ FREE_VAR (old_regend, re_char **); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ FREE_VAR (best_regstart, re_char **); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ FREE_VAR (best_regend, re_char **); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ FREE_VAR (reg_info, register_info_type *); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ FREE_VAR (reg_dummy, re_char **); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ FREE_VAR (reg_info_dummy, register_info_type *); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } while (0) ~~~~~~~~~~~ #else /* not MATCH_MAY_ALLOCATE */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define FREE_VARIABLES() \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ do { \ ~~~~~~~~~~~ UNBIND_REGEX_MALLOC_CHECK (); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } while (0) ~~~~~~~~~~~ #endif /* MATCH_MAY_ALLOCATE */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* These values must meet several constraints. They must not be valid ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ register values, which means we can use numbers larger than MAX_REGNUM. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ They must differ by 1, because of NUM_FAILURE_ITEMS above. And the value ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for the lowest register must be larger than the value for the highest ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ register, so we do not try to actually save any registers when none are ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ active. */ ~~~~~~~~~~~ #define NO_HIGHEST_ACTIVE_REG (MAX_REGNUM + 1) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define NO_LOWEST_ACTIVE_REG (NO_HIGHEST_ACTIVE_REG + 1) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ~ /* Matching routines. */ ~~~~~~~~~~~~~~~~~~~~~~~~~ #ifndef emacs /* XEmacs never uses this. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* re_match is like re_match_2 except it takes only a single string. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int ~~~ re_match (struct re_pattern_buffer *bufp, const char *string, int size, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int pos, struct re_registers *regs ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RE_LISP_CONTEXT_ARGS_DECL) ~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ int result = re_match_2_internal (bufp, NULL, 0, (re_char *) string, size, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ pos, regs, size ~~~~~~~~~~~~~~~ RE_LISP_CONTEXT_ARGS); ~~~~~~~~~~~~~~~~~~~~~~ ALLOCA_GARBAGE_COLLECT (); ~~~~~~~~~~~~~~~~~~~~~~~~~~ return result; ~~~~~~~~~~~~~~ } ~ #endif /* not emacs */ ~~~~~~~~~~~~~~~~~~~~~~ /* re_match_2 matches the compiled pattern in BUFP against the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (virtual) concatenation of STRING1 and STRING2 (of length SIZE1 and ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ SIZE2, respectively). We start matching at POS, and stop matching ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ at STOP. ~~~~~~~~ If REGS is non-null and the `no_sub' field of BUFP is nonzero, we ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ store offsets for the substring each group matched in REGS. See the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ documentation for exactly how many groups we fill. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ We return -1 if no match, -2 if an internal error (such as the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ failure stack overflowing). Otherwise, we return the length of the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ matched substring. */ ~~~~~~~~~~~~~~~~~~~~~~ int ~~~ re_match_2 (struct re_pattern_buffer *bufp, const char *string1, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int size1, const char *string2, int size2, int pos, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ struct re_registers *regs, int stop ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RE_LISP_CONTEXT_ARGS_DECL) ~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ int result; ~~~~~~~~~~~ #ifdef emacs ~~~~~~~~~~~~ /* Update the mirror syntax table if it's dirty now, this would otherwise ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ cause a malloc() in charset_mule in re_match_2_internal() when checking ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ characters' syntax. */ ~~~~~~~~~~~~~~~~~~~~~~ SYNTAX_CODE (BUFFER_MIRROR_SYNTAX_TABLE (lispbuf), 'a'); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ scache = setup_syntax_cache (scache, lispobj, lispbuf, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ offset_to_bytexpos (lispobj, pos), ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 1); ~~~ #endif ~~~~~~ result = re_match_2_internal (bufp, (re_char *) string1, size1, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (re_char *) string2, size2, ~~~~~~~~~~~~~~~~~~~~~~~~~~~ pos, regs, stop ~~~~~~~~~~~~~~~ RE_LISP_CONTEXT_ARGS); ~~~~~~~~~~~~~~~~~~~~~~ ALLOCA_GARBAGE_COLLECT (); ~~~~~~~~~~~~~~~~~~~~~~~~~~ return result; ~~~~~~~~~~~~~~ } ~ /* This is a separate function so that we can force an alloca cleanup ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ afterwards. */ ~~~~~~~~~~~~~~~ static int ~~~~~~~~~~ re_match_2_internal (struct re_pattern_buffer *bufp, re_char *string1, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int size1, re_char *string2, int size2, int pos, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ struct re_registers *regs, int stop ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RE_LISP_CONTEXT_ARGS_MULE_DECL) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ /* General temporaries. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~ int mcnt; ~~~~~~~~~ re_char *p1; ~~~~~~~~~~~~ int should_succeed; /* XEmacs change */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Just past the end of the corresponding string. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_char *end1, *end2; ~~~~~~~~~~~~~~~~~~~~~ /* Pointers into string1 and string2, just past the last characters in ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ each to consider matching. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_char *end_match_1, *end_match_2; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Where we are in the data, and the end of the current string. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_char *d, *dend; ~~~~~~~~~~~~~~~~~~ /* Where we are in the pattern, and the end of the pattern. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ unsigned char *p; ~~~~~~~~~~~~~~~~~ re_char *pstart; ~~~~~~~~~~~~~~~~ REGISTER re_char *pend; ~~~~~~~~~~~~~~~~~~~~~~~ /* Mark the opcode just after a start_memory, so we can test for an ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ empty subpattern when we get to the stop_memory. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_char *just_past_start_mem = 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* We use this to map every character in the string. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RE_TRANSLATE_TYPE translate = bufp->translate; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Failure point stack. Each place that can handle a failure further ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ down the line pushes a failure point on this stack. It consists of ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ restart, regend, and reg_info for all registers corresponding to ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the subexpressions we're currently inside, plus the number of such ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ registers, and, finally, two char *'s. The first char * is where ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ to resume scanning the pattern; the second one is where to resume ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ scanning the strings. If the latter is zero, the failure point is ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ a ``dummy''; if a failure happens and the failure point is a dummy, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ it gets discarded and the next one is tried. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef MATCH_MAY_ALLOCATE /* otherwise, this is global. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fail_stack_type fail_stack; ~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ #ifdef DEBUG ~~~~~~~~~~~~ static int failure_id; ~~~~~~~~~~~~~~~~~~~~~~ int nfailure_points_pushed = 0, nfailure_points_popped = 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ #ifdef REGEX_REL_ALLOC ~~~~~~~~~~~~~~~~~~~~~~ /* This holds the pointer to the failure stack, when ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ it is allocated relocatably. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fail_stack_elt_t *failure_stack_ptr; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ /* We fill all the registers internally, independent of what we ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return, for use in backreferences. The number here includes ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ an element for register zero. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regnum_t num_regs = bufp->re_ngroups + 1; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* The currently active registers. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regnum_t lowest_active_reg = NO_LOWEST_ACTIVE_REG; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regnum_t highest_active_reg = NO_HIGHEST_ACTIVE_REG; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Information on the contents of registers. These are pointers into ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the input strings; they record just what was matched (on this ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ attempt) by a subexpression part of the pattern, that is, the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regnum-th regstart pointer points to where in the pattern we began ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ matching and the regnum-th regend points to right after where we ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ stopped matching the regnum-th subexpression. (The zeroth register ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ keeps track of what the whole pattern matches.) */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_char **regstart, **regend; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ /* If a group that's operated upon by a repetition operator fails to ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ match anything, then the register for its start will need to be ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ restored because it will have been set to wherever in the string we ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ are when we last see its open-group operator. Similarly for a ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ register's end. */ ~~~~~~~~~~~~~~~~~~~ #ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_char **old_regstart, **old_regend; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ /* The is_active field of reg_info helps us keep track of which (possibly ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ nested) subexpressions we are currently in. The matched_something ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ field of reg_info[reg_num] helps us tell whether or not we have ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ matched any of the pattern so far this time through the reg_num-th ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ subexpression. These two fields get reset each time through any ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ loop their register is in. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef MATCH_MAY_ALLOCATE /* otherwise, this is global. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ register_info_type *reg_info; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ /* The following record the register info as found in the above ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ variables when we find a match better than any we've seen before. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ This happens as we backtrack through the failure points, which in ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ turn happens only if we have not yet matched the entire string. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int best_regs_set = false; ~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_char **best_regstart, **best_regend; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ /* Logically, this is `best_regend[0]'. But we don't want to have to ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ allocate space for that if we're not allocating space for anything ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ else (see below). Also, we never need info about register 0 for ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ any of the other register vectors, and it seems rather a kludge to ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ treat `best_regend' differently than the rest. So we keep track of ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the end of the best match so far in a separate variable. We ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ initialize this to NULL so that when we backtrack the first time ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ and need to test it, it's not garbage. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_char *match_end = NULL; ~~~~~~~~~~~~~~~~~~~~~~~~~~ /* This helps SET_REGS_MATCHED avoid doing redundant work. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int set_regs_matched_done = 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Used when we pop values we don't care about. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_char **reg_dummy; ~~~~~~~~~~~~~~~~~~~~ register_info_type *reg_info_dummy; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ #ifdef DEBUG ~~~~~~~~~~~~ /* Counts the total number of registers pushed. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int num_regs_pushed = 0; ~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ /* 1 if this match ends in the same string (string1 or string2) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ as the best previous match. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_bool same_str_p; ~~~~~~~~~~~~~~~~~~~ /* 1 if this match is the best seen so far. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_bool best_match_p; ~~~~~~~~~~~~~~~~~~~~~ #ifdef emacs ~~~~~~~~~~~~ Internal_Format fmt = buffer_or_other_internal_format (lispobj); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef REL_ALLOC ~~~~~~~~~~~~~~~~ const Ibyte *orig_buftext = ~~~~~~~~~~~~~~~~~~~~~~~~~~~ BUFFERP (lispobj) ? ~~~~~~~~~~~~~~~~~~~ BYTE_BUF_BYTE_ADDRESS (XBUFFER (lispobj), ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ BYTE_BUF_BEG (XBUFFER (lispobj))) : ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 0; ~~ #endif ~~~~~~ #ifdef ERROR_CHECK_MALLOC ~~~~~~~~~~~~~~~~~~~~~~~~~ int depth = bind_regex_malloc_disallowed (1); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ #endif /* emacs */ ~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT1 ("\n\nEntering re_match_2.\n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ BEGIN_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~~~ INIT_FAIL_STACK (); ~~~~~~~~~~~~~~~~~~~ p = (unsigned char *) ALLOCA (bufp->used); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ END_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~ /* re_match_2_internal() modifies the compiled pattern (see the succeed_n, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ jump_n, set_number_at opcodes), make it re-entrant by working on a ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ copy. This should also give better locality of reference. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ memcpy (p, bufp->buffer, bufp->used); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ pstart = (re_char *) p; ~~~~~~~~~~~~~~~~~~~~~~~ pend = pstart + bufp->used; ~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef MATCH_MAY_ALLOCATE ~~~~~~~~~~~~~~~~~~~~~~~~~ /* Do not bother to initialize all the register variables if there are ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ no groups in the pattern, as it takes a fair amount of time. If ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ there are groups, we include space for register 0 (the whole ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ pattern), even though we never use it, since it simplifies the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ array indexing. We should fix this. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (bufp->re_ngroups) ~~~~~~~~~~~~~~~~~~~~~ { ~ BEGIN_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~~~ regstart = REGEX_TALLOC (num_regs, re_char *); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regend = REGEX_TALLOC (num_regs, re_char *); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ old_regstart = REGEX_TALLOC (num_regs, re_char *); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ old_regend = REGEX_TALLOC (num_regs, re_char *); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ best_regstart = REGEX_TALLOC (num_regs, re_char *); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ best_regend = REGEX_TALLOC (num_regs, re_char *); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ reg_info = REGEX_TALLOC (num_regs, register_info_type); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ reg_dummy = REGEX_TALLOC (num_regs, re_char *); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ reg_info_dummy = REGEX_TALLOC (num_regs, register_info_type); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ END_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~ if (!(regstart && regend && old_regstart && old_regend && reg_info ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ && best_regstart && best_regend && reg_dummy && reg_info_dummy)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ FREE_VARIABLES (); ~~~~~~~~~~~~~~~~~~ return -2; ~~~~~~~~~~ } ~ } ~ else ~~~~ { ~ /* We must initialize all our variables to NULL, so that ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `FREE_VARIABLES' doesn't try to free them. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regstart = regend = old_regstart = old_regend = best_regstart ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ = best_regend = reg_dummy = NULL; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ reg_info = reg_info_dummy = (register_info_type *) NULL; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ #endif /* MATCH_MAY_ALLOCATE */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #if defined (emacs) && defined (REL_ALLOC) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ /* If the allocations above (or the call to setup_syntax_cache() in ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_match_2) caused a rel-alloc relocation, then fix up the data ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ pointers */ ~~~~~~~~~~~ Bytecount offset = offset_post_relocation (lispobj, orig_buftext); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (offset) ~~~~~~~~~~~ { ~ string1 += offset; ~~~~~~~~~~~~~~~~~~ string2 += offset; ~~~~~~~~~~~~~~~~~~ } ~ } ~ #endif /* defined (emacs) && defined (REL_ALLOC) */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* The starting position is bogus. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (pos < 0 || pos > size1 + size2) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ FREE_VARIABLES (); ~~~~~~~~~~~~~~~~~~ return -1; ~~~~~~~~~~ } ~ /* Initialize subexpression text positions to our sentinel to mark ones that ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ no start_memory/stop_memory has been seen for. Also initialize the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ register information struct. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (mcnt = 1; mcnt < num_regs; mcnt++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ regstart[mcnt] = regend[mcnt] = old_regstart[mcnt] = old_regend[mcnt] ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ = best_regstart[mcnt] = best_regend[mcnt] = REG_UNSET_VALUE; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ REG_MATCH_NULL_STRING_P (reg_info[mcnt]) = MATCH_NULL_UNSET_VALUE; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ IS_ACTIVE (reg_info[mcnt]) = 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ MATCHED_SOMETHING (reg_info[mcnt]) = 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ EVER_MATCHED_SOMETHING (reg_info[mcnt]) = 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* We move `string1' into `string2' if the latter's empty -- but not if ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `string1' is null. */ ~~~~~~~~~~~~~~~~~~~~~~ if (size2 == 0 && string1 != NULL) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ string2 = string1; ~~~~~~~~~~~~~~~~~~ size2 = size1; ~~~~~~~~~~~~~~ string1 = 0; ~~~~~~~~~~~~ size1 = 0; ~~~~~~~~~~ } ~ end1 = string1 + size1; ~~~~~~~~~~~~~~~~~~~~~~~ end2 = string2 + size2; ~~~~~~~~~~~~~~~~~~~~~~~ /* Compute where to stop matching, within the two strings. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (stop <= size1) ~~~~~~~~~~~~~~~~~~ { ~ end_match_1 = string1 + stop; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ end_match_2 = string2; ~~~~~~~~~~~~~~~~~~~~~~ } ~ else ~~~~ { ~ end_match_1 = end1; ~~~~~~~~~~~~~~~~~~~ end_match_2 = string2 + stop - size1; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* `p' scans through the pattern as `d' scans through the data. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `dend' is the end of the input string that `d' points within. `d' ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ is advanced into the following input string whenever necessary, but ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ this happens before fetching; therefore, at the beginning of the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ loop, `d' can be pointing at the end of a string, but it cannot ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ equal `string2'. */ ~~~~~~~~~~~~~~~~~~~~ if (size1 > 0 && pos <= size1) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ d = string1 + pos; ~~~~~~~~~~~~~~~~~~ dend = end_match_1; ~~~~~~~~~~~~~~~~~~~ } ~ else ~~~~ { ~ d = string2 + pos - size1; ~~~~~~~~~~~~~~~~~~~~~~~~~~ dend = end_match_2; ~~~~~~~~~~~~~~~~~~~ } ~ DEBUG_MATCH_PRINT1 ("The compiled pattern is: \n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT_COMPILED_PATTERN (bufp, p, pend); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT1 ("The string to match is: `"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT_DOUBLE_STRING (d, string1, size1, string2, size2); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT1 ("'\n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* This loops over pattern commands. It exits by returning from the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ function if the match is complete, or it drops through if the match ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fails at this starting point in the input data. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (;;) ~~~~~~~~ { ~ DEBUG_MATCH_PRINT2 ("\n0x%zx: ", (Bytecount) p); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef emacs /* XEmacs added, w/removal of immediate_quit */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!no_quit_in_re_search) ~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ BEGIN_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~~~ QUIT; ~~~~~ END_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~ RE_MATCH_RELOCATE_MOVEABLE_DATA_POINTERS (); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ #endif ~~~~~~ if (p == pend) ~~~~~~~~~~~~~~ { /* End of pattern means we might have succeeded. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT1 ("end of pattern ... "); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* If we haven't matched the entire string, and we want the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ longest match, try backtracking. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (d != end_match_2) ~~~~~~~~~~~~~~~~~~~~~ { ~ same_str_p = (FIRST_STRING_P (match_end) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ == MATCHING_IN_FIRST_STRING); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* AIX compiler got confused when this was combined ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ with the previous declaration. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (same_str_p) ~~~~~~~~~~~~~~~ best_match_p = d > match_end; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ else ~~~~ best_match_p = !MATCHING_IN_FIRST_STRING; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT1 ("backtracking.\n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!FAIL_STACK_EMPTY ()) ~~~~~~~~~~~~~~~~~~~~~~~~~ { /* More failure points to try. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* If exceeds best match so far, save it. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!best_regs_set || best_match_p) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ best_regs_set = true; ~~~~~~~~~~~~~~~~~~~~~ match_end = d; ~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT1 ("\nSAVING match as best so far.\n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (mcnt = 1; mcnt < num_regs; mcnt++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ best_regstart[mcnt] = regstart[mcnt]; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ best_regend[mcnt] = regend[mcnt]; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ } ~ goto fail; ~~~~~~~~~~ } ~ /* If no failure points, don't restore garbage. And if ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ last match is real best match, don't restore second ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ best one. */ ~~~~~~~~~~~~ else if (best_regs_set && !best_match_p) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ restore_best_regs: ~~~~~~~~~~~~~~~~~~ /* Restore best match. It may happen that `dend == ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ end_match_1' while the restored d is in string2. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ For example, the pattern `x.*y.*z' against the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ strings `x-' and `y-z-', if the two strings are ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ not consecutive in memory. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT1 ("Restoring best registers.\n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ d = match_end; ~~~~~~~~~~~~~~ dend = ((d >= string1 && d <= end1) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ? end_match_1 : end_match_2); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (mcnt = 1; mcnt < num_regs; mcnt++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ regstart[mcnt] = best_regstart[mcnt]; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regend[mcnt] = best_regend[mcnt]; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ } ~ } /* d != end_match_2 */ ~~~~~~~~~~~~~~~~~~~~~~~~ succeed_label: ~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT1 ("Accepting match.\n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* If caller wants register contents data back, do it. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ int num_nonshy_regs = bufp->re_nsub + 1; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (regs && !bufp->no_sub) ~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ /* Have the register data arrays been allocated? */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (bufp->regs_allocated == REGS_UNALLOCATED) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { /* No. So allocate them with malloc. We need one ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ extra element beyond `num_regs' for the `-1' marker ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ GNU code uses. */ ~~~~~~~~~~~~~~~~~~ regs->num_regs = MAX (RE_NREGS, num_nonshy_regs + 1); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ BEGIN_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~~~ regs->start = TALLOC (regs->num_regs, regoff_t); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regs->end = TALLOC (regs->num_regs, regoff_t); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ END_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~ RE_MATCH_RELOCATE_MOVEABLE_DATA_POINTERS (); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (regs->start == NULL || regs->end == NULL) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ FREE_VARIABLES (); ~~~~~~~~~~~~~~~~~~ return -2; ~~~~~~~~~~ } ~ bufp->regs_allocated = REGS_REALLOCATE; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ else if (bufp->regs_allocated == REGS_REALLOCATE) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { /* Yes. If we need more elements than were already ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ allocated, reallocate them. If we need fewer, just ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ leave it alone. */ ~~~~~~~~~~~~~~~~~~~ if (regs->num_regs < num_nonshy_regs + 1) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ regs->num_regs = num_nonshy_regs + 1; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ BEGIN_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~~~ RETALLOC (regs->start, regs->num_regs, regoff_t); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RETALLOC (regs->end, regs->num_regs, regoff_t); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ END_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~ RE_MATCH_RELOCATE_MOVEABLE_DATA_POINTERS (); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (regs->start == NULL || regs->end == NULL) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ FREE_VARIABLES (); ~~~~~~~~~~~~~~~~~~ return -2; ~~~~~~~~~~ } ~ } ~ } ~ else ~~~~ { ~ /* The braces fend off a "empty body in an else-statement" ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ warning under GCC when assert expands to nothing. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ assert (bufp->regs_allocated == REGS_FIXED); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* Convert the pointer data in `regstart' and `regend' to ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ indices. Register zero has to be set differently, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ since we haven't kept track of any info for it. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (regs->num_regs > 0) ~~~~~~~~~~~~~~~~~~~~~~~ { ~ regs->start[0] = pos; ~~~~~~~~~~~~~~~~~~~~~ regs->end[0] = (MATCHING_IN_FIRST_STRING ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ? ((regoff_t) (d - string1)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ : ((regoff_t) (d - string2 + size1))); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* Map over the NUM_NONSHY_REGS non-shy internal registers. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Copy each into the corresponding external register. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ MCNT indexes external registers. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (mcnt = 1; mcnt < MIN (num_nonshy_regs, regs->num_regs); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ mcnt++) ~~~~~~~ { ~ int internal_reg = bufp->external_to_internal_register[mcnt]; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if ((int)0xDEADBEEF == internal_reg ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || REG_UNSET (regstart[internal_reg]) || ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ REG_UNSET (regend[internal_reg])) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regs->start[mcnt] = regs->end[mcnt] = -1; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ else ~~~~ { ~ regs->start[mcnt] = ~~~~~~~~~~~~~~~~~~~ (regoff_t) POINTER_TO_OFFSET (regstart[internal_reg]); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regs->end[mcnt] = ~~~~~~~~~~~~~~~~~ (regoff_t) POINTER_TO_OFFSET (regend[internal_reg]); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ } ~ } /* regs && !bufp->no_sub */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* If we have regs and the regs structure has more elements than ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ were in the pattern, set the extra elements starting with ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ NUM_NONSHY_REGS to -1. If we (re)allocated the registers, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ this is the case, because we always allocate enough to have ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ at least one -1 at the end. ~~~~~~~~~~~~~~~~~~~~~~~~~~~ We do this even when no_sub is set because some applications ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (XEmacs) reuse register structures which may contain stale ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ information, and permit attempts to access those registers. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ It would be possible to require the caller to do this, but we'd ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ have to change the API for this function to reflect that, and ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ audit all callers. Note: as of 2003-04-17 callers in XEmacs ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ do clear the registers, but it's safer to leave this code in ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ because of reallocation. ~~~~~~~~~~~~~~~~~~~~~~~~ */ ~~ if (regs && regs->num_regs > 0) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (mcnt = num_nonshy_regs; mcnt < regs->num_regs; mcnt++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regs->start[mcnt] = regs->end[mcnt] = -1; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ DEBUG_MATCH_PRINT4 ("%u failure points pushed, %u popped (%u remain).\n", ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ nfailure_points_pushed, nfailure_points_popped, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ nfailure_points_pushed - nfailure_points_popped); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT2 ("%u registers pushed.\n", num_regs_pushed); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ mcnt = d - pos - (MATCHING_IN_FIRST_STRING ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ? string1 ~~~~~~~~~ : string2 - size1); ~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT2 ("Returning %d from re_match_2.\n", mcnt); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ FREE_VARIABLES (); ~~~~~~~~~~~~~~~~~~ return mcnt; ~~~~~~~~~~~~ } ~ /* Otherwise match next pattern command. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ switch ((re_opcode_t) *p++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ /* Ignore these. Used to ignore the n of succeed_n's which ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ currently have n == 0. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~ case no_op: ~~~~~~~~~~~ DEBUG_MATCH_PRINT1 ("EXECUTING no_op.\n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case succeed: ~~~~~~~~~~~~~ DEBUG_MATCH_PRINT1 ("EXECUTING succeed.\n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto succeed_label; ~~~~~~~~~~~~~~~~~~~ /* Match exactly a string of length n in the pattern. The ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ following byte in the pattern defines n, and the n bytes after ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ that make up the string to match. (Under Mule, this will be in ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the default internal format.) */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case exactn: ~~~~~~~~~~~~ mcnt = *p++; ~~~~~~~~~~~~ DEBUG_MATCH_PRINT2 ("EXECUTING exactn %d.\n", mcnt); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* This is written out as an if-else so we don't waste time ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ testing `translate' inside the loop. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (TRANSLATE_P (translate)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ do ~~ { ~ #ifdef MULE ~~~~~~~~~~~ Bytecount pat_len; ~~~~~~~~~~~~~~~~~~ REGEX_PREFETCH (); ~~~~~~~~~~~~~~~~~~ if (RE_TRANSLATE_1 (itext_ichar_fmt (d, fmt, lispobj)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ != itext_ichar (p)) ~~~~~~~~~~~~~~~~~~~ goto fail; ~~~~~~~~~~ pat_len = itext_ichar_len (p); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ p += pat_len; ~~~~~~~~~~~~~ INC_IBYTEPTR_FMT (d, fmt); ~~~~~~~~~~~~~~~~~~~~~~~~~~ mcnt -= pat_len; ~~~~~~~~~~~~~~~~ #else /* not MULE */ ~~~~~~~~~~~~~~~~~~~~ REGEX_PREFETCH (); ~~~~~~~~~~~~~~~~~~ if ((unsigned char) RE_TRANSLATE_1 (*d++) != *p++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto fail; ~~~~~~~~~~ mcnt--; ~~~~~~~ #endif ~~~~~~ } ~ while (mcnt > 0); ~~~~~~~~~~~~~~~~~ } ~ else ~~~~ { ~ #ifdef MULE ~~~~~~~~~~~ /* If buffer format is default, then we can shortcut and just ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ compare the text directly, byte by byte. Otherwise, we ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ need to go character by character. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (fmt != FORMAT_DEFAULT) ~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ do ~~ { ~ Bytecount pat_len; ~~~~~~~~~~~~~~~~~~ REGEX_PREFETCH (); ~~~~~~~~~~~~~~~~~~ if (itext_ichar_fmt (d, fmt, lispobj) != ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ itext_ichar (p)) ~~~~~~~~~~~~~~~~ goto fail; ~~~~~~~~~~ pat_len = itext_ichar_len (p); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ p += pat_len; ~~~~~~~~~~~~~ INC_IBYTEPTR_FMT (d, fmt); ~~~~~~~~~~~~~~~~~~~~~~~~~~ mcnt -= pat_len; ~~~~~~~~~~~~~~~~ } ~ while (mcnt > 0); ~~~~~~~~~~~~~~~~~ } ~ else ~~~~ #endif ~~~~~~ { ~ do ~~ { ~ REGEX_PREFETCH (); ~~~~~~~~~~~~~~~~~~ if (*d++ != *p++) goto fail; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ mcnt--; ~~~~~~~ } ~ while (mcnt > 0); ~~~~~~~~~~~~~~~~~ } ~ } ~ SET_REGS_MATCHED (); ~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ /* Match any character except possibly a newline or a null. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case anychar: ~~~~~~~~~~~~~ DEBUG_MATCH_PRINT1 ("EXECUTING anychar.\n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ REGEX_PREFETCH (); ~~~~~~~~~~~~~~~~~~ if ((!(bufp->syntax & RE_DOT_NEWLINE) && ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RE_TRANSLATE (itext_ichar_fmt (d, fmt, lispobj)) == '\n') ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || (bufp->syntax & RE_DOT_NOT_NULL && ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RE_TRANSLATE (itext_ichar_fmt (d, fmt, lispobj)) == ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ '\000')) ~~~~~~~~ goto fail; ~~~~~~~~~~ SET_REGS_MATCHED (); ~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT2 (" Matched `%c'.\n", *d); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ INC_IBYTEPTR_FMT (d, fmt); /* XEmacs change */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case charset: ~~~~~~~~~~~~~ case charset_not: ~~~~~~~~~~~~~~~~~ { ~ REGISTER Ichar c; ~~~~~~~~~~~~~~~~~ re_bool not_p = (re_opcode_t) *(p - 1) == charset_not; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT2 ("EXECUTING charset%s.\n", not_p ? "_not" : ""); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ REGEX_PREFETCH (); ~~~~~~~~~~~~~~~~~~ c = itext_ichar_fmt (d, fmt, lispobj); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ c = RE_TRANSLATE (c); /* The character to match. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Cast to `unsigned int' instead of `unsigned char' in case the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bit list is a full 32 bytes long. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if ((unsigned int)c < (unsigned int) (*p * BYTEWIDTH) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ && p[1 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH))) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ not_p = !not_p; ~~~~~~~~~~~~~~~ p += 1 + *p; ~~~~~~~~~~~~ if (!not_p) goto fail; ~~~~~~~~~~~~~~~~~~~~~~ SET_REGS_MATCHED (); ~~~~~~~~~~~~~~~~~~~~ INC_IBYTEPTR_FMT (d, fmt); /* XEmacs change */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ } ~ #ifdef MULE ~~~~~~~~~~~ case charset_mule: ~~~~~~~~~~~~~~~~~~ case charset_mule_not: ~~~~~~~~~~~~~~~~~~~~~~ { ~ REGISTER Ichar c; ~~~~~~~~~~~~~~~~~ re_bool not_p = (re_opcode_t) *(p - 1) == charset_mule_not; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Bitbyte class_bits = *p++; ~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT2 ("EXECUTING charset_mule%s.\n", ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ not_p ? "_not" : ""); ~~~~~~~~~~~~~~~~~~~~~ REGEX_PREFETCH (); ~~~~~~~~~~~~~~~~~~ c = itext_ichar_fmt (d, fmt, lispobj); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ c = RE_TRANSLATE (c); /* The character to match. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if ((class_bits && ~~~~~~~~~~~~~~~~~~ ((class_bits & BIT_WORD && ISWORD (c)) /* = ALNUM */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || (class_bits & BIT_ALPHA && ISALPHA (c)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || (class_bits & BIT_SPACE && ISSPACE (c)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || (class_bits & BIT_PUNCT && ISPUNCT (c)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || (TRANSLATE_P (translate) ? ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (class_bits & (BIT_UPPER | BIT_LOWER) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ && !NOCASEP (lispbuf, c)) ~~~~~~~~~~~~~~~~~~~~~~~~~ : ((class_bits & BIT_UPPER && ISUPPER (c)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || (class_bits & BIT_LOWER && ISLOWER (c)))))) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || EQ (Qt, unified_range_table_lookup ((void *) p, c, Qnil))) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ not_p = !not_p; ~~~~~~~~~~~~~~~ } ~ p += unified_range_table_bytes_used ((void *) p); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!not_p) goto fail; ~~~~~~~~~~~~~~~~~~~~~~ SET_REGS_MATCHED (); ~~~~~~~~~~~~~~~~~~~~ INC_IBYTEPTR_FMT (d, fmt); ~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ } ~ #endif /* MULE */ ~~~~~~~~~~~~~~~~~ /* The beginning of a group is represented by start_memory. The ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ arguments are the register number in the next two bytes, and the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ number of groups inner to this one in the two bytes thereafter. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ The text matched within the group is recorded (in the internal ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ registers data structure) under the register number. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case start_memory: ~~~~~~~~~~~~~~~~~~ { ~ regnum_t regno; ~~~~~~~~~~~~~~~ /* Find out if this group can match the empty string. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ p1 = p; /* To send to group_match_null_string_p. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ EXTRACT_NONNEGATIVE_AND_INCR (regno, p); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT3 ("EXECUTING start_memory %d (%d):\n", ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regno, extract_number (p)); ~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (REG_MATCH_NULL_STRING_P (reg_info[regno]) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ == MATCH_NULL_UNSET_VALUE) ~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ REG_MATCH_NULL_STRING_P (reg_info[regno]) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ = group_match_null_string_p (&p1, pend, reg_info); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ DEBUG_MATCH_PRINT2 (" group CAN%s match null string\n", ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ REG_MATCH_NULL_STRING_P (reg_info[regno]) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ? "NOT" : ""); ~~~~~~~~~~~~~~ /* Save the position in the string where we were the last time ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ we were at this open-group operator in case the group is ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ operated upon by a repetition operator, e.g., with `(a*)*b' ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ against `ab'; then we want to ignore where we are now in the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ string in case this attempt to match fails. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ old_regstart[regno] = REG_MATCH_NULL_STRING_P (reg_info[regno]) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ? REG_UNSET (regstart[regno]) ? d : regstart[regno] ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ : regstart[regno]; ~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT2 (" old_regstart: %zd\n", ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ POINTER_TO_OFFSET (old_regstart[regno])); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regstart[regno] = d; ~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT2 (" regstart: %zd\n", ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ POINTER_TO_OFFSET (regstart[regno])); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ IS_ACTIVE (reg_info[regno]) = 1; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ MATCHED_SOMETHING (reg_info[regno]) = 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Clear this whenever we change the register activity status. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ set_regs_matched_done = 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~ /* This is the new highest active register. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ highest_active_reg = regno; ~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* If nothing was active before, this is the new lowest active ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ register. */ ~~~~~~~~~~~~~ if (lowest_active_reg == NO_LOWEST_ACTIVE_REG) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ lowest_active_reg = regno; ~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Move past the inner group count. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ p += 2; ~~~~~~~ just_past_start_mem = p; ~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ } ~ /* The stop_memory opcode represents the end of a group. Its ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ arguments are the same as start_memory's: the register ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ number, and the number of inner groups. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case stop_memory: ~~~~~~~~~~~~~~~~~ { ~ regnum_t regno, inner_groups; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ EXTRACT_NONNEGATIVE_AND_INCR (regno, p); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ EXTRACT_NONNEGATIVE_AND_INCR (inner_groups, p); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT3 ("EXECUTING stop_memory %d (%d):\n", ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regno, inner_groups); ~~~~~~~~~~~~~~~~~~~~~ /* We need to save the string position the last time we were at ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ this close-group operator in case the group is operated ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ upon by a repetition operator, e.g., with `((a*)*(b*)*)*' ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ against `aba'; then we want to ignore where we are now in ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the string in case this attempt to match fails. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ old_regend[regno] = REG_MATCH_NULL_STRING_P (reg_info[regno]) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ? REG_UNSET (regend[regno]) ? d : regend[regno] ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ : regend[regno]; ~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT2 (" old_regend: %zd\n", ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ POINTER_TO_OFFSET (old_regend[regno])); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regend[regno] = d; ~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT2 (" regend: %zd\n", ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ POINTER_TO_OFFSET (regend[regno])); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* This register isn't active anymore. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ IS_ACTIVE (reg_info[regno]) = 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Clear this whenever we change the register activity status. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ set_regs_matched_done = 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~ /* If this was the only register active, nothing is active ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ anymore. */ ~~~~~~~~~~~~ if (lowest_active_reg == highest_active_reg) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ lowest_active_reg = NO_LOWEST_ACTIVE_REG; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ highest_active_reg = NO_HIGHEST_ACTIVE_REG; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ else ~~~~ { ~ /* We must scan for the new highest active register, since it ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ isn't necessarily one less than now: consider ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (a(b)c(d(e)f)g). When group 3 ends, after the f), the new ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ highest active register is 1. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regnum_t r = regno - 1; ~~~~~~~~~~~~~~~~~~~~~~~ while (r > 0 && !IS_ACTIVE (reg_info[r])) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ r--; ~~~~ /* If we end up at register zero, that means that we saved ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the registers as the result of an `on_failure_jump', not ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ a `start_memory', and we jumped to past the innermost ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `stop_memory'. For example, in ((.)*) we save registers ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 1 and 2 as a result of the *, but when we pop back to the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ second ), we are at the stop_memory 1. Thus, nothing is ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ active. */ ~~~~~~~~~~~ if (r == 0) ~~~~~~~~~~~ { ~ lowest_active_reg = NO_LOWEST_ACTIVE_REG; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ highest_active_reg = NO_HIGHEST_ACTIVE_REG; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ else ~~~~ { ~ highest_active_reg = r; ~~~~~~~~~~~~~~~~~~~~~~~ /* 98/9/21 jhod: We've also gotta set lowest_active_reg, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ don't we? */ ~~~~~~~~~~~~ r = 1; ~~~~~~ while (r < highest_active_reg && !IS_ACTIVE(reg_info[r])) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ r++; ~~~~ lowest_active_reg = r; ~~~~~~~~~~~~~~~~~~~~~~ } ~ } ~ /* If just failed to match something this time around with a ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ group that's operated on by a repetition operator, try to ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ force exit from the ``loop'', and restore the register ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ information for this group that we had before trying this ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ last match. */ ~~~~~~~~~~~~~~~ if ((!MATCHED_SOMETHING (reg_info[regno]) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || just_past_start_mem == p - 4) && p < pend) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ re_bool is_a_jump_n = false; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ p1 = p; ~~~~~~~ mcnt = 0; ~~~~~~~~~ switch ((re_opcode_t) *p1++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ case jump_n: ~~~~~~~~~~~~ is_a_jump_n = true; ~~~~~~~~~~~~~~~~~~~ case pop_failure_jump: ~~~~~~~~~~~~~~~~~~~~~~ case maybe_pop_jump: ~~~~~~~~~~~~~~~~~~~~ case jump: ~~~~~~~~~~ case dummy_failure_jump: ~~~~~~~~~~~~~~~~~~~~~~~~ EXTRACT_NUMBER_AND_INCR (mcnt, p1); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (is_a_jump_n) ~~~~~~~~~~~~~~~~ p1 += 2; ~~~~~~~~ break; ~~~~~~ default: ~~~~~~~~ /* do nothing */ ; ~~~~~~~~~~~~~~~~~~ } ~ p1 += mcnt; ~~~~~~~~~~~ /* If the next operation is a jump backwards in the pattern ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ to an on_failure_jump right before the start_memory ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ corresponding to this stop_memory, exit from the loop ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ by forcing a failure after pushing on the stack the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ on_failure_jump's jump in the pattern, and d. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (mcnt < 0 && (re_opcode_t) *p1 == on_failure_jump ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ && (re_opcode_t) p1[3] == start_memory && ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regno == extract_nonnegative (p1 + 4)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ /* If this group ever matched anything, then restore ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ what its registers were before trying this last ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ failed match, e.g., with `(a*)*b' against `ab' for ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regstart[1], and, e.g., with `((a*)*(b*)*)*' against ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `aba' for regend[3]. ~~~~~~~~~~~~~~~~~~~~ Also restore the registers for inner groups for, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ e.g., `((a*)(b*))*' against `aba' (register 3 would ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ otherwise get trashed). */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (EVER_MATCHED_SOMETHING (reg_info[regno])) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ int r; ~~~~~~ EVER_MATCHED_SOMETHING (reg_info[regno]) = 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Restore this and inner groups' (if any) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ registers. */ ~~~~~~~~~~~~~~ for (r = regno; r < regno + inner_groups; r++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ regstart[r] = old_regstart[r]; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* xx why this test? */ ~~~~~~~~~~~~~~~~~~~~~~~~ if (old_regend[r] >= regstart[r]) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regend[r] = old_regend[r]; ~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ } ~ p1++; ~~~~~ EXTRACT_NUMBER_AND_INCR (mcnt, p1); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PUSH_FAILURE_POINT (p1 + mcnt, d, -2); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto fail; ~~~~~~~~~~ } ~ } ~ /* We used to move past the register number and inner group count ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ here, when registers were just one byte; that's no longer ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ necessary with EXTRACT_NUMBER_AND_INCR(), above. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ } ~ /* \ has been turned into a `duplicate' command which is ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ followed by the numeric value of as the register number. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (Already passed through external-to-internal-register mapping, so ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ it refers to the actual group number, not the non-shy-only ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ numbering used in the external world.) */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case duplicate: ~~~~~~~~~~~~~~~ { ~ REGISTER re_char *d2, *dend2; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Get which register to match against. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regnum_t regno; ~~~~~~~~~~~~~~~ EXTRACT_NONNEGATIVE_AND_INCR (regno, p); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT2 ("EXECUTING duplicate %d.\n", regno); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Can't back reference a group which we've never matched. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (REG_UNSET (regstart[regno]) || REG_UNSET (regend[regno])) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto fail; ~~~~~~~~~~ /* Where in input to try to start matching. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ d2 = regstart[regno]; ~~~~~~~~~~~~~~~~~~~~~ /* Where to stop matching; if both the place to start and ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the place to stop matching are in the same string, then ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ set to the place to stop, otherwise, for now have to use ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the end of the first string. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ dend2 = ((FIRST_STRING_P (regstart[regno]) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ == FIRST_STRING_P (regend[regno])) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ? regend[regno] : end_match_1); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (;;) ~~~~~~~~ { ~ /* If necessary, advance to next segment in register ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ contents. */ ~~~~~~~~~~~~~ while (d2 == dend2) ~~~~~~~~~~~~~~~~~~~ { ~ if (dend2 == end_match_2) break; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (dend2 == regend[regno]) break; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* End of string1 => advance to string2. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ d2 = string2; ~~~~~~~~~~~~~ dend2 = regend[regno]; ~~~~~~~~~~~~~~~~~~~~~~ } ~ /* At end of register contents => success */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (d2 == dend2) break; ~~~~~~~~~~~~~~~~~~~~~~~ /* If necessary, advance to next segment in data. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ REGEX_PREFETCH (); ~~~~~~~~~~~~~~~~~~ /* How many characters left in this segment to match. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ mcnt = dend - d; ~~~~~~~~~~~~~~~~ /* Want how many consecutive characters we can match in ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ one shot, so, if necessary, adjust the count. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (mcnt > dend2 - d2) ~~~~~~~~~~~~~~~~~~~~~~ mcnt = dend2 - d2; ~~~~~~~~~~~~~~~~~~ /* Compare that many; failure if mismatch, else move ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ past them. */ ~~~~~~~~~~~~~~ if (TRANSLATE_P (translate) ~~~~~~~~~~~~~~~~~~~~~~~~~~~ ? bcmp_translate (d, d2, mcnt, translate ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef emacs ~~~~~~~~~~~~ , fmt, lispobj ~~~~~~~~~~~~~~ #endif ~~~~~~ ) ~ : memcmp (d, d2, mcnt)) ~~~~~~~~~~~~~~~~~~~~~~~ goto fail; ~~~~~~~~~~ d += mcnt, d2 += mcnt; ~~~~~~~~~~~~~~~~~~~~~~ /* Do this because we've match some characters. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ SET_REGS_MATCHED (); ~~~~~~~~~~~~~~~~~~~~ } ~ } ~ break; ~~~~~~ /* begline matches the empty string at the beginning of the string ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (unless `not_bol' is set in `bufp'), and, if ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `newline_anchor' is set, after newlines. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case begline: ~~~~~~~~~~~~~ DEBUG_MATCH_PRINT1 ("EXECUTING begline.\n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (AT_STRINGS_BEG (d)) ~~~~~~~~~~~~~~~~~~~~~~~ { ~ if (!bufp->not_bol) break; ~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ else ~~~~ { ~ re_char *d2 = d; ~~~~~~~~~~~~~~~~ DEC_IBYTEPTR (d2); ~~~~~~~~~~~~~~~~~~ if (itext_ichar_ascii_fmt (d2, fmt, lispobj) == '\n' && ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->newline_anchor) ~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ } ~ /* In all other cases, we fail. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto fail; ~~~~~~~~~~ /* endline is the dual of begline. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case endline: ~~~~~~~~~~~~~ DEBUG_MATCH_PRINT1 ("EXECUTING endline.\n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (AT_STRINGS_END (d)) ~~~~~~~~~~~~~~~~~~~~~~~ { ~ if (!bufp->not_eol) break; ~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* We have to ``prefetch'' the next character. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ else if ((d == end1 ? ~~~~~~~~~~~~~~~~~~~~~ itext_ichar_ascii_fmt (string2, fmt, lispobj) : ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ itext_ichar_ascii_fmt (d, fmt, lispobj)) == '\n' ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ && bufp->newline_anchor) ~~~~~~~~~~~~~~~~~~~~~~~~ { ~ break; ~~~~~~ } ~ goto fail; ~~~~~~~~~~ /* Match at the very beginning of the data. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case begbuf: ~~~~~~~~~~~~ DEBUG_MATCH_PRINT1 ("EXECUTING begbuf.\n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (AT_STRINGS_BEG (d)) ~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ goto fail; ~~~~~~~~~~ /* Match at the very end of the data. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case endbuf: ~~~~~~~~~~~~ DEBUG_MATCH_PRINT1 ("EXECUTING endbuf.\n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (AT_STRINGS_END (d)) ~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ goto fail; ~~~~~~~~~~ /* on_failure_keep_string_jump is used to optimize `.*\n'. It ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ pushes NULL as the value for the string on the stack. Then ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `pop_failure_point' will keep the current value for the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ string, instead of restoring it. To see why, consider ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ matching `foo\nbar' against `.*\n'. The .* matches the foo; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ then the . fails against the \n. But the next thing we want ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ to do is match the \n against the \n; if we restored the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ string value, we would be back at the foo. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Because this is used only in specific cases, we don't need to ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ check all the things that `on_failure_jump' does, to make ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ sure the right things get saved on the stack. Hence we don't ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ share its code. The only reason to push anything on the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ stack at all is that otherwise we would have to change ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `anychar's code to do something besides goto fail in this ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case; that seems worse than this. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case on_failure_keep_string_jump: ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT1 ("EXECUTING on_failure_keep_string_jump"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ EXTRACT_NUMBER_AND_INCR (mcnt, p); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT3 (" %d (to 0x%zx):\n", mcnt, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (Bytecount) (p + mcnt)); ~~~~~~~~~~~~~~~~~~~~~~~~ PUSH_FAILURE_POINT (p + mcnt, (unsigned char *) 0, -2); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ /* Uses of on_failure_jump: ~~~~~~~~~~~~~~~~~~~~~~~~~~~ Each alternative starts with an on_failure_jump that points ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ to the beginning of the next alternative. Each alternative ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ except the last ends with a jump that in effect jumps past ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the rest of the alternatives. (They really jump to the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ending jump of the following alternative, because tensioning ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ these jumps is a hassle.) ~~~~~~~~~~~~~~~~~~~~~~~~~ Repeats start with an on_failure_jump that points past both ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the repetition text and either the following jump or ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ pop_failure_jump back to this on_failure_jump. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case on_failure_jump: ~~~~~~~~~~~~~~~~~~~~~ on_failure: ~~~~~~~~~~~ DEBUG_MATCH_PRINT1 ("EXECUTING on_failure_jump"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ EXTRACT_NUMBER_AND_INCR (mcnt, p); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT3 (" %d (to 0x%zx)", mcnt, (Bytecount) (p + mcnt)); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* If this on_failure_jump comes right before a group (i.e., ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the original * applied to a group), save the information ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for that group and all inner ones, so that if we fail back ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ to this point, the group's information will be correct. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ For example, in \(a*\)*\1, we need the preceding group, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ and in \(\(a*\)b*\)\2, we need the inner group. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* We can't use `p' to check ahead because we push ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ a failure point to `p + mcnt' after we do this. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ p1 = p; ~~~~~~~ /* We need to skip no_op's before we look for the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ start_memory in case this on_failure_jump is happening as ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the result of a completed succeed_n, as in \(a\)\{1,3\}b\1 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ against aba. */ ~~~~~~~~~~~~~~~~ while (p1 < pend && (re_opcode_t) *p1 == no_op) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ p1++; ~~~~~ if (p1 < pend && (re_opcode_t) *p1 == start_memory) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ /* We have a new highest active register now. This will ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ get reset at the start_memory we are about to get to, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ but we will have saved all the registers relevant to ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ this repetition op, as described above. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ highest_active_reg = *(p1 + 1) + *(p1 + 2); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (lowest_active_reg == NO_LOWEST_ACTIVE_REG) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ lowest_active_reg = *(p1 + 1); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ DEBUG_MATCH_PRINT1 (":\n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~ PUSH_FAILURE_POINT (p + mcnt, d, -2); ~~~~~~~~~~~~~~~~~~~~~~~~~ regex.c:774:55: note: in definition of macro 'DEBUG_FAIL_PRINT2' if (debug_regexps & RE_DEBUG_FAILURE_POINT) printf (x1, x2) ^~ regex.c:6590:11: note: in expansion of macro 'PUSH_FAILURE_POINT' PUSH_FAILURE_POINT (p + mcnt, d, -2); ^~~~~~~~~~~~~~~~~~ regex.c:1817:26: warning: format '%zx' expects argument of type 'size_t', but argument 2 has type 'long int' [-Wformat=] DEBUG_FAIL_PRINT2 (" Pushing string 0x%zx: `", \ ^ (Bytecount) string_place); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_FAIL_PRINT_DOUBLE_STRING (string_place, string1, size1, \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ string2, size2); \ ~~~~~~~~~~~~~~~~~~~~ DEBUG_FAIL_PRINT1 ("'\n"); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_FAIL_PRINT2 (" Pushing failure id: %u\n", failure_id); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } \ ~~~~~~~~~~~ } while (0) ~~~~~~~~~~~ /* This is the number of items that are pushed and popped on the stack ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for each register. */ ~~~~~~~~~~~~~~~~~~~~~~ #define NUM_REG_ITEMS 3 ~~~~~~~~~~~~~~~~~~~~~~~~ /* Individual items aside from the registers. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef DEBUG ~~~~~~~~~~~~ #define NUM_NONREG_ITEMS 5 /* Includes failure point id. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #else ~~~~~ #define NUM_NONREG_ITEMS 4 ~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ /* We push at most this many items on the stack. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* We used to use (num_regs - 1), which is the number of registers ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ this regexp will save; but that was changed to 5 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ to avoid stack overflow for a regexp with lots of parens. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define MAX_FAILURE_ITEMS (5 * NUM_REG_ITEMS + NUM_NONREG_ITEMS) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* We actually push this many items. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define NUM_FAILURE_ITEMS \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ((highest_active_reg - lowest_active_reg + 1) * NUM_REG_ITEMS \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + NUM_NONREG_ITEMS) ~~~~~~~~~~~~~~~~~~~ /* How many items can still be added to the stack without overflowing it. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define REMAINING_AVAIL_SLOTS ((fail_stack).size - (fail_stack).avail) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Pops what PUSH_FAIL_STACK pushes. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ We restore into the following parameters, all of which should be lvalues: ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ STR -- the saved data position. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PAT -- the saved pattern position. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ LOW_REG, HIGH_REG -- the highest and lowest active registers. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ REGSTART, REGEND -- arrays of string positions. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ REG_INFO -- array of information about each subexpression. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Also assumes the variables `fail_stack' and (if debugging), `bufp', ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `pend', `string1', `size1', `string2', and `size2'. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define POP_FAILURE_POINT(str, pat, low_reg, high_reg, \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regstart, regend, reg_info) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ do { \ ~~~~~~~~~~~~~~ DEBUG_STATEMENT (int ffailure_id;) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int this_reg; \ ~~~~~~~~~~~~~~~~~~~~~~ const unsigned char *string_temp; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ \ ~ /* Remove failure points and point to how many regs pushed. */ \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ assert (fail_stack.avail >= NUM_NONREG_ITEMS); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ \ ~ if (DEBUG_RUNTIME_FLAGS & RE_DEBUG_FAILURE_POINT) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { \ ~~~~~~~~~~~ DEBUG_FAIL_PRINT1 ("POP_FAILURE_POINT:\n"); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_FAIL_PRINT2 (" Before pop, next avail: %zd\n", \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (Bytecount) fail_stack.avail); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_FAIL_PRINT2 (" size: %zd\n", \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (Bytecount) fail_stack.size); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } \ ~~~~~~~~~~~ \ ~ DEBUG_STATEMENT (ffailure_id = POP_FAILURE_INT()); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ \ ~ /* If the saved string location is NULL, it came from an \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ on_failure_keep_string_jump opcode, and we want to throw away the \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ saved NULL, thus retaining our current position in the string. */ \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ string_temp = POP_FAILURE_RELOCATABLE (); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (string_temp != NULL) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ str = string_temp; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~ \ ~ pat = (unsigned char *) POP_FAILURE_POINTER (); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ \ ~ /* Restore register info. */ \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ high_reg = POP_FAILURE_INT (); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ low_reg = POP_FAILURE_INT (); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ \ ~ if (DEBUG_RUNTIME_FLAGS & RE_DEBUG_FAILURE_POINT) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { \ ~~~~~~~~~~~ DEBUG_FAIL_PRINT2 (" Popping failure id: %d\n", ffailure_id); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_FAIL_PRINT2 (" Popping string 0x%zx: `", (Bytecount) str); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_FAIL_PRINT_DOUBLE_STRING (str, string1, size1, \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ string2, size2); \ ~~~~~~~~~~~~~~~~~~~~ DEBUG_FAIL_PRINT1 ("'\n"); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_FAIL_PRINT2 (" Popping pattern 0x%zx: ", (Bytecount) pat); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_FAIL_PRINT_COMPILED_PATTERN (bufp, pat, pend); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_FAIL_PRINT2 (" Popping high active reg: %d\n", high_reg); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_FAIL_PRINT2 (" Popping low active reg: %d\n", low_reg); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } \ ~~~~~~~~~~~ \ ~ for (this_reg = high_reg; this_reg >= low_reg; this_reg--) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { \ ~~~~~~~~~~~ reg_info[this_reg].word = POP_FAILURE_ELT (); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regend[this_reg] = POP_FAILURE_RELOCATABLE (); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regstart[this_reg] = POP_FAILURE_RELOCATABLE (); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ \ ~ if (DEBUG_RUNTIME_FLAGS & RE_DEBUG_FAILURE_POINT) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { \ ~~~~~~~~~~ DEBUG_FAIL_PRINT2 (" Popping reg: %d\n", this_reg); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_FAIL_PRINT2 (" info: 0x%zx\n", \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ * (Bytecount *) ®_info[this_reg]); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_FAIL_PRINT2 (" end: 0x%zx\n", \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (Bytecount) regend[this_reg]); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_FAIL_PRINT2 (" start: 0x%zx\n", \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (Bytecount) regstart[this_reg]); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } \ ~~~~~~~~~~ } \ ~~~~~~~~~~~ \ ~ set_regs_matched_done = 0; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_STATEMENT (nfailure_points_popped++); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } while (0) /* POP_FAILURE_POINT */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ~ /* Structure for per-register (a.k.a. per-group) information. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Other register information, such as the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ starting and ending positions (which are addresses), and the list of ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ inner groups (which is a bits list) are maintained in separate ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ variables. ~~~~~~~~~~ We are making a (strictly speaking) nonportable assumption here: that ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the compiler will pack our bit fields into something that fits into ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the type of `word', i.e., is something that fits into one item on the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ failure stack. */ ~~~~~~~~~~~~~~~~~~ typedef union ~~~~~~~~~~~~~ { ~ fail_stack_elt_t word; ~~~~~~~~~~~~~~~~~~~~~~ struct ~~~~~~ { ~ /* This field is one if this group can match the empty string, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ zero if not. If not yet determined, `MATCH_NULL_UNSET_VALUE'. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define MATCH_NULL_UNSET_VALUE 3 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ unsigned int match_null_string_p : 2; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ unsigned int is_active : 1; ~~~~~~~~~~~~~~~~~~~~~~~~~~~ unsigned int matched_something : 1; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ unsigned int ever_matched_something : 1; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } bits; ~~~~~~~ } register_info_type; ~~~~~~~~~~~~~~~~~~~~~ #define REG_MATCH_NULL_STRING_P(R) ((R).bits.match_null_string_p) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define IS_ACTIVE(R) ((R).bits.is_active) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define MATCHED_SOMETHING(R) ((R).bits.matched_something) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define EVER_MATCHED_SOMETHING(R) ((R).bits.ever_matched_something) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Call this when have matched a real character; it sets `matched' flags ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for the subexpressions which we are currently inside. Also records ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ that those subexprs have matched. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define SET_REGS_MATCHED() \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ do \ ~~~~~~~~~~~~ { \ ~~~~~~~~~~~ if (!set_regs_matched_done) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { \ ~~~~~~~~~~ int r; \ ~~~~~~~~~~~~~~ set_regs_matched_done = 1; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (r = lowest_active_reg; r <= highest_active_reg; r++) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { \ ~~~~~~~~~~ MATCHED_SOMETHING (reg_info[r]) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ = EVER_MATCHED_SOMETHING (reg_info[r]) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ = 1; \ ~~~~~~~~~~~~ } \ ~~~~~~~~~~ } \ ~~~~~~~~~~ } \ ~~~~~~~~~~~ while (0) ~~~~~~~~~ ~ /* Subroutine declarations and macros for regex_compile. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Fetch the next character in the uncompiled pattern---translating it ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if necessary. */ ~~~~~~~~~~~~~~~~~ #define PATFETCH(c) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~ do { \ ~~~~~~~~~~~~~~ PATFETCH_RAW (c); \ ~~~~~~~~~~~~~~~~~~~~~~~~~ c = RE_TRANSLATE (c); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } while (0) ~~~~~~~~~~~ /* Fetch the next character in the uncompiled pattern, with no ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ translation. */ ~~~~~~~~~~~~~~~~ #define PATFETCH_RAW(c) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ do {if (p == pend) return REG_EEND; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ assert (p < pend); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~ c = itext_ichar (p); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ INC_IBYTEPTR (p); \ ~~~~~~~~~~~~~~~~~~~~~~~~~ } while (0) ~~~~~~~~~~~ /* Go backwards one character in the pattern. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define PATUNFETCH DEC_IBYTEPTR (p) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* If `translate' is non-null, return translate[D], else just D. We ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ cast the subscript to translate because some data is declared as ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `char *', to avoid warnings when a string constant is passed. But ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ when we use a character as a subscript we must make it unsigned. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define RE_TRANSLATE(d) \ ~~~~~~~~~~~~~~~~~~~~~~~~~ (TRANSLATE_P (translate) ? RE_TRANSLATE_1 (d) : (d)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Macros for outputting the compiled pattern into `buffer'. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* If the buffer isn't allocated when it comes in, use this. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define INIT_BUF_SIZE 32 ~~~~~~~~~~~~~~~~~~~~~~~~~ /* Make sure we have at least N more bytes of space in buffer. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define GET_BUFFER_SPACE(n) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ while (buf_end - bufp->buffer + (n) > (ptrdiff_t) bufp->allocated) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ EXTEND_BUFFER () ~~~~~~~~~~~~~~~~ /* Make sure we have one more byte of buffer space and then add C to it. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define BUF_PUSH(c) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~ do { \ ~~~~~~~~~~~~~~ GET_BUFFER_SPACE (1); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ *buf_end++ = (unsigned char) (c); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } while (0) ~~~~~~~~~~~ /* Ensure we have two more bytes of buffer space and then append C1 and C2. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define BUF_PUSH_2(c1, c2) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ do { \ ~~~~~~~~~~~~~~ GET_BUFFER_SPACE (2); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ *buf_end++ = (unsigned char) (c1); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ *buf_end++ = (unsigned char) (c2); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } while (0) ~~~~~~~~~~~ /* As with BUF_PUSH_2, except for three bytes. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define BUF_PUSH_3(c1, c2, c3) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ do { \ ~~~~~~~~~~~~~~ GET_BUFFER_SPACE (3); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ *buf_end++ = (unsigned char) (c1); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ *buf_end++ = (unsigned char) (c2); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ *buf_end++ = (unsigned char) (c3); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } while (0) ~~~~~~~~~~~ /* Store a jump with opcode OP at LOC to location TO. We store a ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ relative address offset by the three bytes the jump itself occupies. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define STORE_JUMP(op, loc, to) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ store_op1 (op, loc, (to) - (loc) - 3) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Likewise, for a two-argument jump. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define STORE_JUMP2(op, loc, to, arg) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ store_op2 (op, loc, (to) - (loc) - 3, arg) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Like `STORE_JUMP', but for inserting. Assume `buf_end' is the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ buffer end. */ ~~~~~~~~~~~~~~~ #define INSERT_JUMP(op, loc, to) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ insert_op1 (op, loc, (to) - (loc) - 3, buf_end) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Like `STORE_JUMP2', but for inserting. Assume `buf_end' is the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ buffer end. */ ~~~~~~~~~~~~~~~ #define INSERT_JUMP2(op, loc, to, arg) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ insert_op2 (op, loc, (to) - (loc) - 3, arg, buf_end) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Extend the buffer by twice its current size via realloc and ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ reset the pointers that pointed into the old block to point to the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ correct places in the new one. If extending the buffer results in it ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ being larger than RE_MAX_BUF_SIZE, then flag memory exhausted. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define EXTEND_BUFFER() \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ do { \ ~~~~~~~~~~~~~~~ re_char *old_buffer = bufp->buffer; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (bufp->allocated == RE_MAX_BUF_SIZE) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return REG_ESIZE; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->allocated <<= 1; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (bufp->allocated > RE_MAX_BUF_SIZE) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->allocated = RE_MAX_BUF_SIZE; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->buffer = \ ~~~~~~~~~~~~~~~~~~~~~~~ (unsigned char *) xrealloc (bufp->buffer, bufp->allocated); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (bufp->buffer == NULL) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return REG_ESPACE; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~ /* If the buffer moved, move all the pointers into it. */ \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (old_buffer != bufp->buffer) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { \ ~~~~~~~~~~~~ buf_end = (buf_end - old_buffer) + bufp->buffer; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ begalt = (begalt - old_buffer) + bufp->buffer; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (fixup_alt_jump) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~ fixup_alt_jump = (fixup_alt_jump - old_buffer) + bufp->buffer; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (laststart) \ ~~~~~~~~~~~~~~~~~~~~~~~ laststart = (laststart - old_buffer) + bufp->buffer; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (pending_exact) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~ pending_exact = (pending_exact - old_buffer) + bufp->buffer; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } \ ~~~~~~~~~~~~ } while (0) ~~~~~~~~~~~ #define INIT_REG_TRANSLATE_SIZE 5 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Macros for the compile stack. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Since offsets can go either forwards or backwards, this type needs to be ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ able to hold values from -(RE_MAX_BUF_SIZE - 1) to RE_MAX_BUF_SIZE - 1. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ typedef int pattern_offset_t; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ typedef struct ~~~~~~~~~~~~~~ { ~ pattern_offset_t begalt_offset; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ pattern_offset_t fixup_alt_jump; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ pattern_offset_t inner_group_offset; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ pattern_offset_t laststart_offset; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regnum_t regnum; ~~~~~~~~~~~~~~~~ } compile_stack_elt_t; ~~~~~~~~~~~~~~~~~~~~~~ typedef struct ~~~~~~~~~~~~~~ { ~ compile_stack_elt_t *stack; ~~~~~~~~~~~~~~~~~~~~~~~~~~~ int size; ~~~~~~~~~ int avail; /* Offset of next open position. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } compile_stack_type; ~~~~~~~~~~~~~~~~~~~~~ #define INIT_COMPILE_STACK_SIZE 32 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define COMPILE_STACK_EMPTY (compile_stack.avail == 0) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define COMPILE_STACK_FULL (compile_stack.avail == compile_stack.size) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* The next available element. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define COMPILE_STACK_TOP (compile_stack.stack[compile_stack.avail]) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Set the bit for character C in a bit vector. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define SET_LIST_BIT(c) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (buf_end[((unsigned char) (c)) / BYTEWIDTH] \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ |= 1 << (((unsigned char) c) % BYTEWIDTH)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef MULE ~~~~~~~~~~~ /* Set the "bit" for character C in a range table. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define SET_RANGETAB_BIT(c) put_range_table (rtab, c, c, Qt) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ #ifdef emacs ~~~~~~~~~~~~ /* Parse the longest number we can, but don't produce a bignum, that can't ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ correspond to anything we're interested in and would needlessly complicate ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ code. Also avoid the silent overflow issues of the non-emacs code below. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ If the string at P is not exhausted, leave P pointing at the next ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (probable-)non-digit byte encountered. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define GET_UNSIGNED_NUMBER(num) do \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Ibyte *_gus_numend = NULL; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Lisp_Object _gus_numno; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* most-positive-fixnum on 32 bit XEmacs is 10 decimal digits, \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ nine will keep us in fixnum territory no matter our \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ architecture */ \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Bytecount limit = min (pend - p, 9); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ \ ~ /* Require that any digits are ASCII. We already require that \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the user type ASCII in order to type {,(,|, etc, and there is \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the potential for security holes in the future if we allow \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ non-ASCII digits to specify groups in regexps and other \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ code that parses regexps is not aware of this. */ \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ _gus_numno = parse_integer (p, &_gus_numend, limit, 10, 1, \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Vdigit_fixnum_ascii); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (FIXNUMP (_gus_numno) && XREALFIXNUM (_gus_numno) >= 0) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ num = XREALFIXNUM (_gus_numno); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ p = _gus_numend; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } while (0) ~~~~~~~~~~~ #else ~~~~~ /* Get the next unsigned number in the uncompiled pattern. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define GET_UNSIGNED_NUMBER(num) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { if (p != pend) \ ~~~~~~~~~~~~~~~~~~~~~~~~ { \ ~~~~~~~~~~~ int _gun_do_unfetch = 1; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PATFETCH (c); \ ~~~~~~~~~~~~~~~~~~~~~~ while (ISDIGIT (c)) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~ { \ ~~~~~~~~~~~ if (num < 0) \ ~~~~~~~~~~~~~~~~~~~~ num = 0; \ ~~~~~~~~~~~~~~~~ num = num * 10 + c - '0'; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (p == pend) \ ~~~~~~~~~~~~~~~~~~~~~~ { \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ _gun_do_unfetch = 0; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; \ ~~~~~~~~~~~~~~~ } \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PATFETCH (c); \ ~~~~~~~~~~~~~~~~~~~~ } \ ~~~~~~~~~~~ if (_gun_do_unfetch) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Make sure P points to the next non-digit character. */ \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PATUNFETCH; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ #endif ~~~~~~ /* Map a string to the char class it names (if any). BEG points to the string ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ to be parsed and LIMIT is the length, in bytes, of that string. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ XEmacs; this only handles the NAME part of the [:NAME:] specification of a ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ character class name. The GNU emacs version of this function attempts to ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ handle the string from [: onwards, and is called re_wctype_parse. Our ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ approach means the function doesn't need to be called with every character ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ class encountered. ~~~~~~~~~~~~~~~~~~ LENGTH would be a Bytecount if this function didn't need to be compiled ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ also for executables that don't include lisp.h ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Return RECC_ERROR if STRP doesn't match a known character class. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_wctype_t ~~~~~~~~~~~ re_wctype (const unsigned char *beg, int limit) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ /* Sort tests in the length=five case by frequency the classes to minimize ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ number of times we fail the comparison. The frequencies of character class ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ names used in Emacs sources as of 2016-07-27: ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ $ find \( -name \*.c -o -name \*.el \) -exec grep -h '\[:[a-z]*:]' {} + | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ sed 's/]/]\n/g' |grep -o '\[:[a-z]*:]' |sort |uniq -c |sort -nr ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 213 [:alnum:] ~~~~~~~~~~~~~ 104 [:alpha:] ~~~~~~~~~~~~~ 62 [:space:] ~~~~~~~~~~~~ 39 [:digit:] ~~~~~~~~~~~~ 36 [:blank:] ~~~~~~~~~~~~ 26 [:word:] ~~~~~~~~~~~ 26 [:upper:] ~~~~~~~~~~~~ 21 [:lower:] ~~~~~~~~~~~~ 10 [:xdigit:] ~~~~~~~~~~~~~ 10 [:punct:] ~~~~~~~~~~~~ 10 [:ascii:] ~~~~~~~~~~~~ 4 [:nonascii:] ~~~~~~~~~~~~~~ 4 [:graph:] ~~~~~~~~~~~ 2 [:print:] ~~~~~~~~~~~ 2 [:cntrl:] ~~~~~~~~~~~ 1 [:ff:] ~~~~~~~~ If you update this list, consider also updating chain of or'ed conditions ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ in execute_charset function. XEmacs; our equivalent is the condition ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ checking class_bits in the charset_mule and charset_mule_not opcodes. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ ~~ switch (limit) { ~~~~~~~~~~~~~~~~ case 4: ~~~~~~~ if (!memcmp (beg, "word", 4)) return RECC_WORD; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case 5: ~~~~~~~ if (!memcmp (beg, "alnum", 5)) return RECC_ALNUM; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!memcmp (beg, "alpha", 5)) return RECC_ALPHA; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!memcmp (beg, "space", 5)) return RECC_SPACE; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!memcmp (beg, "digit", 5)) return RECC_DIGIT; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!memcmp (beg, "blank", 5)) return RECC_BLANK; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!memcmp (beg, "upper", 5)) return RECC_UPPER; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!memcmp (beg, "lower", 5)) return RECC_LOWER; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!memcmp (beg, "punct", 5)) return RECC_PUNCT; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!memcmp (beg, "ascii", 5)) return RECC_ASCII; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!memcmp (beg, "graph", 5)) return RECC_GRAPH; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!memcmp (beg, "print", 5)) return RECC_PRINT; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!memcmp (beg, "cntrl", 5)) return RECC_CNTRL; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case 6: ~~~~~~~ if (!memcmp (beg, "xdigit", 6)) return RECC_XDIGIT; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case 7: ~~~~~~~ if (!memcmp (beg, "unibyte", 7)) return RECC_UNIBYTE; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case 8: ~~~~~~~ if (!memcmp (beg, "nonascii", 8)) return RECC_NONASCII; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case 9: ~~~~~~~ if (!memcmp (beg, "multibyte", 9)) return RECC_MULTIBYTE; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ } ~ return RECC_ERROR; ~~~~~~~~~~~~~~~~~~ } ~ /* True if CH is in the char class CC. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int ~~~ re_iswctype (int ch, re_wctype_t cc ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RE_ISWCTYPE_ARG_DECL) ~~~~~~~~~~~~~~~~~~~~~ { ~ switch (cc) ~~~~~~~~~~~ { ~ case RECC_ALNUM: return ISALNUM (ch) != 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_ALPHA: return ISALPHA (ch) != 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_BLANK: return ISBLANK (ch) != 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_CNTRL: return ISCNTRL (ch) != 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_DIGIT: return ISDIGIT (ch) != 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_GRAPH: return ISGRAPH (ch) != 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_PRINT: return ISPRINT (ch) != 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_PUNCT: return ISPUNCT (ch) != 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_SPACE: return ISSPACE (ch) != 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef emacs ~~~~~~~~~~~~ case RECC_UPPER: ~~~~~~~~~~~~~~~~ return NILP (lispbuf->case_fold_search) ? ISUPPER (ch) != 0 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ : !NOCASEP (lispbuf, ch); ~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_LOWER: ~~~~~~~~~~~~~~~~ return NILP (lispbuf->case_fold_search) ? ISLOWER (ch) != 0 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ : !NOCASEP (lispbuf, ch); ~~~~~~~~~~~~~~~~~~~~~~~~~ #else ~~~~~ case RECC_UPPER: return ISUPPER (ch) != 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_LOWER: return ISLOWER (ch) != 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ case RECC_XDIGIT: return ISXDIGIT (ch) != 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_ASCII: return ISASCII (ch) != 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_NONASCII: case RECC_MULTIBYTE: return !ISASCII (ch); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_UNIBYTE: return ISUNIBYTE (ch) != 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_WORD: return ISWORD (ch) != 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_ERROR: return false; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ assert (0); ~~~~~~~~~~~ return -1; ~~~~~~~~~~ } ~ #ifdef MULE ~~~~~~~~~~~ static re_bool ~~~~~~~~~~~~~~ re_wctype_can_match_non_ascii (re_wctype_t cc) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ switch (cc) ~~~~~~~~~~~ { ~ case RECC_ASCII: ~~~~~~~~~~~~~~~~ case RECC_UNIBYTE: ~~~~~~~~~~~~~~~~~~ case RECC_CNTRL: ~~~~~~~~~~~~~~~~ case RECC_DIGIT: ~~~~~~~~~~~~~~~~ case RECC_XDIGIT: ~~~~~~~~~~~~~~~~~ case RECC_BLANK: ~~~~~~~~~~~~~~~~ return false; ~~~~~~~~~~~~~ default: ~~~~~~~~ return true; ~~~~~~~~~~~~ } ~ } ~ #endif /* MULE */ ~~~~~~~~~~~~~~~~~ #ifdef emacs ~~~~~~~~~~~~ /* Return a bit-pattern to use in the range-table bits to match multibyte ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ chars of class CC. */ ~~~~~~~~~~~~~~~~~~~~~~ static unsigned char ~~~~~~~~~~~~~~~~~~~~ re_wctype_to_bit (re_wctype_t cc) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ switch (cc) ~~~~~~~~~~~ { ~ case RECC_PRINT: case RECC_GRAPH: ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_ALPHA: return BIT_ALPHA; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_ALNUM: case RECC_WORD: return BIT_WORD; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_LOWER: return BIT_LOWER; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_UPPER: return BIT_UPPER; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_PUNCT: return BIT_PUNCT; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_SPACE: return BIT_SPACE; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_MULTIBYTE: case RECC_NONASCII: ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_ASCII: case RECC_DIGIT: case RECC_XDIGIT: case RECC_CNTRL: ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_BLANK: case RECC_UNIBYTE: case RECC_ERROR: return 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ default: ~~~~~~~~ ABORT (); ~~~~~~~~~ return 0; ~~~~~~~~~ } ~ } ~ #endif /* emacs */ ~~~~~~~~~~~~~~~~~~ ~ static void store_op1 (re_opcode_t op, unsigned char *loc, int arg); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ static void store_op2 (re_opcode_t op, unsigned char *loc, int arg1, int arg2); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ static void insert_op1 (re_opcode_t op, unsigned char *loc, int arg, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ unsigned char *end); ~~~~~~~~~~~~~~~~~~~~ static void insert_op2 (re_opcode_t op, unsigned char *loc, int arg1, int arg2, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ unsigned char *end); ~~~~~~~~~~~~~~~~~~~~ static re_bool at_begline_loc_p (re_char *pattern, re_char *p, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ reg_syntax_t syntax); ~~~~~~~~~~~~~~~~~~~~~ static re_bool at_endline_loc_p (re_char *p, re_char *pend, int syntax); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ static re_bool group_in_compile_stack (compile_stack_type compile_stack, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regnum_t regnum); ~~~~~~~~~~~~~~~~~ static reg_errcode_t compile_range (re_char **p_ptr, re_char *pend, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RE_TRANSLATE_TYPE translate, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ reg_syntax_t syntax, ~~~~~~~~~~~~~~~~~~~~ unsigned char *b); ~~~~~~~~~~~~~~~~~~ #ifdef MULE ~~~~~~~~~~~ static reg_errcode_t compile_extended_range (re_char **p_ptr, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_char *pend, ~~~~~~~~~~~~~~ RE_TRANSLATE_TYPE translate, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ reg_syntax_t syntax, ~~~~~~~~~~~~~~~~~~~~ Lisp_Object rtab); ~~~~~~~~~~~~~~~~~~ #endif /* MULE */ ~~~~~~~~~~~~~~~~~ #ifdef emacs ~~~~~~~~~~~~ reg_errcode_t compile_char_class (re_wctype_t cc, Lisp_Object rtab, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Bitbyte *flags_out); ~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ static re_bool group_match_null_string_p (re_char **p, re_char *end, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ register_info_type *reg_info); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ static re_bool alt_match_null_string_p (re_char *p, re_char *end, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ register_info_type *reg_info); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ static re_bool common_op_match_null_string_p (re_char **p, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_char *end, ~~~~~~~~~~~~~ register_info_type *reg_info); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ static int bcmp_translate (re_char *s1, re_char *s2, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ REGISTER int len, RE_TRANSLATE_TYPE translate ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef emacs ~~~~~~~~~~~~ , Internal_Format fmt, Lisp_Object lispobj ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ ); ~~ static int re_match_2_internal (struct re_pattern_buffer *bufp, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_char *string1, int size1, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_char *string2, int size2, int pos, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ struct re_registers *regs, int stop ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RE_LISP_CONTEXT_ARGS_DECL); ~~~~~~~~~~~~~~~~~~~~~~~~~~~ ~ #ifndef MATCH_MAY_ALLOCATE ~~~~~~~~~~~~~~~~~~~~~~~~~~ /* If we cannot allocate large objects within re_match_2_internal, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ we make the fail stack and register vectors global. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ The fail stack, we grow to the maximum size when a regexp ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ is compiled. ~~~~~~~~~~~~ The register vectors, we adjust in size each time we ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ compile a regexp, according to the number of registers it needs. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ static fail_stack_type fail_stack; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Size with which the following vectors are currently allocated. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ That is so we can make them bigger as needed, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ but never make them smaller. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ static int regs_allocated_size; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ static re_char ** regstart, ** regend; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ static re_char ** old_regstart, ** old_regend; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ static re_char **best_regstart, **best_regend; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ static register_info_type *reg_info; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ static re_char **reg_dummy; ~~~~~~~~~~~~~~~~~~~~~~~~~~~ static register_info_type *reg_info_dummy; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Make the register vectors big enough for NUM_REGS registers, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ but don't make them smaller. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ static ~~~~~~ regex_grow_registers (int num_regs) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ if (num_regs > regs_allocated_size) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ RETALLOC (regstart, num_regs, re_char *); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RETALLOC (regend, num_regs, re_char *); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RETALLOC (old_regstart, num_regs, re_char *); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RETALLOC (old_regend, num_regs, re_char *); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RETALLOC (best_regstart, num_regs, re_char *); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RETALLOC (best_regend, num_regs, re_char *); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RETALLOC (reg_info, num_regs, register_info_type); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RETALLOC (reg_dummy, num_regs, re_char *); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RETALLOC (reg_info_dummy, num_regs, register_info_type); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regs_allocated_size = num_regs; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ } ~ #endif /* not MATCH_MAY_ALLOCATE */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ~ /* `regex_compile' compiles PATTERN (of length SIZE) according to SYNTAX. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Returns one of error codes defined in `regex.h', or zero for success. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Assumes the `allocated' (and perhaps `buffer') and `translate' ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fields are set in BUFP on entry. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ If it succeeds, results are put in BUFP (if it returns an error, the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ contents of BUFP are undefined): ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `buffer' is the compiled pattern; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `syntax' is set to SYNTAX; ~~~~~~~~~~~~~~~~~~~~~~~~~~ `used' is set to the length of the compiled pattern; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `fastmap_accurate' is zero; ~~~~~~~~~~~~~~~~~~~~~~~~~~~ `re_ngroups' is the number of groups/subexpressions (including shy ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ groups) in PATTERN; ~~~~~~~~~~~~~~~~~~~ `re_nsub' is the number of non-shy groups in PATTERN; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `not_bol' and `not_eol' are zero; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ The `fastmap' and `newline_anchor' fields are neither ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ examined nor set. */ ~~~~~~~~~~~~~~~~~~~~~ /* Return, freeing storage we allocated. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define FREE_STACK_RETURN(value) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ do \ ~~~~~~~~~~ { \ ~~~~~~~~~ xfree (compile_stack.stack); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return value; \ ~~~~~~~~~~~~~~~~~~~~ } while (0) ~~~~~~~~~~~ static reg_errcode_t ~~~~~~~~~~~~~~~~~~~~ regex_compile (re_char *pattern, int size, reg_syntax_t syntax, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ struct re_pattern_buffer *bufp) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ /* We fetch characters from PATTERN here. We declare these as int ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (or possibly long) so that chars above 127 can be used as ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ array indices. The macros that fetch a character from the pattern ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ make sure to coerce to unsigned char before assigning, so we won't ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ get bitten by negative numbers here. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* XEmacs change: used to be unsigned char. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ REGISTER EMACS_INT c, c1; ~~~~~~~~~~~~~~~~~~~~~~~~~ /* A random temporary spot in PATTERN. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_char *p1; ~~~~~~~~~~~~ /* Points to the end of the buffer, where we should append. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ REGISTER unsigned char *buf_end; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Keeps track of unclosed groups. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ compile_stack_type compile_stack; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Points to the current (ending) position in the pattern. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_char *p = pattern; ~~~~~~~~~~~~~~~~~~~~~ re_char *pend = pattern + size; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* How to translate the characters in the pattern. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RE_TRANSLATE_TYPE translate = bufp->translate; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Address of the count-byte of the most recently inserted `exactn' ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ command. This makes it possible to tell if a new exact-match ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ character can be added to that command or if the character requires ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ a new `exactn' command. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~ unsigned char *pending_exact = 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Address of start of the most recently finished expression. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ This tells, e.g., postfix * where to find the start of its ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ operand. Reset at the beginning of groups and alternatives. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ unsigned char *laststart = 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Address of beginning of regexp, or inside of last group. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ unsigned char *begalt; ~~~~~~~~~~~~~~~~~~~~~~ /* Place in the uncompiled pattern (i.e., the {) to ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ which to go back if the interval is invalid. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_char *beg_interval; ~~~~~~~~~~~~~~~~~~~~~~ /* Address of the place where a forward jump should go to the end of ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the containing expression. Each alternative of an `or' -- except the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ last -- ends with a forward jump of this sort. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ unsigned char *fixup_alt_jump = 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Counts open-groups as they are encountered. Remembered for the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ matching close-group on the compile stack, so the same register ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ number is put in the stop_memory as the start_memory. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regnum_t regnum = 0; ~~~~~~~~~~~~~~~~~~~~ #ifdef DEBUG ~~~~~~~~~~~~ if (debug_regexps & RE_DEBUG_COMPILATION) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ int debug_count; ~~~~~~~~~~~~~~~~ DEBUG_PRINT1 ("\nCompiling pattern: "); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (debug_count = 0; debug_count < size; debug_count++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ putchar (pattern[debug_count]); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ putchar ('\n'); ~~~~~~~~~~~~~~~ } ~ #endif /* DEBUG */ ~~~~~~~~~~~~~~~~~~ /* Initialize the compile stack. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ compile_stack.stack = TALLOC (INIT_COMPILE_STACK_SIZE, compile_stack_elt_t); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (compile_stack.stack == NULL) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return REG_ESPACE; ~~~~~~~~~~~~~~~~~~ compile_stack.size = INIT_COMPILE_STACK_SIZE; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ compile_stack.avail = 0; ~~~~~~~~~~~~~~~~~~~~~~~~ /* Initialize the pattern buffer. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->syntax = syntax; ~~~~~~~~~~~~~~~~~~~~~~ bufp->fastmap_accurate = 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->not_bol = bufp->not_eol = 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Set `used' to zero, so that if we return an error, the pattern ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ printer (for debugging) will think there's no pattern. We reset it ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ at the end. */ ~~~~~~~~~~~~~~~ bufp->used = 0; ~~~~~~~~~~~~~~~ /* Always count groups, whether or not bufp->no_sub is set. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->re_nsub = 0; ~~~~~~~~~~~~~~~~~~ bufp->re_ngroups = 0; ~~~~~~~~~~~~~~~~~~~~~ bufp->warned_about_incompatible_back_references = 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (bufp->external_to_internal_register == 0) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ bufp->external_to_internal_register_size = INIT_REG_TRANSLATE_SIZE; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RETALLOC (bufp->external_to_internal_register, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->external_to_internal_register_size, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int); ~~~~~ } ~ { ~ int i; ~~~~~~ bufp->external_to_internal_register[0] = 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (i = 1; i < bufp->external_to_internal_register_size; i++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->external_to_internal_register[i] = (int) 0xDEADBEEF; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ #if !defined (emacs) && !defined (SYNTAX_TABLE) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Initialize the syntax table. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ init_syntax_once (); ~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ if (bufp->allocated == 0) ~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ if (bufp->buffer) ~~~~~~~~~~~~~~~~~ { /* If zero allocated, but buffer is non-null, try to realloc ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ enough space. This loses if buffer's address is bogus, but ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ that is the user's responsibility. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RETALLOC (bufp->buffer, INIT_BUF_SIZE, unsigned char); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ else ~~~~ { /* Caller did not allocate a buffer. Do it for them. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->buffer = TALLOC (INIT_BUF_SIZE, unsigned char); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ if (!bufp->buffer) FREE_STACK_RETURN (REG_ESPACE); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->allocated = INIT_BUF_SIZE; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ begalt = buf_end = bufp->buffer; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Loop through the uncompiled pattern until we're at the end. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ while (p != pend) ~~~~~~~~~~~~~~~~~ { ~ PATFETCH (c); ~~~~~~~~~~~~~ switch (c) ~~~~~~~~~~ { ~ case '^': ~~~~~~~~~ { ~ if ( /* If at start of pattern, it's an operator. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ p == pattern + 1 ~~~~~~~~~~~~~~~~ /* If context independent, it's an operator. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || syntax & RE_CONTEXT_INDEP_ANCHORS ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Otherwise, depends on what's come before. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || at_begline_loc_p (pattern, p, syntax)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ BUF_PUSH (begline); ~~~~~~~~~~~~~~~~~~~ else ~~~~ goto normal_char; ~~~~~~~~~~~~~~~~~ } ~ break; ~~~~~~ case '$': ~~~~~~~~~ { ~ if ( /* If at end of pattern, it's an operator. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ p == pend ~~~~~~~~~ /* If context independent, it's an operator. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || syntax & RE_CONTEXT_INDEP_ANCHORS ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Otherwise, depends on what's next. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || at_endline_loc_p (p, pend, syntax)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ BUF_PUSH (endline); ~~~~~~~~~~~~~~~~~~~ else ~~~~ goto normal_char; ~~~~~~~~~~~~~~~~~ } ~ break; ~~~~~~ case '+': ~~~~~~~~~ case '?': ~~~~~~~~~ if ((syntax & RE_BK_PLUS_QM) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || (syntax & RE_LIMITED_OPS)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto normal_char; ~~~~~~~~~~~~~~~~~ handle_plus: ~~~~~~~~~~~~ case '*': ~~~~~~~~~ /* If there is no previous pattern... */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!laststart) ~~~~~~~~~~~~~~~ { ~ if (syntax & RE_CONTEXT_INVALID_OPS) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ FREE_STACK_RETURN (REG_BADRPT); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ else if (!(syntax & RE_CONTEXT_INDEP_OPS)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto normal_char; ~~~~~~~~~~~~~~~~~ } ~ { ~ /* true means zero/many matches are allowed. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_bool zero_times_ok = c != '+'; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_bool many_times_ok = c != '?'; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* true means match shortest string possible. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_bool minimal = false; ~~~~~~~~~~~~~~~~~~~~~~~~ /* If there is a sequence of repetition chars, collapse it ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ down to just one (the right one). We can't combine ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ interval operators with these because of, e.g., `a{2}*', ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ which should only match an even number of `a's. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ while (p != pend) ~~~~~~~~~~~~~~~~~ { ~ PATFETCH (c); ~~~~~~~~~~~~~ if (c == '*' || (!(syntax & RE_BK_PLUS_QM) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ && (c == '+' || c == '?'))) ~~~~~~~~~~~~~~~~~~~~~~~~~~~ ; ~ else if (syntax & RE_BK_PLUS_QM && c == '\\') ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ if (p == pend) FREE_STACK_RETURN (REG_EESCAPE); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PATFETCH (c1); ~~~~~~~~~~~~~~ if (!(c1 == '+' || c1 == '?')) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ PATUNFETCH; ~~~~~~~~~~~ PATUNFETCH; ~~~~~~~~~~~ break; ~~~~~~ } ~ c = c1; ~~~~~~~ } ~ else ~~~~ { ~ PATUNFETCH; ~~~~~~~~~~~ break; ~~~~~~ } ~ /* If we get here, we found another repeat character. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!(syntax & RE_NO_MINIMAL_MATCHING)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ /* "*?" and "+?" and "??" are okay (and mean match ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ minimally), but other sequences (such as "*??" and ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ "+++") are rejected (reserved for future use). */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (minimal || c != '?') ~~~~~~~~~~~~~~~~~~~~~~~~ FREE_STACK_RETURN (REG_BADRPT); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ minimal = true; ~~~~~~~~~~~~~~~ } ~ else ~~~~ { ~ zero_times_ok |= c != '+'; ~~~~~~~~~~~~~~~~~~~~~~~~~~ many_times_ok |= c != '?'; ~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ } ~ /* Star, etc. applied to an empty pattern is equivalent ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ to an empty pattern. */ ~~~~~~~~~~~~~~~~~~~~~~~~ if (!laststart) ~~~~~~~~~~~~~~~ break; ~~~~~~ /* Now we know whether zero matches is allowed ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ and whether two or more matches is allowed ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ and whether we want minimal or maximal matching. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (minimal) ~~~~~~~~~~~~ { ~ if (!many_times_ok) ~~~~~~~~~~~~~~~~~~~ { ~ /* "a??" becomes: ~~~~~~~~~~~~~~~~~ 0: /on_failure_jump to 6 ~~~~~~~~~~~~~~~~~~~~~~~~ 3: /jump to 9 ~~~~~~~~~~~~~ 6: /exactn/1/A ~~~~~~~~~~~~~~ 9: end of pattern. ~~~~~~~~~~~~~~~~~~ */ ~~ GET_BUFFER_SPACE (6); ~~~~~~~~~~~~~~~~~~~~~ INSERT_JUMP (jump, laststart, buf_end + 3); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ buf_end += 3; ~~~~~~~~~~~~~ INSERT_JUMP (on_failure_jump, laststart, laststart + 6); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ buf_end += 3; ~~~~~~~~~~~~~ } ~ else if (zero_times_ok) ~~~~~~~~~~~~~~~~~~~~~~~ { ~ /* "a*?" becomes: ~~~~~~~~~~~~~~~~~ 0: /jump to 6 ~~~~~~~~~~~~~ 3: /exactn/1/A ~~~~~~~~~~~~~~ 6: /on_failure_jump to 3 ~~~~~~~~~~~~~~~~~~~~~~~~ 9: end of pattern. ~~~~~~~~~~~~~~~~~~ */ ~~ GET_BUFFER_SPACE (6); ~~~~~~~~~~~~~~~~~~~~~ INSERT_JUMP (jump, laststart, buf_end + 3); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ buf_end += 3; ~~~~~~~~~~~~~ STORE_JUMP (on_failure_jump, buf_end, laststart + 3); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ buf_end += 3; ~~~~~~~~~~~~~ } ~ else ~~~~ { ~ /* "a+?" becomes: ~~~~~~~~~~~~~~~~~ 0: /exactn/1/A ~~~~~~~~~~~~~~ 3: /on_failure_jump to 0 ~~~~~~~~~~~~~~~~~~~~~~~~ 6: end of pattern. ~~~~~~~~~~~~~~~~~~ */ ~~ GET_BUFFER_SPACE (3); ~~~~~~~~~~~~~~~~~~~~~ STORE_JUMP (on_failure_jump, buf_end, laststart); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ buf_end += 3; ~~~~~~~~~~~~~ } ~ } ~ else ~~~~ { ~ /* Are we optimizing this jump? */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_bool keep_string_p = false; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (many_times_ok) ~~~~~~~~~~~~~~~~~~ { /* More than one repetition is allowed, so put in ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ at the end a backward relative jump from ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `buf_end' to before the next jump we're going ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ to put in below (which jumps from laststart to ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ after this jump). ~~~~~~~~~~~~~~~~~ But if we are at the `*' in the exact sequence `.*\n', ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ insert an unconditional jump backwards to the ., ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ instead of the beginning of the loop. This way we only ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ push a failure point once, instead of every time ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ through the loop. */ ~~~~~~~~~~~~~~~~~~~~~ assert (p - 1 > pattern); ~~~~~~~~~~~~~~~~~~~~~~~~~ /* Allocate the space for the jump. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ GET_BUFFER_SPACE (3); ~~~~~~~~~~~~~~~~~~~~~ /* We know we are not at the first character of the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ pattern, because laststart was nonzero. And we've ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ already incremented `p', by the way, to be the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ character after the `*'. Do we have to do something ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ analogous here for null bytes, because of ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RE_DOT_NOT_NULL? */ ~~~~~~~~~~~~~~~~~~~ if (*(p - 2) == '.' ~~~~~~~~~~~~~~~~~~~ && zero_times_ok ~~~~~~~~~~~~~~~~ && p < pend && *p == '\n' ~~~~~~~~~~~~~~~~~~~~~~~~~ && !(syntax & RE_DOT_NEWLINE)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { /* We have .*\n. */ ~~~~~~~~~~~~~~~~~~~~~~ STORE_JUMP (jump, buf_end, laststart); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ keep_string_p = true; ~~~~~~~~~~~~~~~~~~~~~ } ~ else ~~~~ /* Anything else. */ ~~~~~~~~~~~~~~~~~~~~~ STORE_JUMP (maybe_pop_jump, buf_end, laststart - 3); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* We've added more stuff to the buffer. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ buf_end += 3; ~~~~~~~~~~~~~ } ~ /* On failure, jump from laststart to buf_end + 3, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ which will be the end of the buffer after this jump ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ is inserted. */ ~~~~~~~~~~~~~~~~ GET_BUFFER_SPACE (3); ~~~~~~~~~~~~~~~~~~~~~ INSERT_JUMP (keep_string_p ? on_failure_keep_string_jump ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ : on_failure_jump, ~~~~~~~~~~~~~~~~~~ laststart, buf_end + 3); ~~~~~~~~~~~~~~~~~~~~~~~~ buf_end += 3; ~~~~~~~~~~~~~ if (!zero_times_ok) ~~~~~~~~~~~~~~~~~~~ { ~ /* At least one repetition is required, so insert a ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `dummy_failure_jump' before the initial ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `on_failure_jump' instruction of the loop. This ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ effects a skip over that instruction the first time ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ we hit that loop. */ ~~~~~~~~~~~~~~~~~~~~~ GET_BUFFER_SPACE (3); ~~~~~~~~~~~~~~~~~~~~~ INSERT_JUMP (dummy_failure_jump, laststart, laststart + 6); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ buf_end += 3; ~~~~~~~~~~~~~ } ~ } ~ pending_exact = 0; ~~~~~~~~~~~~~~~~~~ } ~ break; ~~~~~~ case '.': ~~~~~~~~~ laststart = buf_end; ~~~~~~~~~~~~~~~~~~~~ BUF_PUSH (anychar); ~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ #ifdef MULE ~~~~~~~~~~~ #define MAYBE_START_OVER_WITH_EXTENDED(ch) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (ch >= 0x80) do \ ~~~~~~~~~~~~~~~~~~~~~~ { \ ~~~~~~~ goto start_over_with_extended; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } while (0) ~~~~~~~~~~~ #else ~~~~~ #define MAYBE_START_OVER_WITH_EXTENDED(ch) (void)(ch) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ case '[': ~~~~~~~~~ { ~ /* XEmacs change: this whole section */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_bool had_char_class = false; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (p == pend) FREE_STACK_RETURN (REG_EBRACK); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Ensure that we have enough space to push a charset: the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ opcode, the length count, and the bitset; 34 bytes in all. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ GET_BUFFER_SPACE (34); ~~~~~~~~~~~~~~~~~~~~~~ laststart = buf_end; ~~~~~~~~~~~~~~~~~~~~ /* We test `*p == '^' twice, instead of using an if ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ statement, so we only need one BUF_PUSH. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ BUF_PUSH (*p == '^' ? charset_not : charset); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (*p == '^') ~~~~~~~~~~~~~~ p++; ~~~~ /* Remember the first position in the bracket expression. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ p1 = p; ~~~~~~~ /* Push the number of bytes in the bitmap. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ BUF_PUSH ((1 << BYTEWIDTH) / BYTEWIDTH); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Clear the whole map. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~ memset (buf_end, 0, (1 << BYTEWIDTH) / BYTEWIDTH); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* charset_not matches newline according to a syntax bit. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if ((re_opcode_t) buf_end[-2] == charset_not ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ && (syntax & RE_HAT_LISTS_NOT_NEWLINE)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ SET_LIST_BIT ('\n'); ~~~~~~~~~~~~~~~~~~~~ /* Read in characters and ranges, setting map bits. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (;;) ~~~~~~~~ { ~ if (p == pend) FREE_STACK_RETURN (REG_EBRACK); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PATFETCH (c); ~~~~~~~~~~~~~ /* Frumble-bumble, we may have found some extended chars. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Need to start over, process everything using the general ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ extended-char mechanism, and need to use charset_mule and ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ charset_mule_not instead of charset and charset_not. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ MAYBE_START_OVER_WITH_EXTENDED (c); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* \ might escape characters inside [...] and [^...]. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if ((syntax & RE_BACKSLASH_ESCAPE_IN_LISTS) && c == '\\') ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ if (p == pend) FREE_STACK_RETURN (REG_EESCAPE); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PATFETCH (c1); ~~~~~~~~~~~~~~ MAYBE_START_OVER_WITH_EXTENDED (c1); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ SET_LIST_BIT (c1); ~~~~~~~~~~~~~~~~~~ continue; ~~~~~~~~~ } ~ /* Could be the end of the bracket expression. If it's ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ not (i.e., when the bracket expression is `[]' so ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ far), the ']' character bit gets set way below. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (c == ']' && p != p1 + 1) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ /* Look ahead to see if it's a range when the last thing ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ was a character class. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~ if (had_char_class && c == '-' && *p != ']') ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ FREE_STACK_RETURN (REG_ERANGE); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Look ahead to see if it's a range when the last thing ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ was a character: if this is a hyphen not at the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ beginning or the end of a list, then it's the range ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ operator. */ ~~~~~~~~~~~~~ if (c == '-' ~~~~~~~~~~~~ && !(p - 2 >= pattern && p[-2] == '[') ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ && !(p - 3 >= pattern && p[-3] == '[' && p[-2] == '^') ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ && *p != ']') ~~~~~~~~~~~~~ { ~ reg_errcode_t ret; ~~~~~~~~~~~~~~~~~~ MAYBE_START_OVER_WITH_EXTENDED (*(unsigned char *)p); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ret = compile_range (&p, pend, translate, syntax, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ buf_end); ~~~~~~~~~ if (ret != REG_NOERROR) FREE_STACK_RETURN (ret); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ else if (p[0] == '-' && p[1] != ']') ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { /* This handles ranges made up of characters only. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ reg_errcode_t ret; ~~~~~~~~~~~~~~~~~~ /* Move past the `-'. */ ~~~~~~~~~~~~~~~~~~~~~~~~~ PATFETCH (c1); ~~~~~~~~~~~~~~ MAYBE_START_OVER_WITH_EXTENDED (*(unsigned char *)p); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ret = compile_range (&p, pend, translate, syntax, buf_end); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (ret != REG_NOERROR) FREE_STACK_RETURN (ret); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* See if we're at the beginning of a possible character ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ class. */ ~~~~~~~~~~ else if (syntax & RE_CHAR_CLASSES && c == '[' && *p == ':') ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ re_char *str = p + 1; ~~~~~~~~~~~~~~~~~~~~~ PATFETCH (c); ~~~~~~~~~~~~~ c1 = 0; ~~~~~~~ /* If pattern is `[[:'. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (p == pend) FREE_STACK_RETURN (REG_EBRACK); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (;;) ~~~~~~~~ { ~ PATFETCH (c); ~~~~~~~~~~~~~ if ((c == ':' && *p == ']') || p == pend) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ break; ~~~~~~ } ~ c1++; ~~~~~ } ~ /* If isn't a word bracketed by `[:' and `:]': ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ undo the ending character, the letters, and leave ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the leading `:' and `[' (but set bits for them). */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (c == ':' && *p == ']') ~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ re_wctype_t cc = re_wctype (str, c1); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int ch; ~~~~~~~ if (cc == RECC_ERROR) ~~~~~~~~~~~~~~~~~~~~~ FREE_STACK_RETURN (REG_ECTYPE); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Throw away the ] at the end of the character ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ class. */ ~~~~~~~~~~ PATFETCH (c); ~~~~~~~~~~~~~ if (p == pend) FREE_STACK_RETURN (REG_EBRACK); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef MULE ~~~~~~~~~~~ if (re_wctype_can_match_non_ascii (cc)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ goto start_over_with_extended; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ #endif /* MULE */ ~~~~~~~~~~~~~~~~~ for (ch = 0; ch < (1 << BYTEWIDTH); ++ch) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ if (re_iswctype (ch, cc ~~~~~~~~~~~~~~~~~~~~~~~ RE_ISWCTYPE_ARG (current_buffer))) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ SET_LIST_BIT (ch); ~~~~~~~~~~~~~~~~~~ } ~ } ~ had_char_class = true; ~~~~~~~~~~~~~~~~~~~~~~ } ~ else ~~~~ { ~ c1++; ~~~~~ while (c1--) ~~~~~~~~~~~~ PATUNFETCH; ~~~~~~~~~~~ SET_LIST_BIT ('['); ~~~~~~~~~~~~~~~~~~~ SET_LIST_BIT (':'); ~~~~~~~~~~~~~~~~~~~ had_char_class = false; ~~~~~~~~~~~~~~~~~~~~~~~ } ~ } ~ else ~~~~ { ~ had_char_class = false; ~~~~~~~~~~~~~~~~~~~~~~~ SET_LIST_BIT (c); ~~~~~~~~~~~~~~~~~ } ~ } ~ /* Discard any (non)matching list bytes that are all 0 at the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ end of the map. Decrease the map-length byte too. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ while ((int) buf_end[-1] > 0 && buf_end[buf_end[-1] - 1] == 0) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ buf_end[-1]--; ~~~~~~~~~~~~~~ buf_end += buf_end[-1]; ~~~~~~~~~~~~~~~~~~~~~~~ } ~ break; ~~~~~~ #ifdef MULE ~~~~~~~~~~~ start_over_with_extended: ~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ REGISTER Lisp_Object rtab = Qnil; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Bitbyte flags = 0; ~~~~~~~~~~~~~~~~~~ int bytes_needed = sizeof (flags); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_bool had_char_class = false; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* There are extended chars here, which means we need to use the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ unified range-table format. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (buf_end[-2] == charset) ~~~~~~~~~~~~~~~~~~~~~~~~~~~ buf_end[-2] = charset_mule; ~~~~~~~~~~~~~~~~~~~~~~~~~~~ else ~~~~ buf_end[-2] = charset_mule_not; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ buf_end--; ~~~~~~~~~~ p = p1; /* go back to the beginning of the charset, after ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ a possible ^. */ ~~~~~~~~~~~~~~~~ rtab = Vthe_lisp_rangetab; ~~~~~~~~~~~~~~~~~~~~~~~~~~ Fclear_range_table (rtab); ~~~~~~~~~~~~~~~~~~~~~~~~~~ /* charset_not matches newline according to a syntax bit. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if ((re_opcode_t) buf_end[-1] == charset_mule_not ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ && (syntax & RE_HAT_LISTS_NOT_NEWLINE)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ SET_RANGETAB_BIT ('\n'); ~~~~~~~~~~~~~~~~~~~~~~~~ /* Read in characters and ranges, setting map bits. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (;;) ~~~~~~~~ { ~ if (p == pend) FREE_STACK_RETURN (REG_EBRACK); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PATFETCH (c); ~~~~~~~~~~~~~ /* \ might escape characters inside [...] and [^...]. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if ((syntax & RE_BACKSLASH_ESCAPE_IN_LISTS) && c == '\\') ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ if (p == pend) FREE_STACK_RETURN (REG_EESCAPE); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PATFETCH (c1); ~~~~~~~~~~~~~~ SET_RANGETAB_BIT (c1); ~~~~~~~~~~~~~~~~~~~~~~ continue; ~~~~~~~~~ } ~ /* Could be the end of the bracket expression. If it's ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ not (i.e., when the bracket expression is `[]' so ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ far), the ']' character bit gets set way below. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (c == ']' && p != p1 + 1) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ /* Look ahead to see if it's a range when the last thing ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ was a character class. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~ if (had_char_class && c == '-' && *p != ']') ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ FREE_STACK_RETURN (REG_ERANGE); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Look ahead to see if it's a range when the last thing ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ was a character: if this is a hyphen not at the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ beginning or the end of a list, then it's the range ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ operator. */ ~~~~~~~~~~~~~ if (c == '-' ~~~~~~~~~~~~ && !(p - 2 >= pattern && p[-2] == '[') ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ && !(p - 3 >= pattern && p[-3] == '[' && p[-2] == '^') ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ && *p != ']') ~~~~~~~~~~~~~ { ~ reg_errcode_t ret; ~~~~~~~~~~~~~~~~~~ ret = compile_extended_range (&p, pend, translate, syntax, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ rtab); ~~~~~~ if (ret != REG_NOERROR) FREE_STACK_RETURN (ret); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ else if (p[0] == '-' && p[1] != ']') ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { /* This handles ranges made up of characters only. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ reg_errcode_t ret; ~~~~~~~~~~~~~~~~~~ /* Move past the `-'. */ ~~~~~~~~~~~~~~~~~~~~~~~~~ PATFETCH (c1); ~~~~~~~~~~~~~~ ret = compile_extended_range (&p, pend, translate, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ syntax, rtab); ~~~~~~~~~~~~~~ if (ret != REG_NOERROR) FREE_STACK_RETURN (ret); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* See if we're at the beginning of a possible character ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ class. */ ~~~~~~~~~~ else if (syntax & RE_CHAR_CLASSES && c == '[' && *p == ':') ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ re_char *str = p + 1; ~~~~~~~~~~~~~~~~~~~~~ PATFETCH (c); ~~~~~~~~~~~~~ c1 = 0; ~~~~~~~ /* If pattern is `[[:'. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (p == pend) FREE_STACK_RETURN (REG_EBRACK); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (;;) ~~~~~~~~ { ~ PATFETCH (c); ~~~~~~~~~~~~~ if ((c == ':' && *p == ']') || p == pend) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ break; ~~~~~~ } ~ c1++; ~~~~~ } ~ /* If isn't a word bracketed by `[:' and `:]': ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ undo the ending character, the letters, and leave ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the leading `:' and `[' (but set bits for them). */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (c == ':' && *p == ']') ~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ re_wctype_t cc = re_wctype (str, c1); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ reg_errcode_t ret = REG_NOERROR; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (cc == RECC_ERROR) ~~~~~~~~~~~~~~~~~~~~~ FREE_STACK_RETURN (REG_ECTYPE); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Throw away the ] at the end of the character ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ class. */ ~~~~~~~~~~ PATFETCH (c); ~~~~~~~~~~~~~ if (p == pend) FREE_STACK_RETURN (REG_EBRACK); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ret = compile_char_class (cc, rtab, &flags); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (ret != REG_NOERROR) FREE_STACK_RETURN (ret); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ had_char_class = true; ~~~~~~~~~~~~~~~~~~~~~~ } ~ else ~~~~ { ~ c1++; ~~~~~ while (c1--) ~~~~~~~~~~~~ PATUNFETCH; ~~~~~~~~~~~ SET_RANGETAB_BIT ('['); ~~~~~~~~~~~~~~~~~~~~~~~ SET_RANGETAB_BIT (':'); ~~~~~~~~~~~~~~~~~~~~~~~ had_char_class = false; ~~~~~~~~~~~~~~~~~~~~~~~ } ~ } ~ else ~~~~ { ~ had_char_class = false; ~~~~~~~~~~~~~~~~~~~~~~~ SET_RANGETAB_BIT (c); ~~~~~~~~~~~~~~~~~~~~~ } ~ } ~ bytes_needed += unified_range_table_bytes_needed (rtab); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ GET_BUFFER_SPACE (bytes_needed); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ *buf_end++ = flags; ~~~~~~~~~~~~~~~~~~~ unified_range_table_copy_data (rtab, buf_end); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ buf_end += unified_range_table_bytes_used (buf_end); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ } ~ #endif /* MULE */ ~~~~~~~~~~~~~~~~~ case '(': ~~~~~~~~~ if (syntax & RE_NO_BK_PARENS) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto handle_open; ~~~~~~~~~~~~~~~~~ else ~~~~ goto normal_char; ~~~~~~~~~~~~~~~~~ case ')': ~~~~~~~~~ if (syntax & RE_NO_BK_PARENS) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto handle_close; ~~~~~~~~~~~~~~~~~~ else ~~~~ goto normal_char; ~~~~~~~~~~~~~~~~~ case '\n': ~~~~~~~~~~ if (syntax & RE_NEWLINE_ALT) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto handle_alt; ~~~~~~~~~~~~~~~~ else ~~~~ goto normal_char; ~~~~~~~~~~~~~~~~~ case '|': ~~~~~~~~~ if (syntax & RE_NO_BK_VBAR) ~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto handle_alt; ~~~~~~~~~~~~~~~~ else ~~~~ goto normal_char; ~~~~~~~~~~~~~~~~~ case '{': ~~~~~~~~~ if (syntax & RE_INTERVALS && syntax & RE_NO_BK_BRACES) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto handle_interval; ~~~~~~~~~~~~~~~~~~~~~ else ~~~~ goto normal_char; ~~~~~~~~~~~~~~~~~ case '\\': ~~~~~~~~~~ if (p == pend) FREE_STACK_RETURN (REG_EESCAPE); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Do not translate the character after the \, so that we can ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ distinguish, e.g., \B from \b, even if we normally would ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ translate, e.g., B to b. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PATFETCH_RAW (c); ~~~~~~~~~~~~~~~~~ switch (c) ~~~~~~~~~~ { ~ case '(': ~~~~~~~~~ if (syntax & RE_NO_BK_PARENS) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto normal_backslash; ~~~~~~~~~~~~~~~~~~~~~~ handle_open: ~~~~~~~~~~~~ { ~ regnum_t r = 0; ~~~~~~~~~~~~~~~ re_bool shy = 0, named_nonshy = 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!(syntax & RE_NO_SHY_GROUPS) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ && p != pend && itext_ichar_eql (p, '?')) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ INC_IBYTEPTR (p); /* Gobble up the '?'. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PATFETCH (c); /* Fetch the next character, which may be a ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ digit. */ ~~~~~~~~~ switch (c) ~~~~~~~~~~ { ~ case ':': /* shy groups */ ~~~~~~~~~~~~~~~~~~~~~~~~~~ shy = 1; ~~~~~~~~ break; ~~~~~~ case '1': case '2': case '3': case '4': ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case '5': case '6': case '7': case '8': case '9': ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PATUNFETCH; ~~~~~~~~~~~ GET_UNSIGNED_NUMBER (r); ~~~~~~~~~~~~~~~~~~~~~~~~ if (itext_ichar_eql (p, ':')) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ named_nonshy = 1; ~~~~~~~~~~~~~~~~~ INC_IBYTEPTR (p); /* Gobble up the ':'. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ /* Otherwise, fall through and error. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* An explicitly specified regnum must start with ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ non-0. */ ~~~~~~~~~ case '0': ~~~~~~~~~ default: ~~~~~~~~ FREE_STACK_RETURN (REG_BADPAT); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ } ~ ++regnum; ~~~~~~~~~ bufp->re_ngroups++; ~~~~~~~~~~~~~~~~~~~ if (bufp->re_ngroups > MAX_REGNUM) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ FREE_STACK_RETURN (REG_ESUBREG); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ if (!shy) ~~~~~~~~~ { ~ if (named_nonshy) ~~~~~~~~~~~~~~~~~ { ~ if (r < bufp->external_to_internal_register_size) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ if (group_in_compile_stack ~~~~~~~~~~~~~~~~~~~~~~~~~~ (compile_stack, ~~~~~~~~~~~~~~~ bufp->external_to_internal_register[r])) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ /* GNU errors in this context, which is ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ inconsistent; it otherwise has no problem ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ with named non-shy groups overriding ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ previously-assigned group numbers. I choose ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ to error here for consistency with GNU for ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ those writing code that should target ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ both. */ ~~~~~~~~ FREE_STACK_RETURN (REG_ESUBREG); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ } ~ if (r > bufp->re_nsub) ~~~~~~~~~~~~~~~~~~~~~~ { ~ bufp->re_nsub = r; ~~~~~~~~~~~~~~~~~~ } ~ } ~ else ~~~~ { ~ r = ++(bufp->re_nsub); ~~~~~~~~~~~~~~~~~~~~~~ } ~ while (bufp->external_to_internal_register_size <= ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->re_nsub) ~~~~~~~~~~~~~~ { ~ int i; ~~~~~~ int old_size = ~~~~~~~~~~~~~~ bufp->external_to_internal_register_size; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->external_to_internal_register_size ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ += max (old_size + 5, bufp->re_nsub + 5); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RETALLOC (bufp->external_to_internal_register, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->external_to_internal_register_size, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int); ~~~~~ for (i = old_size; ~~~~~~~~~~~~~~~~~~ i < bufp->external_to_internal_register_size; i++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->external_to_internal_register[i] = ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (int) 0xDEADBEEF; ~~~~~~~~~~~~~~~~~ } ~ /* This is explicitly [r] rather than [bufp->re_nsub] for ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the case that the named nonshy group references an ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ unused register number less than bufp->re_nsub. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->external_to_internal_register[r] = ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->re_ngroups; ~~~~~~~~~~~~~~~~~ } ~ if (COMPILE_STACK_FULL) ~~~~~~~~~~~~~~~~~~~~~~~ { ~ RETALLOC (compile_stack.stack, compile_stack.size << 1, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ compile_stack_elt_t); ~~~~~~~~~~~~~~~~~~~~~ if (compile_stack.stack == NULL) return REG_ESPACE; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ compile_stack.size <<= 1; ~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* These are the values to restore when we hit end of this ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ group. They are all relative offsets, so that if the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ whole pattern moves because of realloc, they will still ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ be valid. */ ~~~~~~~~~~~~~ COMPILE_STACK_TOP.begalt_offset = begalt - bufp->buffer; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ COMPILE_STACK_TOP.fixup_alt_jump ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ = fixup_alt_jump ? fixup_alt_jump - bufp->buffer + 1 : 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ COMPILE_STACK_TOP.laststart_offset = buf_end - bufp->buffer; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ COMPILE_STACK_TOP.regnum = bufp->re_ngroups; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ COMPILE_STACK_TOP.inner_group_offset ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ = buf_end - bufp->buffer + 3; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* We will eventually replace the 0 with the number of ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ groups inner to this one, using inner_group_offset, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ above. */ ~~~~~~~~~ GET_BUFFER_SPACE (5); ~~~~~~~~~~~~~~~~~~~~~ store_op2 (start_memory, buf_end, bufp->re_ngroups, 0); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ buf_end += 5; ~~~~~~~~~~~~~ compile_stack.avail++; ~~~~~~~~~~~~~~~~~~~~~~ fixup_alt_jump = 0; ~~~~~~~~~~~~~~~~~~~ laststart = 0; ~~~~~~~~~~~~~~ begalt = buf_end; ~~~~~~~~~~~~~~~~~ pending_exact = 0; ~~~~~~~~~~~~~~~~~~ } ~ break; ~~~~~~ case ')': ~~~~~~~~~ if (syntax & RE_NO_BK_PARENS) goto normal_backslash; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (COMPILE_STACK_EMPTY) ~~~~~~~~~~~~~~~~~~~~~~~~ { ~ if (syntax & RE_UNMATCHED_RIGHT_PAREN_ORD) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto normal_backslash; ~~~~~~~~~~~~~~~~~~~~~~ else ~~~~ FREE_STACK_RETURN (REG_ERPAREN); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ handle_close: ~~~~~~~~~~~~~ if (fixup_alt_jump) ~~~~~~~~~~~~~~~~~~~ { /* Push a dummy failure point at the end of the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ alternative for a possible future ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `pop_failure_jump' to pop. See comments at ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `push_dummy_failure' in `re_match_2'. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ BUF_PUSH (push_dummy_failure); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* We allocated space for this jump when we assigned ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ to `fixup_alt_jump', in the `handle_alt' case below. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ STORE_JUMP (jump_past_alt, fixup_alt_jump, buf_end - 1); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* See similar code for backslashed left paren above. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (COMPILE_STACK_EMPTY) ~~~~~~~~~~~~~~~~~~~~~~~~ { ~ if (syntax & RE_UNMATCHED_RIGHT_PAREN_ORD) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto normal_char; ~~~~~~~~~~~~~~~~~ else ~~~~ FREE_STACK_RETURN (REG_ERPAREN); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* Since we just checked for an empty stack above, this ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ``can't happen''. */ ~~~~~~~~~~~~~~~~~~~~~ assert (compile_stack.avail != 0); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ /* We don't just want to restore into `regnum', because ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ later groups should continue to be numbered higher, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ as in `(ab)c(de)' -- the second group is #2. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regnum_t this_group_regnum; ~~~~~~~~~~~~~~~~~~~~~~~~~~~ unsigned char *inner_group_loc; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ compile_stack.avail--; ~~~~~~~~~~~~~~~~~~~~~~ begalt = bufp->buffer + COMPILE_STACK_TOP.begalt_offset; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fixup_alt_jump ~~~~~~~~~~~~~~ = COMPILE_STACK_TOP.fixup_alt_jump ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ? bufp->buffer + COMPILE_STACK_TOP.fixup_alt_jump - 1 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ : 0; ~~~~ laststart = bufp->buffer + COMPILE_STACK_TOP.laststart_offset; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ this_group_regnum = COMPILE_STACK_TOP.regnum; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ pending_exact = 0; ~~~~~~~~~~~~~~~~~~ /* We're at the end of the group, so now we know how many ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ groups were inside this one. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ inner_group_loc ~~~~~~~~~~~~~~~ = bufp->buffer + COMPILE_STACK_TOP.inner_group_offset; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ STORE_NUMBER (inner_group_loc, regnum - this_group_regnum); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ GET_BUFFER_SPACE (5); ~~~~~~~~~~~~~~~~~~~~~ store_op2 (stop_memory, buf_end, this_group_regnum, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regnum - this_group_regnum); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ buf_end += 5; ~~~~~~~~~~~~~ } ~ break; ~~~~~~ case '|': /* `\|'. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~ if (syntax & RE_LIMITED_OPS || syntax & RE_NO_BK_VBAR) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto normal_backslash; ~~~~~~~~~~~~~~~~~~~~~~ handle_alt: ~~~~~~~~~~~ if (syntax & RE_LIMITED_OPS) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto normal_char; ~~~~~~~~~~~~~~~~~ /* Insert before the previous alternative a jump which ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ jumps to this alternative if the former fails. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ GET_BUFFER_SPACE (3); ~~~~~~~~~~~~~~~~~~~~~ INSERT_JUMP (on_failure_jump, begalt, buf_end + 6); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ pending_exact = 0; ~~~~~~~~~~~~~~~~~~ buf_end += 3; ~~~~~~~~~~~~~ /* The alternative before this one has a jump after it ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ which gets executed if it gets matched. Adjust that ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ jump so it will jump to this alternative's analogous ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ jump (put in below, which in turn will jump to the next ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (if any) alternative's such jump, etc.). The last such ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ jump jumps to the correct final destination. A picture: ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ _____ _____ ~~~~~~~~~~~ | | | | ~~~~~~~~~~~ | v | v ~~~~~~~~~~~ a | b | c ~~~~~~~~~~~ If we are at `b', then fixup_alt_jump right now points to a ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ three-byte space after `a'. We'll put in the jump, set ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fixup_alt_jump to right after `b', and leave behind three ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bytes which we'll fill in when we get to after `c'. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (fixup_alt_jump) ~~~~~~~~~~~~~~~~~~~ STORE_JUMP (jump_past_alt, fixup_alt_jump, buf_end); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Mark and leave space for a jump after this alternative, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ to be filled in later either by next alternative or ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ when know we're at the end of a series of alternatives. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fixup_alt_jump = buf_end; ~~~~~~~~~~~~~~~~~~~~~~~~~ GET_BUFFER_SPACE (3); ~~~~~~~~~~~~~~~~~~~~~ buf_end += 3; ~~~~~~~~~~~~~ laststart = 0; ~~~~~~~~~~~~~~ begalt = buf_end; ~~~~~~~~~~~~~~~~~ break; ~~~~~~ case '{': ~~~~~~~~~ /* If \{ is a literal. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!(syntax & RE_INTERVALS) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* If we're at `\{' and it's not the open-interval ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ operator. */ ~~~~~~~~~~~~~ || ((syntax & RE_INTERVALS) && (syntax & RE_NO_BK_BRACES)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || (p - 2 == pattern && p == pend)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto normal_backslash; ~~~~~~~~~~~~~~~~~~~~~~ #define BAD_INTERVAL(errnum) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ do \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (syntax & RE_NO_BK_BRACES) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto unfetch_interval; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ else \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ FREE_STACK_RETURN (errnum); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } while (0) ~~~~~~~~~~~ handle_interval: ~~~~~~~~~~~~~~~~ { ~ /* If got here, then the syntax allows intervals. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* At least (most) this many matches must be made. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int lower_bound = 0, upper_bound = -1; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ beg_interval = p - 1; ~~~~~~~~~~~~~~~~~~~~~ if (p == pend || itext_ichar_eql (p, '+')) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ BAD_INTERVAL (REG_EBRACE); ~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ GET_UNSIGNED_NUMBER (lower_bound); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (p == pend) BAD_INTERVAL (REG_EBRACE); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PATFETCH (c); ~~~~~~~~~~~~~ if (c == ',') ~~~~~~~~~~~~~ { ~ if (p == pend || itext_ichar_eql (p, '+')) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ BAD_INTERVAL (REG_EBRACE); ~~~~~~~~~~~~~~~~~~~~~~~~~~ GET_UNSIGNED_NUMBER (upper_bound); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (upper_bound < 0) upper_bound = RE_DUP_MAX; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (p == pend) BAD_INTERVAL (REG_EBRACE); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PATFETCH (c); ~~~~~~~~~~~~~ } ~ else ~~~~ { ~ /* Interval such as `{1}' => match exactly once. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ upper_bound = lower_bound; ~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ if (lower_bound > upper_bound) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ BAD_INTERVAL (REG_EBRACE); ~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ if (upper_bound > RE_DUP_MAX) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ BAD_INTERVAL (REG_ESIZEBR); ~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ if (!(syntax & RE_NO_BK_BRACES)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ if (c != '\\') ~~~~~~~~~~~~~~ FREE_STACK_RETURN (REG_BADBR); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (p == pend) ~~~~~~~~~~~~~~ FREE_STACK_RETURN (REG_EESCAPE); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PATFETCH (c); ~~~~~~~~~~~~~ } ~ if (c != '}') ~~~~~~~~~~~~~ { ~ BAD_INTERVAL (REG_EBRACE); ~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* We just parsed a valid interval. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* It's invalid to have no preceding RE. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!laststart) ~~~~~~~~~~~~~~~ { ~ if (syntax & RE_CONTEXT_INVALID_OPS) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ FREE_STACK_RETURN (REG_BADRPT); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ else if (syntax & RE_CONTEXT_INDEP_OPS) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ laststart = buf_end; ~~~~~~~~~~~~~~~~~~~~ else ~~~~ goto unfetch_interval; ~~~~~~~~~~~~~~~~~~~~~~ } ~ /* If the upper bound is zero, don't want to succeed at ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ all; jump from `laststart' to `b + 3', which will be ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the end of the buffer after we insert the jump. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (upper_bound == 0) ~~~~~~~~~~~~~~~~~~~~~ { ~ GET_BUFFER_SPACE (3); ~~~~~~~~~~~~~~~~~~~~~ INSERT_JUMP (jump, laststart, buf_end + 3); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ buf_end += 3; ~~~~~~~~~~~~~ } ~ /* Otherwise, we have a nontrivial interval. When ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ we're all done, the pattern will look like: ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ set_number_at ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ set_number_at ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ succeed_n ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ~~~~~~~~~~~~~~ jump_n ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (The upper bound and `jump_n' are omitted if ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `upper_bound' is 1, though.) */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ else ~~~~ { /* If the upper bound is > 1, we need to insert ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ more at the end of the loop. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int nbytes = 10 + (upper_bound > 1) * 10; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ GET_BUFFER_SPACE (nbytes); ~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Initialize lower bound of the `succeed_n', even ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ though it will be set during matching by its ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ attendant `set_number_at' (inserted next), ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ because `re_compile_fastmap' needs to know. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Jump to the `jump_n' we might insert below. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ INSERT_JUMP2 (succeed_n, laststart, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ buf_end + 5 + (upper_bound > 1) * 5, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ lower_bound); ~~~~~~~~~~~~~ buf_end += 5; ~~~~~~~~~~~~~ /* Code to initialize the lower bound. Insert ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ before the `succeed_n'. The `5' is the last two ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bytes of this `set_number_at', plus 3 bytes of ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the following `succeed_n'. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ insert_op2 (set_number_at, laststart, 5, lower_bound, buf_end); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ buf_end += 5; ~~~~~~~~~~~~~ if (upper_bound > 1) ~~~~~~~~~~~~~~~~~~~~ { /* More than one repetition is allowed, so ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ append a backward jump to the `succeed_n' ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ that starts this interval. ~~~~~~~~~~~~~~~~~~~~~~~~~~ When we've reached this during matching, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ we'll have matched the interval once, so ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ jump back only `upper_bound - 1' times. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ STORE_JUMP2 (jump_n, buf_end, laststart + 5, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ upper_bound - 1); ~~~~~~~~~~~~~~~~~ buf_end += 5; ~~~~~~~~~~~~~ /* The location we want to set is the second ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ parameter of the `jump_n'; that is `b-2' as ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ an absolute address. `laststart' will be ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the `set_number_at' we're about to insert; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `laststart+3' the number to set, the source ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for the relative address. But we are ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ inserting into the middle of the pattern -- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ so everything is getting moved up by 5. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Conclusion: (b - 2) - (laststart + 3) + 5, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ i.e., b - laststart. ~~~~~~~~~~~~~~~~~~~~ We insert this at the beginning of the loop ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ so that if we fail during matching, we'll ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ reinitialize the bounds. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ insert_op2 (set_number_at, laststart, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ buf_end - laststart, ~~~~~~~~~~~~~~~~~~~~ upper_bound - 1, buf_end); ~~~~~~~~~~~~~~~~~~~~~~~~~~ buf_end += 5; ~~~~~~~~~~~~~ } ~ } ~ pending_exact = 0; ~~~~~~~~~~~~~~~~~~ beg_interval = NULL; ~~~~~~~~~~~~~~~~~~~~ } ~ break; ~~~~~~ #undef BAD_INTERVAL ~~~~~~~~~~~~~~~~~~~ unfetch_interval: ~~~~~~~~~~~~~~~~~ /* If an invalid interval, match the characters as literals. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ assert (beg_interval); ~~~~~~~~~~~~~~~~~~~~~~ p = beg_interval; ~~~~~~~~~~~~~~~~~ beg_interval = NULL; ~~~~~~~~~~~~~~~~~~~~ /* normal_char and normal_backslash need `c'. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PATFETCH (c); ~~~~~~~~~~~~~ if (!(syntax & RE_NO_BK_BRACES)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ if (p > pattern && p[-1] == '\\') ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto normal_backslash; ~~~~~~~~~~~~~~~~~~~~~~ } ~ goto normal_char; ~~~~~~~~~~~~~~~~~ #ifdef emacs ~~~~~~~~~~~~ /* There is no way to specify the before_dot and after_dot ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ operators. rms says this is ok. --karl */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case '=': ~~~~~~~~~ BUF_PUSH (at_dot); ~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case 's': ~~~~~~~~~ laststart = buf_end; ~~~~~~~~~~~~~~~~~~~~ PATFETCH (c); ~~~~~~~~~~~~~ /* XEmacs addition */ ~~~~~~~~~~~~~~~~~~~~~ if (c >= 0x80 || syntax_spec_code[c] == 0377) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ FREE_STACK_RETURN (REG_ESYNTAX); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ BUF_PUSH_2 (syntaxspec, syntax_spec_code[c]); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case 'S': ~~~~~~~~~ laststart = buf_end; ~~~~~~~~~~~~~~~~~~~~ PATFETCH (c); ~~~~~~~~~~~~~ /* XEmacs addition */ ~~~~~~~~~~~~~~~~~~~~~ if (c >= 0x80 || syntax_spec_code[c] == 0377) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ FREE_STACK_RETURN (REG_ESYNTAX); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ BUF_PUSH_2 (notsyntaxspec, syntax_spec_code[c]); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ #ifdef MULE ~~~~~~~~~~~ /* 97.2.17 jhod merged in to XEmacs from mule-2.3 */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case 'c': ~~~~~~~~~ laststart = buf_end; ~~~~~~~~~~~~~~~~~~~~ PATFETCH_RAW (c); ~~~~~~~~~~~~~~~~~ if (c < 32 || c > 127) ~~~~~~~~~~~~~~~~~~~~~~ FREE_STACK_RETURN (REG_ECATEGORY); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ BUF_PUSH_2 (categoryspec, c); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case 'C': ~~~~~~~~~ laststart = buf_end; ~~~~~~~~~~~~~~~~~~~~ PATFETCH_RAW (c); ~~~~~~~~~~~~~~~~~ if (c < 32 || c > 127) ~~~~~~~~~~~~~~~~~~~~~~ FREE_STACK_RETURN (REG_ECATEGORY); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ BUF_PUSH_2 (notcategoryspec, c); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ /* end of category patch */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif /* MULE */ ~~~~~~~~~~~~~~~~~ #endif /* emacs */ ~~~~~~~~~~~~~~~~~~ case 'w': ~~~~~~~~~ laststart = buf_end; ~~~~~~~~~~~~~~~~~~~~ BUF_PUSH (wordchar); ~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case 'W': ~~~~~~~~~ laststart = buf_end; ~~~~~~~~~~~~~~~~~~~~ BUF_PUSH (notwordchar); ~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case '<': ~~~~~~~~~ BUF_PUSH (wordbeg); ~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case '>': ~~~~~~~~~ BUF_PUSH (wordend); ~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case 'b': ~~~~~~~~~ BUF_PUSH (wordbound); ~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case 'B': ~~~~~~~~~ BUF_PUSH (notwordbound); ~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case '`': ~~~~~~~~~ BUF_PUSH (begbuf); ~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case '\'': ~~~~~~~~~~ BUF_PUSH (endbuf); ~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case '1': case '2': case '3': case '4': case '5': ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case '6': case '7': case '8': case '9': ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ regnum_t reg = -1, regint; ~~~~~~~~~~~~~~~~~~~~~~~~~~ if (syntax & RE_NO_BK_REFS) ~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto normal_char; ~~~~~~~~~~~~~~~~~ PATUNFETCH; ~~~~~~~~~~~ GET_UNSIGNED_NUMBER (reg); ~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Progressively divide down the backreference until we find ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ one that corresponds to an existing register. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ while (reg > 10 && ~~~~~~~~~~~~~~~~~~ (syntax & RE_NO_MULTI_DIGIT_BK_REFS ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || reg > bufp->re_nsub ~~~~~~~~~~~~~~~~~~~~~~ || (bufp->external_to_internal_register[reg] ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ == (int) 0xDEADBEEF))) ~~~~~~~~~~~~~~~~~~~~~~ { ~ PATUNFETCH; ~~~~~~~~~~~ reg /= 10; ~~~~~~~~~~ } ~ if (reg > bufp->re_nsub ~~~~~~~~~~~~~~~~~~~~~~~ || (bufp->external_to_internal_register[reg] ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ == (int) 0xDEADBEEF)) ~~~~~~~~~~~~~~~~~~~~~ { ~ /* \N with one digit with a non-existing group has always ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ been a syntax error. ~~~~~~~~~~~~~~~~~~~~ GNU as of Fr 27 Mär 2020 16:24:07 GMT do not accept ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ multidigit backreferences; if they did there would be ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ an argument for this not being an error for those ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ backreferences that are less than some known named ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ backreference. As it is currently we should error, this ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ will give those writing code for XEmacs better ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ feedback. */ ~~~~~~~~~~~~ FREE_STACK_RETURN (REG_ESUBREG); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ regint = bufp->external_to_internal_register[reg]; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Can't back reference to a subexpression if inside of it. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (group_in_compile_stack (compile_stack, regint)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ /* Check REG, not REGINT. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ while (reg > 10) ~~~~~~~~~~~~~~~~ { ~ PATUNFETCH; ~~~~~~~~~~~ reg = reg / 10; ~~~~~~~~~~~~~~~ } ~ goto normal_char; ~~~~~~~~~~~~~~~~~ } ~ #ifdef emacs ~~~~~~~~~~~~ if (reg > 9 && ~~~~~~~~~~~~~~ bufp->warned_about_incompatible_back_references == 0) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ bufp->warned_about_incompatible_back_references = 1; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ warn_when_safe (intern ("regex"), Qinfo, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ "Back reference \\%d now has new " ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ "semantics in %s", reg, pattern); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ #endif ~~~~~~ laststart = buf_end; ~~~~~~~~~~~~~~~~~~~~ GET_BUFFER_SPACE (3); ~~~~~~~~~~~~~~~~~~~~~ store_op1 (duplicate, buf_end, regint); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ buf_end += 3; ~~~~~~~~~~~~~ } ~ break; ~~~~~~ case '+': ~~~~~~~~~ case '?': ~~~~~~~~~ if (syntax & RE_BK_PLUS_QM) ~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto handle_plus; ~~~~~~~~~~~~~~~~~ else ~~~~ goto normal_backslash; ~~~~~~~~~~~~~~~~~~~~~~ default: ~~~~~~~~ normal_backslash: ~~~~~~~~~~~~~~~~~ /* You might think it would be useful for \ to mean ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ not to translate; but if we don't translate it, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ it will never match anything. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ c = RE_TRANSLATE (c); ~~~~~~~~~~~~~~~~~~~~~ goto normal_char; ~~~~~~~~~~~~~~~~~ } ~ break; ~~~~~~ default: ~~~~~~~~ /* Expects the character in `c'. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* `p' points to the location after where `c' came from. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ normal_char: ~~~~~~~~~~~~ { ~ /* The following conditional synced to GNU Emacs 22.1. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* If no exactn currently being built. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!pending_exact ~~~~~~~~~~~~~~~~~~ /* If last exactn not at current position. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || pending_exact + *pending_exact + 1 != buf_end ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* We have only one byte following the exactn for the count. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || *pending_exact >= (1 << BYTEWIDTH) - MAX_ICHAR_LEN ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* If followed by a repetition operator. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ If the lookahead fails because of end of pattern, any ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ trailing backslash will get caught later. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || (p != pend && (*p == '*' || *p == '^')) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || ((syntax & RE_BK_PLUS_QM) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ? p + 1 < pend && *p == '\\' && (p[1] == '+' || p[1] == '?') ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ : p != pend && (*p == '+' || *p == '?')) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || ((syntax & RE_INTERVALS) ~~~~~~~~~~~~~~~~~~~~~~~~~~~ && ((syntax & RE_NO_BK_BRACES) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ? p != pend && *p == '{' ~~~~~~~~~~~~~~~~~~~~~~~~ : p + 1 < pend && (p[0] == '\\' && p[1] == '{')))) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ /* Start building a new exactn. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ laststart = buf_end; ~~~~~~~~~~~~~~~~~~~~ BUF_PUSH_2 (exactn, 0); ~~~~~~~~~~~~~~~~~~~~~~~ pending_exact = buf_end - 1; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ #ifndef MULE ~~~~~~~~~~~~ BUF_PUSH (c); ~~~~~~~~~~~~~ (*pending_exact)++; ~~~~~~~~~~~~~~~~~~~ #else ~~~~~ { ~ Bytecount bt_count; ~~~~~~~~~~~~~~~~~~~ Ibyte tmp_buf[MAX_ICHAR_LEN]; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int i; ~~~~~~ bt_count = set_itext_ichar (tmp_buf, c); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (i = 0; i < bt_count; i++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ BUF_PUSH (tmp_buf[i]); ~~~~~~~~~~~~~~~~~~~~~~ (*pending_exact)++; ~~~~~~~~~~~~~~~~~~~ } ~ } ~ #endif ~~~~~~ break; ~~~~~~ } ~ } /* switch (c) */ ~~~~~~~~~~~~~~~~~~ } /* while p != pend */ ~~~~~~~~~~~~~~~~~~~~~~~ /* Through the pattern now. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (fixup_alt_jump) ~~~~~~~~~~~~~~~~~~~ STORE_JUMP (jump_past_alt, fixup_alt_jump, buf_end); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!COMPILE_STACK_EMPTY) ~~~~~~~~~~~~~~~~~~~~~~~~~ FREE_STACK_RETURN (REG_EPAREN); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* If we don't want backtracking, force success ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the first time we reach the end of the compiled pattern. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (syntax & RE_NO_POSIX_BACKTRACKING) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ BUF_PUSH (succeed); ~~~~~~~~~~~~~~~~~~~ xfree (compile_stack.stack); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* We have succeeded; set the length of the buffer. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->used = buf_end - bufp->buffer; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef DEBUG ~~~~~~~~~~~~ if (debug_regexps & RE_DEBUG_COMPILATION) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ DEBUG_PRINT1 ("\nCompiled pattern: \n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ print_compiled_pattern (bufp); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ #endif /* DEBUG */ ~~~~~~~~~~~~~~~~~~ #ifndef MATCH_MAY_ALLOCATE ~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Initialize the failure stack to the largest possible stack. This ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ isn't necessary unless we're trying to avoid calling alloca in ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the search and match routines. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ int num_regs = bufp->re_ngroups + 1; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Since DOUBLE_FAIL_STACK refuses to double only if the current size ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ is strictly greater than re_max_failures, the largest possible stack ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ is 2 * re_max_failures failure points. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (fail_stack.size < (2 * re_max_failures * MAX_FAILURE_ITEMS)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ fail_stack.size = (2 * re_max_failures * MAX_FAILURE_ITEMS); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (! fail_stack.stack) ~~~~~~~~~~~~~~~~~~~~~~~ fail_stack.stack ~~~~~~~~~~~~~~~~ = (fail_stack_elt_t *) xmalloc (fail_stack.size ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ * sizeof (fail_stack_elt_t)); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ else ~~~~ fail_stack.stack ~~~~~~~~~~~~~~~~ = (fail_stack_elt_t *) xrealloc (fail_stack.stack, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (fail_stack.size ~~~~~~~~~~~~~~~~ * sizeof (fail_stack_elt_t))); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ regex_grow_registers (num_regs); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ #endif /* not MATCH_MAY_ALLOCATE */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return REG_NOERROR; ~~~~~~~~~~~~~~~~~~~ } /* regex_compile */ ~~~~~~~~~~~~~~~~~~~~~ ~ /* Subroutines for `regex_compile'. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Store OP at LOC followed by two-byte integer parameter ARG. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ static void ~~~~~~~~~~~ store_op1 (re_opcode_t op, unsigned char *loc, int arg) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ *loc = (unsigned char) op; ~~~~~~~~~~~~~~~~~~~~~~~~~~ STORE_NUMBER (loc + 1, arg); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* Like `store_op1', but for two two-byte parameters ARG1 and ARG2. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ static void ~~~~~~~~~~~ store_op2 (re_opcode_t op, unsigned char *loc, int arg1, int arg2) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ *loc = (unsigned char) op; ~~~~~~~~~~~~~~~~~~~~~~~~~~ STORE_NUMBER (loc + 1, arg1); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ STORE_NUMBER (loc + 3, arg2); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* Copy the bytes from LOC to END to open up three bytes of space at LOC ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for OP followed by two-byte integer parameter ARG. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ static void ~~~~~~~~~~~ insert_op1 (re_opcode_t op, unsigned char *loc, int arg, unsigned char *end) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ REGISTER unsigned char *pfrom = end; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ REGISTER unsigned char *pto = end + 3; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ while (pfrom != loc) ~~~~~~~~~~~~~~~~~~~~ *--pto = *--pfrom; ~~~~~~~~~~~~~~~~~~ store_op1 (op, loc, arg); ~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* Like `insert_op1', but for two two-byte parameters ARG1 and ARG2. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ static void ~~~~~~~~~~~ insert_op2 (re_opcode_t op, unsigned char *loc, int arg1, int arg2, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ unsigned char *end) ~~~~~~~~~~~~~~~~~~~ { ~ REGISTER unsigned char *pfrom = end; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ REGISTER unsigned char *pto = end + 5; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ while (pfrom != loc) ~~~~~~~~~~~~~~~~~~~~ *--pto = *--pfrom; ~~~~~~~~~~~~~~~~~~ store_op2 (op, loc, arg1, arg2); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* P points to just after a ^ in PATTERN. Return true if that ^ comes ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ after an alternative or a begin-subexpression. We assume there is at ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ least one character before the ^. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ static re_bool ~~~~~~~~~~~~~~ at_begline_loc_p (re_char *pattern, re_char *p, reg_syntax_t syntax) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ re_char *prev = p - 2; ~~~~~~~~~~~~~~~~~~~~~~ re_bool prev_prev_backslash = prev > pattern && prev[-1] == '\\'; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return ~~~~~~ /* After a subexpression? */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (*prev == '(' && (syntax & RE_NO_BK_PARENS || prev_prev_backslash)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* After an alternative? */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || (*prev == '|' && (syntax & RE_NO_BK_VBAR || prev_prev_backslash)); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* The dual of at_begline_loc_p. This one is for $. We assume there is ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ at least one character after the $, i.e., `P < PEND'. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ static re_bool ~~~~~~~~~~~~~~ at_endline_loc_p (re_char *p, re_char *pend, int syntax) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ re_char *next = p; ~~~~~~~~~~~~~~~~~~ re_bool next_backslash = *next == '\\'; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_char *next_next = p + 1 < pend ? p + 1 : 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return ~~~~~~ /* Before a subexpression? */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (syntax & RE_NO_BK_PARENS ? *next == ')' ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ : next_backslash && next_next && *next_next == ')') ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Before an alternative? */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || (syntax & RE_NO_BK_VBAR ? *next == '|' ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ : next_backslash && next_next && *next_next == '|'); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* Returns true if REGNUM is in one of COMPILE_STACK's elements and ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ false if it's not. */ ~~~~~~~~~~~~~~~~~~~~~~ static re_bool ~~~~~~~~~~~~~~ group_in_compile_stack (compile_stack_type compile_stack, regnum_t regnum) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ int this_element; ~~~~~~~~~~~~~~~~~ for (this_element = compile_stack.avail - 1; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ this_element >= 0; ~~~~~~~~~~~~~~~~~~ this_element--) ~~~~~~~~~~~~~~~ if (compile_stack.stack[this_element].regnum == regnum) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return true; ~~~~~~~~~~~~ return false; ~~~~~~~~~~~~~ } ~ /* Read the ending character of a range (in a bracket expression) from the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ uncompiled pattern *P_PTR (which ends at PEND). We assume the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ starting character is in `P[-2]'. (`P[-1]' is the character `-'.) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Then we set the translation of all bits between the starting and ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ending characters (inclusive) in the compiled pattern B. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Return an error code. ~~~~~~~~~~~~~~~~~~~~~ We use these short variable names so we can use the same macros as ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `regex_compile' itself. ~~~~~~~~~~~~~~~~~~~~~~~ Under Mule, this is only called when both chars of the range are ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ASCII. */ ~~~~~~~~~ static reg_errcode_t ~~~~~~~~~~~~~~~~~~~~ compile_range (re_char **p_ptr, re_char *pend, RE_TRANSLATE_TYPE translate, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ reg_syntax_t syntax, unsigned char *buf_end) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ Ichar this_char; ~~~~~~~~~~~~~~~~ re_char *p = *p_ptr; ~~~~~~~~~~~~~~~~~~~~ int range_start, range_end; ~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (p == pend) ~~~~~~~~~~~~~~ return REG_ERANGE; ~~~~~~~~~~~~~~~~~~ /* Even though the pattern is a signed `char *', we need to fetch ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ with unsigned char *'s; if the high bit of the pattern character ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ is set, the range endpoints will be negative if we fetch using a ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ signed char *. ~~~~~~~~~~~~~~ We also want to fetch the endpoints without translating them; the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ appropriate translation is done in the bit-setting loop below. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* The SVR4 compiler on the 3B2 had trouble with unsigned const char *. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ range_start = ((const unsigned char *) p)[-2]; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ range_end = ((const unsigned char *) p)[0]; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Have to increment the pointer into the pattern string, so the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ caller isn't still at the ending character. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (*p_ptr)++; ~~~~~~~~~~~ /* If the start is after the end, the range is empty. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (range_start > range_end) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return syntax & RE_NO_EMPTY_RANGES ? REG_ERANGE : REG_NOERROR; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Here we see why `this_char' has to be larger than an `unsigned ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ char' -- the range is inclusive, so if `range_end' == 0xff ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (assuming 8-bit characters), we would otherwise go into an infinite ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ loop, since all characters <= 0xff. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (this_char = range_start; this_char <= range_end; this_char++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ SET_LIST_BIT (RE_TRANSLATE (this_char)); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ return REG_NOERROR; ~~~~~~~~~~~~~~~~~~~ } ~ #ifdef MULE ~~~~~~~~~~~ static reg_errcode_t ~~~~~~~~~~~~~~~~~~~~ compile_extended_range (re_char **p_ptr, re_char *pend, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RE_TRANSLATE_TYPE translate, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ reg_syntax_t syntax, Lisp_Object rtab) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ Ichar this_char, range_start, range_end; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ const Ibyte *p; ~~~~~~~~~~~~~~~ if (*p_ptr == pend) ~~~~~~~~~~~~~~~~~~~ return REG_ERANGE; ~~~~~~~~~~~~~~~~~~ p = (const Ibyte *) *p_ptr; ~~~~~~~~~~~~~~~~~~~~~~~~~~~ range_end = itext_ichar (p); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ p--; /* back to '-' */ ~~~~~~~~~~~~~~~~~~~~~~ DEC_IBYTEPTR (p); /* back to start of range */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* We also want to fetch the endpoints without translating them; the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ appropriate translation is done in the bit-setting loop below. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ range_start = itext_ichar (p); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ INC_IBYTEPTR (*p_ptr); ~~~~~~~~~~~~~~~~~~~~~~ /* If the start is after the end, the range is empty. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (range_start > range_end) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return syntax & RE_NO_EMPTY_RANGES ? REG_ERANGE : REG_NOERROR; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifndef UNICODE_INTERNAL ~~~~~~~~~~~~~~~~~~~~~~~~ /* Can't have ranges spanning different charsets, except maybe for ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ranges entirely within the first 256 chars. (The intent of this is that ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the effect of such a range would be unpredictable, since there is no ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ well-defined ordering over charsets and the particular assignment of ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ charset ID's is arbitrary.) This does not apply to Unicode, with ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ well-defined character values. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if ((range_start >= 0x100 || range_end >= 0x100) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ && !EQ (old_mule_ichar_charset (range_start), ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ old_mule_ichar_charset (range_end))) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return REG_ERANGESPAN; ~~~~~~~~~~~~~~~~~~~~~~ #endif /* not UNICODE_INTERNAL */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (TRANSLATE_P (translate)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ /* #### This might be way inefficient if the range encompasses 10,000 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ chars or something. To be efficient, you'd have to do something like ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ this: ~~~~~ range_table a ~~~~~~~~~~~~~ range_table b; ~~~~~~~~~~~~~~ map_char_table (translation table, [range_start, range_end]) of ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ lambda (ch, translation): ~~~~~~~~~~~~~~~~~~~~~~~~~ put (ch, Qt) in a ~~~~~~~~~~~~~~~~~ put (translation, Qt) in b ~~~~~~~~~~~~~~~~~~~~~~~~~~ invert the range in a and truncate to [range_start, range_end] ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ put the union of a, b in rtab ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ This is to say, we want to map every character that has a translation ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ to its translation, and other characters to themselves. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ This assumes, as is reasonable in practice, that a translation ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ table maps individual characters to their translation, and does ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ not generally map multiple characters to the same translation. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ ~~ for (this_char = range_start; this_char <= range_end; this_char++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ SET_RANGETAB_BIT (RE_TRANSLATE (this_char)); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ } ~ else ~~~~ put_range_table (rtab, range_start, range_end, Qt); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return REG_NOERROR; ~~~~~~~~~~~~~~~~~~~ } ~ #endif /* MULE */ ~~~~~~~~~~~~~~~~~ #ifdef emacs ~~~~~~~~~~~~ reg_errcode_t ~~~~~~~~~~~~~ compile_char_class (re_wctype_t cc, Lisp_Object rtab, Bitbyte *flags_out) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ *flags_out |= re_wctype_to_bit (cc); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ switch (cc) ~~~~~~~~~~~ { ~ case RECC_ASCII: ~~~~~~~~~~~~~~~~ put_range_table (rtab, 0, 0x7f, Qt); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case RECC_XDIGIT: ~~~~~~~~~~~~~~~~~ put_range_table (rtab, 'a', 'f', Qt); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ put_range_table (rtab, 'A', 'f', Qt); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* fallthrough */ ~~~~~~~~~~~~~~~~~ case RECC_DIGIT: ~~~~~~~~~~~~~~~~ put_range_table (rtab, '0', '9', Qt); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case RECC_BLANK: ~~~~~~~~~~~~~~~~ put_range_table (rtab, ' ', ' ', Qt); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ put_range_table (rtab, '\t', '\t', Qt); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case RECC_PRINT: ~~~~~~~~~~~~~~~~ put_range_table (rtab, ' ', 0x7e, Qt); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ put_range_table (rtab, 0x80, CHAR_CODE_LIMIT, Qt); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case RECC_GRAPH: ~~~~~~~~~~~~~~~~ put_range_table (rtab, '!', 0x7e, Qt); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ put_range_table (rtab, 0x80, CHAR_CODE_LIMIT, Qt); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case RECC_NONASCII: ~~~~~~~~~~~~~~~~~~~ case RECC_MULTIBYTE: ~~~~~~~~~~~~~~~~~~~~ put_range_table (rtab, 0x80, CHAR_CODE_LIMIT, Qt); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case RECC_CNTRL: ~~~~~~~~~~~~~~~~ put_range_table (rtab, 0x00, 0x1f, Qt); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case RECC_UNIBYTE: ~~~~~~~~~~~~~~~~~~ /* Never true in XEmacs. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ /* The following all have their own bits in the class_bits argument to ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ charset_mule and charset_mule_not, they don't use the range table ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ information. */ ~~~~~~~~~~~~~~~ case RECC_ALPHA: ~~~~~~~~~~~~~~~~ case RECC_WORD: ~~~~~~~~~~~~~~~ case RECC_ALNUM: /* Equivalent to RECC_WORD */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_LOWER: ~~~~~~~~~~~~~~~~ case RECC_PUNCT: ~~~~~~~~~~~~~~~~ case RECC_SPACE: ~~~~~~~~~~~~~~~~ case RECC_UPPER: ~~~~~~~~~~~~~~~~ break; ~~~~~~ } ~ return REG_NOERROR; ~~~~~~~~~~~~~~~~~~~ } ~ #endif /* MULE */ ~~~~~~~~~~~~~~~~~ ~ /* re_compile_fastmap computes a ``fastmap'' for the compiled pattern in ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ BUFP. A fastmap records which of the (1 << BYTEWIDTH) possible ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ characters can start a string that matches the pattern. This fastmap ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ is used by re_search to skip quickly over impossible starting points. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ The caller must supply the address of a (1 << BYTEWIDTH)-byte data ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ area as BUFP->fastmap. ~~~~~~~~~~~~~~~~~~~~~~ We set the `fastmap', `fastmap_accurate', and `can_be_null' fields in ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the pattern buffer. ~~~~~~~~~~~~~~~~~~~ Returns 0 if we succeed, -2 if an internal error. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int ~~~ re_compile_fastmap (struct re_pattern_buffer *bufp ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RE_LISP_SHORT_CONTEXT_ARGS_DECL) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ int j, k; ~~~~~~~~~ #ifdef MATCH_MAY_ALLOCATE ~~~~~~~~~~~~~~~~~~~~~~~~~ fail_stack_type fail_stack; ~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ /* We don't push any register information onto the failure stack. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* &&#### this should be changed for 8-bit-fixed, for efficiency. see ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ comment marked with &&#### in re_search_2. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ REGISTER char *fastmap = bufp->fastmap; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ unsigned char *pattern = bufp->buffer; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ long size = bufp->used; ~~~~~~~~~~~~~~~~~~~~~~~ re_char *p = pattern; ~~~~~~~~~~~~~~~~~~~~~ REGISTER re_char *pend = pattern + size; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef REGEX_REL_ALLOC ~~~~~~~~~~~~~~~~~~~~~~ /* This holds the pointer to the failure stack, when ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ it is allocated relocatably. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fail_stack_elt_t *failure_stack_ptr; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ /* Assume that each path through the pattern can be null until ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ proven otherwise. We set this false at the bottom of switch ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ statement, to which we get only if a particular path doesn't ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ match the empty string. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_bool path_can_be_null = true; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* We aren't doing a `succeed_n' to begin with. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_bool succeed_n_p = false; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef ERROR_CHECK_MALLOC ~~~~~~~~~~~~~~~~~~~~~~~~~ /* The pattern comes from string data, not buffer data. We don't access ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ any buffer data, so we don't have to worry about malloc() (but the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ disallowed flag may have been set by a caller). */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int depth = bind_regex_malloc_disallowed (0); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ assert (fastmap != NULL && p != NULL); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ INIT_FAIL_STACK (); ~~~~~~~~~~~~~~~~~~~ memset (fastmap, 0, 1 << BYTEWIDTH); /* Assume nothing's valid. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->fastmap_accurate = 1; /* It will be when we're done. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->can_be_null = 0; ~~~~~~~~~~~~~~~~~~~~~~ while (1) ~~~~~~~~~ { ~ if (p == pend || *p == succeed) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ /* We have reached the (effective) end of pattern. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!FAIL_STACK_EMPTY ()) ~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ bufp->can_be_null |= path_can_be_null; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Reset for next path. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~ path_can_be_null = true; ~~~~~~~~~~~~~~~~~~~~~~~~ p = (unsigned char *) fail_stack.stack[--fail_stack.avail].pointer; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ continue; ~~~~~~~~~ } ~ else ~~~~ break; ~~~~~~ } ~ /* We should never be about to go beyond the end of the pattern. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ assert (p < pend); ~~~~~~~~~~~~~~~~~~ switch ((re_opcode_t) *p++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ /* I guess the idea here is to simply not bother with a fastmap ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if a backreference is used, since it's too hard to figure out ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the fastmap for the corresponding group. Setting ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `can_be_null' stops `re_search_2' from using the fastmap, so ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ that is all we do. */ ~~~~~~~~~~~~~~~~~~~~~~ case duplicate: ~~~~~~~~~~~~~~~ bufp->can_be_null = 1; ~~~~~~~~~~~~~~~~~~~~~~ goto done; ~~~~~~~~~~ /* Following are the cases which match a character. These end ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ with `break'. */ ~~~~~~~~~~~~~~~~~ case exactn: ~~~~~~~~~~~~ fastmap[p[1]] = 1; ~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case charset: ~~~~~~~~~~~~~ /* XEmacs: Under Mule, these bit vectors will ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ only contain values for characters below 0x80. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH))) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[j] = 1; ~~~~~~~~~~~~~~~ break; ~~~~~~ case charset_not: ~~~~~~~~~~~~~~~~~ /* Chars beyond end of map must be allowed. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef MULE ~~~~~~~~~~~ for (j = *p * BYTEWIDTH; j < 0x80; j++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[j] = 1; ~~~~~~~~~~~~~~~ /* And all extended characters must be allowed, too. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (j = 0x80; j < 0x100; j++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (ibyte_first_byte_p (j)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[j] = 1; ~~~~~~~~~~~~~~~ #else /* not MULE */ ~~~~~~~~~~~~~~~~~~~~ for (j = *p * BYTEWIDTH; j < (1 << BYTEWIDTH); j++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[j] = 1; ~~~~~~~~~~~~~~~ #endif /* MULE */ ~~~~~~~~~~~~~~~~~ for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!(p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH)))) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[j] = 1; ~~~~~~~~~~~~~~~ break; ~~~~~~ #ifdef MULE ~~~~~~~~~~~ case charset_mule: ~~~~~~~~~~~~~~~~~~ { ~ int nentries; ~~~~~~~~~~~~~ Bitbyte flags = *p++; ~~~~~~~~~~~~~~~~~~~~~ if (flags) ~~~~~~~~~~ { ~ /* We need to consult the syntax table, fastmap won't ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ work. */ ~~~~~~~~ bufp->can_be_null = 1; ~~~~~~~~~~~~~~~~~~~~~~ goto done; ~~~~~~~~~~ } ~ nentries = unified_range_table_nentries ((void *) p); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (j = 0; j < nentries; j++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ EMACS_INT first, last; ~~~~~~~~~~~~~~~~~~~~~~ Lisp_Object dummy_val; ~~~~~~~~~~~~~~~~~~~~~~ int jj; ~~~~~~~ Ibyte strr[MAX_ICHAR_LEN]; ~~~~~~~~~~~~~~~~~~~~~~~~~~ unified_range_table_get_range ((void *) p, j, &first, &last, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ &dummy_val); ~~~~~~~~~~~~ #ifndef UNICODE_INTERNAL ~~~~~~~~~~~~~~~~~~~~~~~~ for (jj = first; jj <= last && jj < 0x80; jj++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[jj] = 1; ~~~~~~~~~~~~~~~~ /* Ranges below 0x100 can span charsets, but there ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ are only two (Control-1 and Latin-1), and ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ either first or last has to be in them. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ set_itext_ichar (strr, first); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[*strr] = 1; ~~~~~~~~~~~~~~~~~~~ if (last < 0x100) ~~~~~~~~~~~~~~~~~ { ~ set_itext_ichar (strr, last); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[*strr] = 1; ~~~~~~~~~~~~~~~~~~~ } ~ else if (CHAR_CODE_LIMIT == last) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ /* This is RECC_MULTIBYTE or RECC_NONASCII; true for all ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ non-ASCII characters. */ ~~~~~~~~~~~~~~~~~~~~~~~~ jj = 0x80; ~~~~~~~~~~ while (jj < 0xA0) ~~~~~~~~~~~~~~~~~ { ~ fastmap[jj++] = 1; ~~~~~~~~~~~~~~~~~~ } ~ } ~ #else ~~~~~ /* Ranges can span charsets. We depend on the fact that ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ lead bytes are monotonically non-decreasing as ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ character values increase. @@#### This is a fairly ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ reasonable assumption in general (but DOES NOT WORK in ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ old Mule due to the ordering of private dimension-1 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ chars before official dimension-2 chars), and introduces ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ a dependency on the particular representation. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ Ibyte strrlast[MAX_ICHAR_LEN]; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ set_itext_ichar (strr, first); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ set_itext_ichar (strrlast, min (last, CHAR_CODE_LIMIT - 1)); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (jj = *strr; jj <= *strrlast; jj++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[jj] = 1; ~~~~~~~~~~~~~~~~ } ~ #endif /* not UNICODE_INTERNAL */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* If it's not a possible first byte, it can't be in the fastmap. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ In UTF-8, lead bytes are not contiguous with ASCII, so a ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ range spanning the ASCII/non-ASCII boundary will put ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ extraneous bytes in the range [0x80 - 0xBF] in the fastmap. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (j = 0x80; j < 0x100; j++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!ibyte_first_byte_p (j)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[j] = 0; ~~~~~~~~~~~~~~~ } ~ break; ~~~~~~ case charset_mule_not: ~~~~~~~~~~~~~~~~~~~~~~ { ~ int nentries; ~~~~~~~~~~~~~ int smallest_prev = 0; ~~~~~~~~~~~~~~~~~~~~~~ Bitbyte flags = *p++; ~~~~~~~~~~~~~~~~~~~~~ if (flags) ~~~~~~~~~~ { ~ /* We need to consult the syntax table, fastmap won't ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ work. */ ~~~~~~~~ bufp->can_be_null = 1; ~~~~~~~~~~~~~~~~~~~~~~ goto done; ~~~~~~~~~~ } ~ nentries = unified_range_table_nentries ((void *) p); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifndef UNICODE_INTERNAL ~~~~~~~~~~~~~~~~~~~~~~~~ for (j = 0; j < nentries; j++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ EMACS_INT first, last; ~~~~~~~~~~~~~~~~~~~~~~ Lisp_Object dummy_val; ~~~~~~~~~~~~~~~~~~~~~~ int jj; ~~~~~~~ unified_range_table_get_range ((void *) p, j, &first, &last, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ &dummy_val); ~~~~~~~~~~~~ for (jj = smallest_prev; jj < first && jj < 0x80; jj++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[jj] = 1; ~~~~~~~~~~~~~~~~ smallest_prev = last + 1; ~~~~~~~~~~~~~~~~~~~~~~~~~ if (smallest_prev >= 0x80) ~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ } ~ /* Also set lead bytes after the end */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (j = smallest_prev; j < 0x80; j++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[j] = 1; ~~~~~~~~~~~~~~~ /* Calculating which lead bytes are actually allowed ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ here is rather difficult, so we just punt and allow ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ all of them. ~~~~~~~~~~~~ */ ~~ for (j = 0x80; j < 0x100; j++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (ibyte_first_byte_p (j)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[j] = 1; ~~~~~~~~~~~~~~~ #else ~~~~~ for (j = 0; j < nentries; j++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ EMACS_INT first, last; ~~~~~~~~~~~~~~~~~~~~~~ /* This denotes a range of lead bytes that are not ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ in the fastmap. */ ~~~~~~~~~~~~~~~~~~ int firstlead, lastlead; ~~~~~~~~~~~~~~~~~~~~~~~~ Lisp_Object dummy_val; ~~~~~~~~~~~~~~~~~~~~~~ int jj; ~~~~~~~ unified_range_table_get_range ((void *) p, j, &first, &last, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ &dummy_val); ~~~~~~~~~~~~ /* With Unicode-internal, lead bytes that are entirely ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ within the range and not including the beginning or end ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ are definitely not in the fastmap. Leading bytes that ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ include the beginning or ending characters will be in ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the fastmap unless the beginning or ending characters ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ are the first or last character, respectively, that uses ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ this lead byte. ~~~~~~~~~~~~~~~ @@#### WARNING! In order to determine whether we are the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ first or last character using a lead byte we use and ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ embed in the code some knowledge of how UTF-8 works -- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ at least, the fact that the the first character using a ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ particular lead byte has the minimum-numbered trailing ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ byte in all its trailing bytes, and the last character ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ using a particular lead byte has the maximum-numbered ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ trailing byte in all its trailing bytes. We abstract ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ away the actual minimum/maximum trailing byte numbers, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ at least. We could perhaps do this more portably by ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ just looking at the representation of the character one ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ higher or lower and seeing if the lead byte changes, but ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ you'd run into the problem of invalid characters, e.g. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if you're at the edge of the range of surrogates or are ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the top-most allowed character. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ ~~ if (first < 0x80) ~~~~~~~~~~~~~~~~~ firstlead = first; ~~~~~~~~~~~~~~~~~~ else ~~~~ { ~ Ibyte strr[MAX_ICHAR_LEN]; ~~~~~~~~~~~~~~~~~~~~~~~~~~ Bytecount slen = set_itext_ichar (strr, first); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int kk; ~~~~~~~ /* Determine if we're the first character using our ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ leading byte. */ ~~~~~~~~~~~~~~~~ for (kk = 1; kk < slen; kk++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (strr[kk] != FIRST_TRAILING_BYTE) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ /* If not, this leading byte might occur, so ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ make sure it gets added to the fastmap. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ firstlead = *strr + 1; ~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ } ~ /* Otherwise, we're the first character using our ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ leading byte, and we don't need to add the leading ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ byte to the fastmap. (If our range doesn't ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ completely cover the leading byte, it will get added ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ anyway by the code handling the other end of the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ range.) */ ~~~~~~~~~~ firstlead = *strr; ~~~~~~~~~~~~~~~~~~ } ~ if (last < 0x80) ~~~~~~~~~~~~~~~~ lastlead = last; ~~~~~~~~~~~~~~~~ else ~~~~ { ~ Ibyte strr[MAX_ICHAR_LEN]; ~~~~~~~~~~~~~~~~~~~~~~~~~~ Bytecount slen ~~~~~~~~~~~~~~ = set_itext_ichar (strr, ~~~~~~~~~~~~~~~~~~~~~~~~ min (last, ~~~~~~~~~~ CHAR_CODE_LIMIT - 1)); ~~~~~~~~~~~~~~~~~~~~~~ int kk; ~~~~~~~ /* Same as above but for the last character using ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ our leading byte. */ ~~~~~~~~~~~~~~~~~~~~ for (kk = 1; kk < slen; kk++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (strr[kk] != LAST_TRAILING_BYTE) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ lastlead = *strr - 1; ~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ } ~ lastlead = *strr; ~~~~~~~~~~~~~~~~~ } ~ /* Now, FIRSTLEAD and LASTLEAD are set to the beginning and ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ end, inclusive, of a range of lead bytes that cannot be ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ in the fastmap. Essentially, we want to set all the other ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bytes to be in the fastmap. Here we handle those after ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the previous range and before this one. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (jj = smallest_prev; jj < firstlead; jj++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[jj] = 1; ~~~~~~~~~~~~~~~~ smallest_prev = lastlead + 1; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* Also set lead bytes after the end of the final range. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (j = smallest_prev; j < 0x100; j++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[j] = 1; ~~~~~~~~~~~~~~~ /* If it's not a possible first byte, it can't be in the fastmap. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ In UTF-8, lead bytes are not contiguous with ASCII, so a ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ range spanning the ASCII/non-ASCII boundary will put ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ extraneous bytes in the range [0x80 - 0xBF] in the fastmap. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (j = 0x80; j < 0x100; j++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!ibyte_first_byte_p (j)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[j] = 0; ~~~~~~~~~~~~~~~ #endif /* UNICODE_INTERNAL */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ break; ~~~~~~ #endif /* MULE */ ~~~~~~~~~~~~~~~~~ case anychar: ~~~~~~~~~~~~~ { ~ int fastmap_newline = fastmap['\n']; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* `.' matches anything ... */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef MULE ~~~~~~~~~~~ /* "anything" only includes bytes that can be the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ first byte of a character. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (j = 0; j < 0x100; j++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (ibyte_first_byte_p (j)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[j] = 1; ~~~~~~~~~~~~~~~ #else ~~~~~ for (j = 0; j < (1 << BYTEWIDTH); j++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[j] = 1; ~~~~~~~~~~~~~~~ #endif ~~~~~~ /* ... except perhaps newline. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!(bufp->syntax & RE_DOT_NEWLINE)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap['\n'] = fastmap_newline; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Return if we have already set `can_be_null'; if we have, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ then the fastmap is irrelevant. Something's wrong here. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ else if (bufp->can_be_null) ~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto done; ~~~~~~~~~~ /* Otherwise, have to check alternative paths. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ } ~ #ifndef emacs ~~~~~~~~~~~~~ case wordchar: ~~~~~~~~~~~~~~ for (j = 0; j < (1 << BYTEWIDTH); j++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (SYNTAX (ignored, j) == Sword) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[j] = 1; ~~~~~~~~~~~~~~~ break; ~~~~~~ case notwordchar: ~~~~~~~~~~~~~~~~~ for (j = 0; j < (1 << BYTEWIDTH); j++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (SYNTAX (ignored, j) != Sword) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[j] = 1; ~~~~~~~~~~~~~~~ break; ~~~~~~ #else /* emacs */ ~~~~~~~~~~~~~~~~~ case wordchar: ~~~~~~~~~~~~~~ case notwordchar: ~~~~~~~~~~~~~~~~~ case wordbound: ~~~~~~~~~~~~~~~ case notwordbound: ~~~~~~~~~~~~~~~~~~ case wordbeg: ~~~~~~~~~~~~~ case wordend: ~~~~~~~~~~~~~ case notsyntaxspec: ~~~~~~~~~~~~~~~~~~~ case syntaxspec: ~~~~~~~~~~~~~~~~ /* This match depends on text properties. These end with ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ aborting optimizations. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->can_be_null = 1; ~~~~~~~~~~~~~~~~~~~~~~ goto done; ~~~~~~~~~~ #if 0 /* all of the following code is unused now that the `syntax-table' ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ property exists -- it's trickier to do this than just look in ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the buffer. &&#### but we could just use the syntax-cache stuff ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ instead; why don't we? --ben */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case wordchar: ~~~~~~~~~~~~~~ k = (int) Sword; ~~~~~~~~~~~~~~~~ goto matchsyntax; ~~~~~~~~~~~~~~~~~ case notwordchar: ~~~~~~~~~~~~~~~~~ k = (int) Sword; ~~~~~~~~~~~~~~~~ goto matchnotsyntax; ~~~~~~~~~~~~~~~~~~~~ case syntaxspec: ~~~~~~~~~~~~~~~~ k = *p++; ~~~~~~~~~ matchsyntax: ~~~~~~~~~~~~ #ifdef MULE ~~~~~~~~~~~ for (j = 0; j < 0x80; j++) ~~~~~~~~~~~~~~~~~~~~~~~~~~ if (SYNTAX ~~~~~~~~~~ (XCHAR_TABLE (BUFFER_MIRROR_SYNTAX_TABLE (lispbuf)), j) == ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (enum syntaxcode) k) ~~~~~~~~~~~~~~~~~~~~ fastmap[j] = 1; ~~~~~~~~~~~~~~~ /* @@#### To be correct, we need to set the fastmap for any ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ lead byte any of whose characters can have this syntax code. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ This is hard to calculate so we just punt for now. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (j = 0x80; j < 0x100; j++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (ibyte_first_byte_p (j)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[j] = 1; ~~~~~~~~~~~~~~~ #else /* not MULE */ ~~~~~~~~~~~~~~~~~~~~ for (j = 0; j < (1 << BYTEWIDTH); j++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (SYNTAX ~~~~~~~~~~ (XCHAR_TABLE (BUFFER_MIRROR_SYNTAX_TABLE (lispbuf)), j) == ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (enum syntaxcode) k) ~~~~~~~~~~~~~~~~~~~~ fastmap[j] = 1; ~~~~~~~~~~~~~~~ #endif /* MULE */ ~~~~~~~~~~~~~~~~~ break; ~~~~~~ case notsyntaxspec: ~~~~~~~~~~~~~~~~~~~ k = *p++; ~~~~~~~~~ matchnotsyntax: ~~~~~~~~~~~~~~~ #ifdef MULE ~~~~~~~~~~~ for (j = 0; j < 0x80; j++) ~~~~~~~~~~~~~~~~~~~~~~~~~~ if (SYNTAX ~~~~~~~~~~ (XCHAR_TABLE ~~~~~~~~~~~~ (BUFFER_MIRROR_SYNTAX_TABLE (lispbuf)), j) != ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (enum syntaxcode) k) ~~~~~~~~~~~~~~~~~~~~ fastmap[j] = 1; ~~~~~~~~~~~~~~~ /* @@#### To be correct, we need to set the fastmap for any ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ lead byte all of whose characters do not have this syntax code. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ This is hard to calculate so we just punt for now. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (j = 0x80; j < 0x100; j++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (ibyte_first_byte_p (j)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[j] = 1; ~~~~~~~~~~~~~~~ #else /* not MULE */ ~~~~~~~~~~~~~~~~~~~~ for (j = 0; j < (1 << BYTEWIDTH); j++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (SYNTAX ~~~~~~~~~~ (XCHAR_TABLE ~~~~~~~~~~~~ (BUFFER_MIRROR_SYNTAX_TABLE (lispbuf)), j) != ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (enum syntaxcode) k) ~~~~~~~~~~~~~~~~~~~~ fastmap[j] = 1; ~~~~~~~~~~~~~~~ #endif /* MULE */ ~~~~~~~~~~~~~~~~~ break; ~~~~~~ #endif /* 0 */ ~~~~~~~~~~~~~~ #ifdef MULE ~~~~~~~~~~~ /* 97/2/17 jhod category patch */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case categoryspec: ~~~~~~~~~~~~~~~~~~ case notcategoryspec: ~~~~~~~~~~~~~~~~~~~~~ bufp->can_be_null = 1; ~~~~~~~~~~~~~~~~~~~~~~ UNBIND_REGEX_MALLOC_CHECK (); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return 0; ~~~~~~~~~ /* end if category patch */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif /* MULE */ ~~~~~~~~~~~~~~~~~ /* All cases after this match the empty string. These end with ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `continue'. */ ~~~~~~~~~~~~~~~ case before_dot: ~~~~~~~~~~~~~~~~ case at_dot: ~~~~~~~~~~~~ case after_dot: ~~~~~~~~~~~~~~~ continue; ~~~~~~~~~ #endif /* emacs */ ~~~~~~~~~~~~~~~~~~ case no_op: ~~~~~~~~~~~ case begline: ~~~~~~~~~~~~~ case endline: ~~~~~~~~~~~~~ case begbuf: ~~~~~~~~~~~~ case endbuf: ~~~~~~~~~~~~ #ifndef emacs ~~~~~~~~~~~~~ case wordbound: ~~~~~~~~~~~~~~~ case notwordbound: ~~~~~~~~~~~~~~~~~~ case wordbeg: ~~~~~~~~~~~~~ case wordend: ~~~~~~~~~~~~~ #endif ~~~~~~ case push_dummy_failure: ~~~~~~~~~~~~~~~~~~~~~~~~ continue; ~~~~~~~~~ case jump_n: ~~~~~~~~~~~~ case pop_failure_jump: ~~~~~~~~~~~~~~~~~~~~~~ case maybe_pop_jump: ~~~~~~~~~~~~~~~~~~~~ case jump: ~~~~~~~~~~ case jump_past_alt: ~~~~~~~~~~~~~~~~~~~ case dummy_failure_jump: ~~~~~~~~~~~~~~~~~~~~~~~~ EXTRACT_NUMBER_AND_INCR (j, p); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ p += j; ~~~~~~~ if (j > 0) ~~~~~~~~~~ continue; ~~~~~~~~~ /* Jump backward implies we just went through the body of a ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ loop and matched nothing. Opcode jumped to should be ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `on_failure_jump' or `succeed_n'. Just treat it like an ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ordinary jump. For a * loop, it has pushed its failure ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ point already; if so, discard that as redundant. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if ((re_opcode_t) *p != on_failure_jump ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ && (re_opcode_t) *p != succeed_n) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ continue; ~~~~~~~~~ p++; ~~~~ EXTRACT_NUMBER_AND_INCR (j, p); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ p += j; ~~~~~~~ /* If what's on the stack is where we are now, pop it. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!FAIL_STACK_EMPTY () ~~~~~~~~~~~~~~~~~~~~~~~~ && fail_stack.stack[fail_stack.avail - 1].pointer == p) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fail_stack.avail--; ~~~~~~~~~~~~~~~~~~~ continue; ~~~~~~~~~ case on_failure_jump: ~~~~~~~~~~~~~~~~~~~~~ case on_failure_keep_string_jump: ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ handle_on_failure_jump: ~~~~~~~~~~~~~~~~~~~~~~~ EXTRACT_NUMBER_AND_INCR (j, p); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* For some patterns, e.g., `(a?)?', `p+j' here points to the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ end of the pattern. We don't want to push such a point, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ since when we restore it above, entering the switch will ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ increment `p' past the end of the pattern. We don't need ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ to push such a point since we obviously won't find any more ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap entries beyond `pend'. Such a pattern can match ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the null string, though. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (p + j < pend) ~~~~~~~~~~~~~~~~~ { ~ if (!PUSH_PATTERN_OP (p + j, fail_stack)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ RESET_FAIL_STACK (); ~~~~~~~~~~~~~~~~~~~~ UNBIND_REGEX_MALLOC_CHECK (); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return -2; ~~~~~~~~~~ } ~ } ~ else ~~~~ bufp->can_be_null = 1; ~~~~~~~~~~~~~~~~~~~~~~ if (succeed_n_p) ~~~~~~~~~~~~~~~~ { ~ EXTRACT_NUMBER_AND_INCR (k, p); /* Skip the n. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ succeed_n_p = false; ~~~~~~~~~~~~~~~~~~~~ } ~ continue; ~~~~~~~~~ case succeed_n: ~~~~~~~~~~~~~~~ /* Get to the number of times to succeed. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ p += 2; ~~~~~~~ /* Increment p past the n for when k != 0. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ EXTRACT_NONNEGATIVE_AND_INCR (k, p); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (k == 0) ~~~~~~~~~~~ { ~ p -= 4; ~~~~~~~ succeed_n_p = true; /* Spaghetti code alert. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto handle_on_failure_jump; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ continue; ~~~~~~~~~ case set_number_at: ~~~~~~~~~~~~~~~~~~~ p += 4; ~~~~~~~ continue; ~~~~~~~~~ case start_memory: ~~~~~~~~~~~~~~~~~~ case stop_memory: ~~~~~~~~~~~~~~~~~ p += 4; ~~~~~~~ continue; ~~~~~~~~~ default: ~~~~~~~~ ABORT (); /* We have listed all the cases. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } /* switch *p++ */ ~~~~~~~~~~~~~~~~~~~ /* Getting here means we have found the possible starting ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ characters for one path of the pattern -- and that the empty ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ string does not match. We need not follow this path further. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Instead, look at the next alternative (remembered on the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ stack), or quit if no more. The test at the top of the loop ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ does these things. */ ~~~~~~~~~~~~~~~~~~~~~~ path_can_be_null = false; ~~~~~~~~~~~~~~~~~~~~~~~~~ p = pend; ~~~~~~~~~ } /* while p */ ~~~~~~~~~~~~~~~ /* Set `can_be_null' for the last path (also the first path, if the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ pattern is empty). */ ~~~~~~~~~~~~~~~~~~~~~~ bufp->can_be_null |= path_can_be_null; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ done: ~~~~~ RESET_FAIL_STACK (); ~~~~~~~~~~~~~~~~~~~~ UNBIND_REGEX_MALLOC_CHECK (); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return 0; ~~~~~~~~~ } /* re_compile_fastmap */ ~~~~~~~~~~~~~~~~~~~~~~~~~~ ~ /* Set REGS to hold NUM_REGS registers, storing them in STARTS and ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ENDS. Subsequent matches using PATTERN_BUFFER and REGS will use ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ this memory for recording register information. STARTS and ENDS ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ must be allocated using the malloc library routine, and must each ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ be at least NUM_REGS * sizeof (regoff_t) bytes long. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ If NUM_REGS == 0, then subsequent matches should allocate their own ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ register data. ~~~~~~~~~~~~~~ Unless this function is called, the first search or match using ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PATTERN_BUFFER will allocate its own register data, without ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ freeing the old data. */ ~~~~~~~~~~~~~~~~~~~~~~~~~ void ~~~~ re_set_registers (struct re_pattern_buffer *bufp, struct re_registers *regs, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int num_regs, regoff_t *starts, regoff_t *ends) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ if (num_regs) ~~~~~~~~~~~~~ { ~ bufp->regs_allocated = REGS_REALLOCATE; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regs->num_regs = num_regs; ~~~~~~~~~~~~~~~~~~~~~~~~~~ regs->start = starts; ~~~~~~~~~~~~~~~~~~~~~ regs->end = ends; ~~~~~~~~~~~~~~~~~ } ~ else ~~~~ { ~ bufp->regs_allocated = REGS_UNALLOCATED; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regs->num_regs = 0; ~~~~~~~~~~~~~~~~~~~ regs->start = regs->end = (regoff_t *) 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ } ~ ~ /* Searching routines. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Like re_search_2, below, but only one string is specified, and ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ doesn't let you say where to stop matching. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int ~~~ re_search (struct re_pattern_buffer *bufp, const char *string, int size, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int startpos, int range, struct re_registers *regs ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RE_LISP_CONTEXT_ARGS_DECL) ~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ return re_search_2 (bufp, NULL, 0, string, size, startpos, range, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regs, size RE_LISP_CONTEXT_ARGS); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* Using the compiled pattern in BUFP->buffer, first tries to match the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ virtual concatenation of STRING1 and STRING2, starting first at index ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ STARTPOS, then at STARTPOS + 1, and so on. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ STRING1 and STRING2 have length SIZE1 and SIZE2, respectively. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RANGE is how far to scan while trying to match. RANGE = 0 means try ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ only at STARTPOS; in general, the last start tried is STARTPOS + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RANGE. ~~~~~~ All sizes and positions refer to bytes (not chars); under Mule, the code ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ knows about the format of the text and will only check at positions ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ where a character starts. ~~~~~~~~~~~~~~~~~~~~~~~~~ With MULE, RANGE is a byte position, not a char position. The last ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ start tried is the character starting <= STARTPOS + RANGE. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ In REGS, return the indices of the virtual concatenation of STRING1 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ and STRING2 that matched the entire BUFP->buffer and its contained ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ subexpressions. ~~~~~~~~~~~~~~~ Do not consider matching one past the index STOP in the virtual ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ concatenation of STRING1 and STRING2. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ We return either the position in the strings at which the match was ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ found, -1 if no match, or -2 if error (such as failure ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ stack overflow). */ ~~~~~~~~~~~~~~~~~~~~ int ~~~ re_search_2 (struct re_pattern_buffer *bufp, const char *str1, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int size1, const char *str2, int size2, int startpos, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int range, struct re_registers *regs, int stop ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RE_LISP_CONTEXT_ARGS_DECL) ~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ int val; ~~~~~~~~ re_char *string1 = (re_char *) str1; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_char *string2 = (re_char *) str2; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ REGISTER char *fastmap = bufp->fastmap; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ REGISTER RE_TRANSLATE_TYPE translate = bufp->translate; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int total_size = size1 + size2; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int endpos = startpos + range; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef REGEX_BEGLINE_CHECK ~~~~~~~~~~~~~~~~~~~~~~~~~~ int anchored_at_begline = 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ re_char *d; ~~~~~~~~~~~ #ifdef emacs ~~~~~~~~~~~~ Internal_Format fmt = buffer_or_other_internal_format (lispobj); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef REL_ALLOC ~~~~~~~~~~~~~~~~ const Ibyte *orig_buftext = ~~~~~~~~~~~~~~~~~~~~~~~~~~~ BUFFERP (lispobj) ? ~~~~~~~~~~~~~~~~~~~ BYTE_BUF_BYTE_ADDRESS (XBUFFER (lispobj), ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ BYTE_BUF_BEG (XBUFFER (lispobj))) : ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 0; ~~ #endif ~~~~~~ #ifdef ERROR_CHECK_MALLOC ~~~~~~~~~~~~~~~~~~~~~~~~~ int depth; ~~~~~~~~~~ #endif ~~~~~~ #endif /* emacs */ ~~~~~~~~~~~~~~~~~~ int forward_search_p; ~~~~~~~~~~~~~~~~~~~~~ /* Check for out-of-range STARTPOS. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (startpos < 0 || startpos > total_size) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return -1; ~~~~~~~~~~ /* Fix up RANGE if it might eventually take us outside ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the virtual concatenation of STRING1 and STRING2. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (endpos < 0) ~~~~~~~~~~~~~~~ range = 0 - startpos; ~~~~~~~~~~~~~~~~~~~~~ else if (endpos > total_size) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ range = total_size - startpos; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ forward_search_p = range > 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (void) (forward_search_p); /* This is only used with assertions, silence the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ compiler warning when they're turned off. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* If the search isn't to be a backwards one, don't waste time in a ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ search for a pattern that must be anchored. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (bufp->used > 0 && (re_opcode_t) bufp->buffer[0] == begbuf && range > 0) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ if (startpos > 0) ~~~~~~~~~~~~~~~~~ return -1; ~~~~~~~~~~ else ~~~~ { ~ d = ((const unsigned char *) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (startpos >= size1 ? string2 - size1 : string1) + startpos); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ range = itext_ichar_len_fmt (d, fmt); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ } ~ #ifdef emacs ~~~~~~~~~~~~ /* In a forward search for something that starts with \=. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ don't keep searching past point. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (bufp->used > 0 && (re_opcode_t) bufp->buffer[0] == at_dot && range > 0) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ if (!BUFFERP (lispobj)) ~~~~~~~~~~~~~~~~~~~~~~~ return -1; ~~~~~~~~~~ range = (BYTE_BUF_PT (XBUFFER (lispobj)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - BYTE_BUF_BEGV (XBUFFER (lispobj)) - startpos); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (range < 0) ~~~~~~~~~~~~~~ return -1; ~~~~~~~~~~ } ~ #endif /* emacs */ ~~~~~~~~~~~~~~~~~~ #ifdef ERROR_CHECK_MALLOC ~~~~~~~~~~~~~~~~~~~~~~~~~ /* Do this after the above return()s. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ depth = bind_regex_malloc_disallowed (1); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ /* Update the fastmap now if not correct already. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ BEGIN_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~~~ if (fastmap && !bufp->fastmap_accurate) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (re_compile_fastmap (bufp RE_LISP_SHORT_CONTEXT_ARGS) == -2) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ END_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~ UNBIND_REGEX_MALLOC_CHECK (); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return -2; ~~~~~~~~~~ } ~ END_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~ RE_SEARCH_RELOCATE_MOVEABLE_DATA_POINTERS (); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef REGEX_BEGLINE_CHECK ~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ long i = 0; ~~~~~~~~~~~ while (i < bufp->used) ~~~~~~~~~~~~~~~~~~~~~~ { ~ if (bufp->buffer[i] == start_memory || ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->buffer[i] == stop_memory) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ i += 4; ~~~~~~~ else ~~~~ break; ~~~~~~ } ~ anchored_at_begline = i < bufp->used && bufp->buffer[i] == begline; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ #endif ~~~~~~ #ifdef emacs ~~~~~~~~~~~~ BEGIN_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~~~ /* Update the mirror syntax table if it's used and dirty. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ SYNTAX_CODE (BUFFER_MIRROR_SYNTAX_TABLE (lispbuf), 'a'); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ scache = setup_syntax_cache (scache, lispobj, lispbuf, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ offset_to_bytexpos (lispobj, startpos), ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 1); ~~~ END_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~ RE_SEARCH_RELOCATE_MOVEABLE_DATA_POINTERS (); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ /* Loop through the string, looking for a place to start matching. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (;;) ~~~~~~~~ { ~ #ifdef REGEX_BEGLINE_CHECK ~~~~~~~~~~~~~~~~~~~~~~~~~~ /* If the regex is anchored at the beginning of a line (i.e. with a ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ^), then we can speed things up by skipping to the next ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ beginning-of-line. However, to determine "beginning of line" we ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ need to look at the previous char, so can't do this check if at ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ beginning of either string. (Well, we could if at the beginning of ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the second string, but it would require additional code, and this ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ is just an optimization.) */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (anchored_at_begline && startpos > 0 && startpos != size1) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ if (range > 0) ~~~~~~~~~~~~~~ { ~ /* whose stupid idea was it anyway to make this ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ function take two strings to match?? */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int lim = 0; ~~~~~~~~~~~~ re_char *orig_d; ~~~~~~~~~~~~~~~~ re_char *stop_d; ~~~~~~~~~~~~~~~~ /* Compute limit as below in fastmap code, so we are guaranteed ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ to remain within a single string. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (startpos < size1 && startpos + range >= size1) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ lim = range - (size1 - startpos); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ d = ((const unsigned char *) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (startpos >= size1 ? string2 - size1 : string1) + startpos); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ orig_d = d; ~~~~~~~~~~~ stop_d = d + range - lim; ~~~~~~~~~~~~~~~~~~~~~~~~~ /* We want to find the next location (including the current ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ one) where the previous char is a newline, so back up one ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ and search forward for a newline. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEC_IBYTEPTR_FMT (d, fmt); /* Ok, since startpos != size1. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Written out as an if-else to avoid testing `translate' ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ inside the loop. */ ~~~~~~~~~~~~~~~~~~~~ if (TRANSLATE_P (translate)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ while (d < stop_d && ~~~~~~~~~~~~~~~~~~~~ RE_TRANSLATE_1 (itext_ichar_fmt (d, fmt, lispobj)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ != '\n') ~~~~~~~~ INC_IBYTEPTR_FMT (d, fmt); ~~~~~~~~~~~~~~~~~~~~~~~~~~ else ~~~~ while (d < stop_d && ~~~~~~~~~~~~~~~~~~~~ itext_ichar_ascii_fmt (d, fmt, lispobj) != '\n') ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ INC_IBYTEPTR_FMT (d, fmt); ~~~~~~~~~~~~~~~~~~~~~~~~~~ /* If we were stopped by a newline, skip forward over it. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Otherwise we will get in an infloop when our start position ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ was at begline. */ ~~~~~~~~~~~~~~~~~~ if (d < stop_d) ~~~~~~~~~~~~~~~ INC_IBYTEPTR_FMT (d, fmt); ~~~~~~~~~~~~~~~~~~~~~~~~~~ range -= d - orig_d; ~~~~~~~~~~~~~~~~~~~~ startpos += d - orig_d; ~~~~~~~~~~~~~~~~~~~~~~~ assert (!forward_search_p || range >= 0); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ else if (range < 0) ~~~~~~~~~~~~~~~~~~~ { ~ /* We're lazy, like in the fastmap code below */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Ichar c; ~~~~~~~~ d = ((const unsigned char *) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (startpos >= size1 ? string2 - size1 : string1) + startpos); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEC_IBYTEPTR_FMT (d, fmt); ~~~~~~~~~~~~~~~~~~~~~~~~~~ c = itext_ichar_fmt (d, fmt, lispobj); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ c = RE_TRANSLATE (c); ~~~~~~~~~~~~~~~~~~~~~ if (c != '\n') ~~~~~~~~~~~~~~ goto advance; ~~~~~~~~~~~~~ } ~ } ~ #endif /* REGEX_BEGLINE_CHECK */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* If a fastmap is supplied, skip quickly over characters that ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ cannot be the start of a match. If the pattern can match the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ null string, however, we don't need to skip characters; we want ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the first null string. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~ if (fastmap && startpos < total_size && !bufp->can_be_null) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ /* For the moment, fastmap always works as if buffer ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ is in default format, so convert chars in the search strings ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ into default format as we go along, if necessary. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ &&#### fastmap needs rethinking for 8-bit-fixed so ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ it's faster. We need it to reflect the raw ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 8-bit-fixed values. That isn't so hard if we assume ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ that the top 96 bytes represent a single 1-byte ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ charset. For 16-bit/32-bit stuff it's probably not ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ worth it to make the fastmap represent the raw, due to ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ its nature -- we'd have to use the LSB for the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap, and that causes lots of problems with Mule ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ chars, where it essentially wipes out the usefulness ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ of the fastmap entirely. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (range > 0) /* Searching forwards. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ int lim = 0; ~~~~~~~~~~~~ int irange = range; ~~~~~~~~~~~~~~~~~~~ if (startpos < size1 && startpos + range >= size1) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ lim = range - (size1 - startpos); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ d = ((const unsigned char *) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (startpos >= size1 ? string2 - size1 : string1) + startpos); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Written out as an if-else to avoid testing `translate' ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ inside the loop. */ ~~~~~~~~~~~~~~~~~~~~ if (TRANSLATE_P (translate)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ while (range > lim) ~~~~~~~~~~~~~~~~~~~ { ~ re_char *old_d = d; ~~~~~~~~~~~~~~~~~~~ #ifdef MULE ~~~~~~~~~~~ Ibyte tempch[MAX_ICHAR_LEN]; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Ichar buf_ch = ~~~~~~~~~~~~~~ RE_TRANSLATE_1 (itext_ichar_fmt (d, fmt, lispobj)); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ set_itext_ichar (tempch, buf_ch); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (fastmap[*tempch]) ~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ #else ~~~~~ if (fastmap[(unsigned char) RE_TRANSLATE_1 (*d)]) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ #endif /* MULE */ ~~~~~~~~~~~~~~~~~ INC_IBYTEPTR_FMT (d, fmt); ~~~~~~~~~~~~~~~~~~~~~~~~~~ range -= (d - old_d); ~~~~~~~~~~~~~~~~~~~~~ assert (!forward_search_p || range >= 0); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ } ~ #ifdef MULE ~~~~~~~~~~~ else if (fmt != FORMAT_DEFAULT) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ while (range > lim) ~~~~~~~~~~~~~~~~~~~ { ~ re_char *old_d = d; ~~~~~~~~~~~~~~~~~~~ Ibyte tempch[MAX_ICHAR_LEN]; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Ichar buf_ch = itext_ichar_fmt (d, fmt, lispobj); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ set_itext_ichar (tempch, buf_ch); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (fastmap[*tempch]) ~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ INC_IBYTEPTR_FMT (d, fmt); ~~~~~~~~~~~~~~~~~~~~~~~~~~ range -= (d - old_d); ~~~~~~~~~~~~~~~~~~~~~ assert (!forward_search_p || range >= 0); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ } ~ #endif /* MULE */ ~~~~~~~~~~~~~~~~~ else ~~~~ { ~ while (range > lim && !fastmap[*d]) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ re_char *old_d = d; ~~~~~~~~~~~~~~~~~~~ INC_IBYTEPTR (d); ~~~~~~~~~~~~~~~~~ range -= (d - old_d); ~~~~~~~~~~~~~~~~~~~~~ assert (!forward_search_p || range >= 0); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ } ~ startpos += irange - range; ~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ else /* Searching backwards. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ /* #### It's not clear why we don't just write a loop, like ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for the moving-forward case. Perhaps the writer got lazy, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ since backward searches aren't so common. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ d = ((const unsigned char *) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (startpos >= size1 ? string2 - size1 : string1) + startpos); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef MULE ~~~~~~~~~~~ { ~ Ibyte tempch[MAX_ICHAR_LEN]; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Ichar buf_ch = ~~~~~~~~~~~~~~ RE_TRANSLATE (itext_ichar_fmt (d, fmt, lispobj)); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ set_itext_ichar (tempch, buf_ch); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!fastmap[*tempch]) ~~~~~~~~~~~~~~~~~~~~~~ goto advance; ~~~~~~~~~~~~~ } ~ #else ~~~~~ if (!fastmap[(unsigned char) RE_TRANSLATE (*d)]) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto advance; ~~~~~~~~~~~~~ #endif /* MULE */ ~~~~~~~~~~~~~~~~~ } ~ } ~ /* If can't match the null string, and that's all we have left, fail. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (range >= 0 && startpos == total_size && fastmap ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ && !bufp->can_be_null) ~~~~~~~~~~~~~~~~~~~~~~ { ~ UNBIND_REGEX_MALLOC_CHECK (); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return -1; ~~~~~~~~~~ } ~ #ifdef emacs /* XEmacs added, w/removal of immediate_quit */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!no_quit_in_re_search) ~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ BEGIN_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~~~ QUIT; ~~~~~ END_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~ RE_SEARCH_RELOCATE_MOVEABLE_DATA_POINTERS (); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ #endif ~~~~~~ BEGIN_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~~~ val = re_match_2_internal (bufp, string1, size1, string2, size2, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ startpos, regs, stop ~~~~~~~~~~~~~~~~~~~~ RE_LISP_CONTEXT_ARGS); ~~~~~~~~~~~~~~~~~~~~~~ #ifndef REGEX_MALLOC ~~~~~~~~~~~~~~~~~~~~ ALLOCA_GARBAGE_COLLECT (); ~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ END_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~ RE_SEARCH_RELOCATE_MOVEABLE_DATA_POINTERS (); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (val >= 0) ~~~~~~~~~~~~~ { ~ UNBIND_REGEX_MALLOC_CHECK (); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return startpos; ~~~~~~~~~~~~~~~~ } ~ if (val == -2) ~~~~~~~~~~~~~~ { ~ UNBIND_REGEX_MALLOC_CHECK (); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return -2; ~~~~~~~~~~ } ~ RE_SEARCH_RELOCATE_MOVEABLE_DATA_POINTERS (); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ advance: ~~~~~~~~ if (!range) ~~~~~~~~~~~ break; ~~~~~~ else if (range > 0) ~~~~~~~~~~~~~~~~~~~ { ~ Bytecount d_size; ~~~~~~~~~~~~~~~~~ d = ((const unsigned char *) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (startpos >= size1 ? string2 - size1 : string1) + startpos); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ d_size = itext_ichar_len_fmt (d, fmt); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ range -= d_size; ~~~~~~~~~~~~~~~~ assert (!forward_search_p || range >= 0); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ startpos += d_size; ~~~~~~~~~~~~~~~~~~~ } ~ else ~~~~ { ~ Bytecount d_size; ~~~~~~~~~~~~~~~~~ /* Note startpos > size1 not >=. If we are on the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ string1/string2 boundary, we want to backup into string1. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ d = ((const unsigned char *) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (startpos > size1 ? string2 - size1 : string1) + startpos); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEC_IBYTEPTR_FMT (d, fmt); ~~~~~~~~~~~~~~~~~~~~~~~~~~ d_size = itext_ichar_len_fmt (d, fmt); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ range += d_size; ~~~~~~~~~~~~~~~~ assert (!forward_search_p || range >= 0); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ startpos -= d_size; ~~~~~~~~~~~~~~~~~~~ } ~ } ~ UNBIND_REGEX_MALLOC_CHECK (); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return -1; ~~~~~~~~~~ } /* re_search_2 */ ~~~~~~~~~~~~~~~~~~~ ~ /* Declarations and macros for re_match_2. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* This converts PTR, a pointer into one of the search strings `string1' ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ and `string2' into an offset from the beginning of that string. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define POINTER_TO_OFFSET(ptr) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (FIRST_STRING_P (ptr) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~ ? ((regoff_t) ((ptr) - string1)) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ : ((regoff_t) ((ptr) - string2 + size1))) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Macros for dealing with the split strings in re_match_2. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define MATCHING_IN_FIRST_STRING (dend == end_match_1) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Call before fetching a character with *d. This switches over to ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ string2 if necessary. */ ~~~~~~~~~~~~~~~~~~~~~~~~~ #define REGEX_PREFETCH() \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ while (d == dend) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { \ ~~~~~~~~~~~ /* End of string2 => fail. */ \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (dend == end_match_2) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto fail; \ ~~~~~~~~~~~~~~~~~~ /* End of string1 => advance to string2. */ \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ d = string2; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~ dend = end_match_2; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* Test if at very beginning or at very end of the virtual concatenation ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ of `string1' and `string2'. If only one string, it's `string2'. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define AT_STRINGS_BEG(d) ((d) == (size1 ? string1 : string2) || !size2) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define AT_STRINGS_END(d) ((d) == end2) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* XEmacs change: ~~~~~~~~~~~~~~~~~ If the given position straddles the string gap, return the equivalent ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ position that is before or after the gap, respectively; otherwise, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return the same position. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define POS_BEFORE_GAP_UNSAFE(d) ((d) == string2 ? end1 : (d)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define POS_AFTER_GAP_UNSAFE(d) ((d) == end1 ? string2 : (d)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Test if CH is a word-constituent character. (XEmacs change) */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define WORDCHAR_P(ch) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (SYNTAX (BUFFER_MIRROR_SYNTAX_TABLE (lispbuf), ch) == Sword) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Free everything we malloc. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef MATCH_MAY_ALLOCATE ~~~~~~~~~~~~~~~~~~~~~~~~~ #define FREE_VAR(var,type) if (var) REGEX_FREE (var, type); var = NULL ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define FREE_VARIABLES() \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ do { \ ~~~~~~~~~~~~~~ UNBIND_REGEX_MALLOC_CHECK (); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ REGEX_FREE_STACK (fail_stack.stack); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ FREE_VAR (regstart, re_char **); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ FREE_VAR (regend, re_char **); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ FREE_VAR (old_regstart, re_char **); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ FREE_VAR (old_regend, re_char **); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ FREE_VAR (best_regstart, re_char **); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ FREE_VAR (best_regend, re_char **); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ FREE_VAR (reg_info, register_info_type *); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ FREE_VAR (reg_dummy, re_char **); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ FREE_VAR (reg_info_dummy, register_info_type *); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } while (0) ~~~~~~~~~~~ #else /* not MATCH_MAY_ALLOCATE */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define FREE_VARIABLES() \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ do { \ ~~~~~~~~~~~ UNBIND_REGEX_MALLOC_CHECK (); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } while (0) ~~~~~~~~~~~ #endif /* MATCH_MAY_ALLOCATE */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* These values must meet several constraints. They must not be valid ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ register values, which means we can use numbers larger than MAX_REGNUM. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ They must differ by 1, because of NUM_FAILURE_ITEMS above. And the value ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for the lowest register must be larger than the value for the highest ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ register, so we do not try to actually save any registers when none are ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ active. */ ~~~~~~~~~~~ #define NO_HIGHEST_ACTIVE_REG (MAX_REGNUM + 1) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define NO_LOWEST_ACTIVE_REG (NO_HIGHEST_ACTIVE_REG + 1) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ~ /* Matching routines. */ ~~~~~~~~~~~~~~~~~~~~~~~~~ #ifndef emacs /* XEmacs never uses this. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* re_match is like re_match_2 except it takes only a single string. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int ~~~ re_match (struct re_pattern_buffer *bufp, const char *string, int size, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int pos, struct re_registers *regs ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RE_LISP_CONTEXT_ARGS_DECL) ~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ int result = re_match_2_internal (bufp, NULL, 0, (re_char *) string, size, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ pos, regs, size ~~~~~~~~~~~~~~~ RE_LISP_CONTEXT_ARGS); ~~~~~~~~~~~~~~~~~~~~~~ ALLOCA_GARBAGE_COLLECT (); ~~~~~~~~~~~~~~~~~~~~~~~~~~ return result; ~~~~~~~~~~~~~~ } ~ #endif /* not emacs */ ~~~~~~~~~~~~~~~~~~~~~~ /* re_match_2 matches the compiled pattern in BUFP against the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (virtual) concatenation of STRING1 and STRING2 (of length SIZE1 and ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ SIZE2, respectively). We start matching at POS, and stop matching ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ at STOP. ~~~~~~~~ If REGS is non-null and the `no_sub' field of BUFP is nonzero, we ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ store offsets for the substring each group matched in REGS. See the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ documentation for exactly how many groups we fill. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ We return -1 if no match, -2 if an internal error (such as the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ failure stack overflowing). Otherwise, we return the length of the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ matched substring. */ ~~~~~~~~~~~~~~~~~~~~~~ int ~~~ re_match_2 (struct re_pattern_buffer *bufp, const char *string1, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int size1, const char *string2, int size2, int pos, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ struct re_registers *regs, int stop ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RE_LISP_CONTEXT_ARGS_DECL) ~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ int result; ~~~~~~~~~~~ #ifdef emacs ~~~~~~~~~~~~ /* Update the mirror syntax table if it's dirty now, this would otherwise ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ cause a malloc() in charset_mule in re_match_2_internal() when checking ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ characters' syntax. */ ~~~~~~~~~~~~~~~~~~~~~~ SYNTAX_CODE (BUFFER_MIRROR_SYNTAX_TABLE (lispbuf), 'a'); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ scache = setup_syntax_cache (scache, lispobj, lispbuf, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ offset_to_bytexpos (lispobj, pos), ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 1); ~~~ #endif ~~~~~~ result = re_match_2_internal (bufp, (re_char *) string1, size1, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (re_char *) string2, size2, ~~~~~~~~~~~~~~~~~~~~~~~~~~~ pos, regs, stop ~~~~~~~~~~~~~~~ RE_LISP_CONTEXT_ARGS); ~~~~~~~~~~~~~~~~~~~~~~ ALLOCA_GARBAGE_COLLECT (); ~~~~~~~~~~~~~~~~~~~~~~~~~~ return result; ~~~~~~~~~~~~~~ } ~ /* This is a separate function so that we can force an alloca cleanup ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ afterwards. */ ~~~~~~~~~~~~~~~ static int ~~~~~~~~~~ re_match_2_internal (struct re_pattern_buffer *bufp, re_char *string1, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int size1, re_char *string2, int size2, int pos, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ struct re_registers *regs, int stop ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RE_LISP_CONTEXT_ARGS_MULE_DECL) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ /* General temporaries. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~ int mcnt; ~~~~~~~~~ re_char *p1; ~~~~~~~~~~~~ int should_succeed; /* XEmacs change */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Just past the end of the corresponding string. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_char *end1, *end2; ~~~~~~~~~~~~~~~~~~~~~ /* Pointers into string1 and string2, just past the last characters in ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ each to consider matching. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_char *end_match_1, *end_match_2; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Where we are in the data, and the end of the current string. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_char *d, *dend; ~~~~~~~~~~~~~~~~~~ /* Where we are in the pattern, and the end of the pattern. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ unsigned char *p; ~~~~~~~~~~~~~~~~~ re_char *pstart; ~~~~~~~~~~~~~~~~ REGISTER re_char *pend; ~~~~~~~~~~~~~~~~~~~~~~~ /* Mark the opcode just after a start_memory, so we can test for an ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ empty subpattern when we get to the stop_memory. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_char *just_past_start_mem = 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* We use this to map every character in the string. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RE_TRANSLATE_TYPE translate = bufp->translate; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Failure point stack. Each place that can handle a failure further ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ down the line pushes a failure point on this stack. It consists of ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ restart, regend, and reg_info for all registers corresponding to ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the subexpressions we're currently inside, plus the number of such ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ registers, and, finally, two char *'s. The first char * is where ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ to resume scanning the pattern; the second one is where to resume ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ scanning the strings. If the latter is zero, the failure point is ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ a ``dummy''; if a failure happens and the failure point is a dummy, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ it gets discarded and the next one is tried. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef MATCH_MAY_ALLOCATE /* otherwise, this is global. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fail_stack_type fail_stack; ~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ #ifdef DEBUG ~~~~~~~~~~~~ static int failure_id; ~~~~~~~~~~~~~~~~~~~~~~ int nfailure_points_pushed = 0, nfailure_points_popped = 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ #ifdef REGEX_REL_ALLOC ~~~~~~~~~~~~~~~~~~~~~~ /* This holds the pointer to the failure stack, when ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ it is allocated relocatably. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fail_stack_elt_t *failure_stack_ptr; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ /* We fill all the registers internally, independent of what we ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return, for use in backreferences. The number here includes ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ an element for register zero. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regnum_t num_regs = bufp->re_ngroups + 1; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* The currently active registers. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regnum_t lowest_active_reg = NO_LOWEST_ACTIVE_REG; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regnum_t highest_active_reg = NO_HIGHEST_ACTIVE_REG; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Information on the contents of registers. These are pointers into ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the input strings; they record just what was matched (on this ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ attempt) by a subexpression part of the pattern, that is, the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regnum-th regstart pointer points to where in the pattern we began ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ matching and the regnum-th regend points to right after where we ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ stopped matching the regnum-th subexpression. (The zeroth register ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ keeps track of what the whole pattern matches.) */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_char **regstart, **regend; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ /* If a group that's operated upon by a repetition operator fails to ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ match anything, then the register for its start will need to be ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ restored because it will have been set to wherever in the string we ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ are when we last see its open-group operator. Similarly for a ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ register's end. */ ~~~~~~~~~~~~~~~~~~~ #ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_char **old_regstart, **old_regend; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ /* The is_active field of reg_info helps us keep track of which (possibly ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ nested) subexpressions we are currently in. The matched_something ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ field of reg_info[reg_num] helps us tell whether or not we have ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ matched any of the pattern so far this time through the reg_num-th ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ subexpression. These two fields get reset each time through any ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ loop their register is in. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef MATCH_MAY_ALLOCATE /* otherwise, this is global. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ register_info_type *reg_info; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ /* The following record the register info as found in the above ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ variables when we find a match better than any we've seen before. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ This happens as we backtrack through the failure points, which in ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ turn happens only if we have not yet matched the entire string. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int best_regs_set = false; ~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_char **best_regstart, **best_regend; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ /* Logically, this is `best_regend[0]'. But we don't want to have to ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ allocate space for that if we're not allocating space for anything ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ else (see below). Also, we never need info about register 0 for ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ any of the other register vectors, and it seems rather a kludge to ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ treat `best_regend' differently than the rest. So we keep track of ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the end of the best match so far in a separate variable. We ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ initialize this to NULL so that when we backtrack the first time ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ and need to test it, it's not garbage. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_char *match_end = NULL; ~~~~~~~~~~~~~~~~~~~~~~~~~~ /* This helps SET_REGS_MATCHED avoid doing redundant work. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int set_regs_matched_done = 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Used when we pop values we don't care about. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_char **reg_dummy; ~~~~~~~~~~~~~~~~~~~~ register_info_type *reg_info_dummy; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ #ifdef DEBUG ~~~~~~~~~~~~ /* Counts the total number of registers pushed. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int num_regs_pushed = 0; ~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ /* 1 if this match ends in the same string (string1 or string2) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ as the best previous match. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_bool same_str_p; ~~~~~~~~~~~~~~~~~~~ /* 1 if this match is the best seen so far. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_bool best_match_p; ~~~~~~~~~~~~~~~~~~~~~ #ifdef emacs ~~~~~~~~~~~~ Internal_Format fmt = buffer_or_other_internal_format (lispobj); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef REL_ALLOC ~~~~~~~~~~~~~~~~ const Ibyte *orig_buftext = ~~~~~~~~~~~~~~~~~~~~~~~~~~~ BUFFERP (lispobj) ? ~~~~~~~~~~~~~~~~~~~ BYTE_BUF_BYTE_ADDRESS (XBUFFER (lispobj), ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ BYTE_BUF_BEG (XBUFFER (lispobj))) : ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 0; ~~ #endif ~~~~~~ #ifdef ERROR_CHECK_MALLOC ~~~~~~~~~~~~~~~~~~~~~~~~~ int depth = bind_regex_malloc_disallowed (1); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ #endif /* emacs */ ~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT1 ("\n\nEntering re_match_2.\n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ BEGIN_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~~~ INIT_FAIL_STACK (); ~~~~~~~~~~~~~~~~~~~ p = (unsigned char *) ALLOCA (bufp->used); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ END_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~ /* re_match_2_internal() modifies the compiled pattern (see the succeed_n, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ jump_n, set_number_at opcodes), make it re-entrant by working on a ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ copy. This should also give better locality of reference. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ memcpy (p, bufp->buffer, bufp->used); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ pstart = (re_char *) p; ~~~~~~~~~~~~~~~~~~~~~~~ pend = pstart + bufp->used; ~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef MATCH_MAY_ALLOCATE ~~~~~~~~~~~~~~~~~~~~~~~~~ /* Do not bother to initialize all the register variables if there are ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ no groups in the pattern, as it takes a fair amount of time. If ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ there are groups, we include space for register 0 (the whole ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ pattern), even though we never use it, since it simplifies the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ array indexing. We should fix this. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (bufp->re_ngroups) ~~~~~~~~~~~~~~~~~~~~~ { ~ BEGIN_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~~~ regstart = REGEX_TALLOC (num_regs, re_char *); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regend = REGEX_TALLOC (num_regs, re_char *); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ old_regstart = REGEX_TALLOC (num_regs, re_char *); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ old_regend = REGEX_TALLOC (num_regs, re_char *); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ best_regstart = REGEX_TALLOC (num_regs, re_char *); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ best_regend = REGEX_TALLOC (num_regs, re_char *); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ reg_info = REGEX_TALLOC (num_regs, register_info_type); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ reg_dummy = REGEX_TALLOC (num_regs, re_char *); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ reg_info_dummy = REGEX_TALLOC (num_regs, register_info_type); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ END_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~ if (!(regstart && regend && old_regstart && old_regend && reg_info ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ && best_regstart && best_regend && reg_dummy && reg_info_dummy)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ FREE_VARIABLES (); ~~~~~~~~~~~~~~~~~~ return -2; ~~~~~~~~~~ } ~ } ~ else ~~~~ { ~ /* We must initialize all our variables to NULL, so that ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `FREE_VARIABLES' doesn't try to free them. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regstart = regend = old_regstart = old_regend = best_regstart ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ = best_regend = reg_dummy = NULL; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ reg_info = reg_info_dummy = (register_info_type *) NULL; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ #endif /* MATCH_MAY_ALLOCATE */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #if defined (emacs) && defined (REL_ALLOC) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ /* If the allocations above (or the call to setup_syntax_cache() in ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_match_2) caused a rel-alloc relocation, then fix up the data ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ pointers */ ~~~~~~~~~~~ Bytecount offset = offset_post_relocation (lispobj, orig_buftext); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (offset) ~~~~~~~~~~~ { ~ string1 += offset; ~~~~~~~~~~~~~~~~~~ string2 += offset; ~~~~~~~~~~~~~~~~~~ } ~ } ~ #endif /* defined (emacs) && defined (REL_ALLOC) */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* The starting position is bogus. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (pos < 0 || pos > size1 + size2) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ FREE_VARIABLES (); ~~~~~~~~~~~~~~~~~~ return -1; ~~~~~~~~~~ } ~ /* Initialize subexpression text positions to our sentinel to mark ones that ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ no start_memory/stop_memory has been seen for. Also initialize the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ register information struct. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (mcnt = 1; mcnt < num_regs; mcnt++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ regstart[mcnt] = regend[mcnt] = old_regstart[mcnt] = old_regend[mcnt] ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ = best_regstart[mcnt] = best_regend[mcnt] = REG_UNSET_VALUE; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ REG_MATCH_NULL_STRING_P (reg_info[mcnt]) = MATCH_NULL_UNSET_VALUE; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ IS_ACTIVE (reg_info[mcnt]) = 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ MATCHED_SOMETHING (reg_info[mcnt]) = 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ EVER_MATCHED_SOMETHING (reg_info[mcnt]) = 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* We move `string1' into `string2' if the latter's empty -- but not if ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `string1' is null. */ ~~~~~~~~~~~~~~~~~~~~~~ if (size2 == 0 && string1 != NULL) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ string2 = string1; ~~~~~~~~~~~~~~~~~~ size2 = size1; ~~~~~~~~~~~~~~ string1 = 0; ~~~~~~~~~~~~ size1 = 0; ~~~~~~~~~~ } ~ end1 = string1 + size1; ~~~~~~~~~~~~~~~~~~~~~~~ end2 = string2 + size2; ~~~~~~~~~~~~~~~~~~~~~~~ /* Compute where to stop matching, within the two strings. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (stop <= size1) ~~~~~~~~~~~~~~~~~~ { ~ end_match_1 = string1 + stop; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ end_match_2 = string2; ~~~~~~~~~~~~~~~~~~~~~~ } ~ else ~~~~ { ~ end_match_1 = end1; ~~~~~~~~~~~~~~~~~~~ end_match_2 = string2 + stop - size1; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* `p' scans through the pattern as `d' scans through the data. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `dend' is the end of the input string that `d' points within. `d' ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ is advanced into the following input string whenever necessary, but ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ this happens before fetching; therefore, at the beginning of the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ loop, `d' can be pointing at the end of a string, but it cannot ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ equal `string2'. */ ~~~~~~~~~~~~~~~~~~~~ if (size1 > 0 && pos <= size1) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ d = string1 + pos; ~~~~~~~~~~~~~~~~~~ dend = end_match_1; ~~~~~~~~~~~~~~~~~~~ } ~ else ~~~~ { ~ d = string2 + pos - size1; ~~~~~~~~~~~~~~~~~~~~~~~~~~ dend = end_match_2; ~~~~~~~~~~~~~~~~~~~ } ~ DEBUG_MATCH_PRINT1 ("The compiled pattern is: \n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT_COMPILED_PATTERN (bufp, p, pend); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT1 ("The string to match is: `"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT_DOUBLE_STRING (d, string1, size1, string2, size2); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT1 ("'\n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* This loops over pattern commands. It exits by returning from the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ function if the match is complete, or it drops through if the match ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fails at this starting point in the input data. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (;;) ~~~~~~~~ { ~ DEBUG_MATCH_PRINT2 ("\n0x%zx: ", (Bytecount) p); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef emacs /* XEmacs added, w/removal of immediate_quit */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!no_quit_in_re_search) ~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ BEGIN_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~~~ QUIT; ~~~~~ END_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~ RE_MATCH_RELOCATE_MOVEABLE_DATA_POINTERS (); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ #endif ~~~~~~ if (p == pend) ~~~~~~~~~~~~~~ { /* End of pattern means we might have succeeded. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT1 ("end of pattern ... "); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* If we haven't matched the entire string, and we want the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ longest match, try backtracking. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (d != end_match_2) ~~~~~~~~~~~~~~~~~~~~~ { ~ same_str_p = (FIRST_STRING_P (match_end) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ == MATCHING_IN_FIRST_STRING); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* AIX compiler got confused when this was combined ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ with the previous declaration. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (same_str_p) ~~~~~~~~~~~~~~~ best_match_p = d > match_end; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ else ~~~~ best_match_p = !MATCHING_IN_FIRST_STRING; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT1 ("backtracking.\n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!FAIL_STACK_EMPTY ()) ~~~~~~~~~~~~~~~~~~~~~~~~~ { /* More failure points to try. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* If exceeds best match so far, save it. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!best_regs_set || best_match_p) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ best_regs_set = true; ~~~~~~~~~~~~~~~~~~~~~ match_end = d; ~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT1 ("\nSAVING match as best so far.\n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (mcnt = 1; mcnt < num_regs; mcnt++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ best_regstart[mcnt] = regstart[mcnt]; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ best_regend[mcnt] = regend[mcnt]; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ } ~ goto fail; ~~~~~~~~~~ } ~ /* If no failure points, don't restore garbage. And if ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ last match is real best match, don't restore second ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ best one. */ ~~~~~~~~~~~~ else if (best_regs_set && !best_match_p) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ restore_best_regs: ~~~~~~~~~~~~~~~~~~ /* Restore best match. It may happen that `dend == ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ end_match_1' while the restored d is in string2. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ For example, the pattern `x.*y.*z' against the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ strings `x-' and `y-z-', if the two strings are ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ not consecutive in memory. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT1 ("Restoring best registers.\n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ d = match_end; ~~~~~~~~~~~~~~ dend = ((d >= string1 && d <= end1) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ? end_match_1 : end_match_2); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (mcnt = 1; mcnt < num_regs; mcnt++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ regstart[mcnt] = best_regstart[mcnt]; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regend[mcnt] = best_regend[mcnt]; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ } ~ } /* d != end_match_2 */ ~~~~~~~~~~~~~~~~~~~~~~~~ succeed_label: ~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT1 ("Accepting match.\n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* If caller wants register contents data back, do it. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ int num_nonshy_regs = bufp->re_nsub + 1; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (regs && !bufp->no_sub) ~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ /* Have the register data arrays been allocated? */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (bufp->regs_allocated == REGS_UNALLOCATED) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { /* No. So allocate them with malloc. We need one ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ extra element beyond `num_regs' for the `-1' marker ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ GNU code uses. */ ~~~~~~~~~~~~~~~~~~ regs->num_regs = MAX (RE_NREGS, num_nonshy_regs + 1); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ BEGIN_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~~~ regs->start = TALLOC (regs->num_regs, regoff_t); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regs->end = TALLOC (regs->num_regs, regoff_t); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ END_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~ RE_MATCH_RELOCATE_MOVEABLE_DATA_POINTERS (); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (regs->start == NULL || regs->end == NULL) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ FREE_VARIABLES (); ~~~~~~~~~~~~~~~~~~ return -2; ~~~~~~~~~~ } ~ bufp->regs_allocated = REGS_REALLOCATE; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ else if (bufp->regs_allocated == REGS_REALLOCATE) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { /* Yes. If we need more elements than were already ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ allocated, reallocate them. If we need fewer, just ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ leave it alone. */ ~~~~~~~~~~~~~~~~~~~ if (regs->num_regs < num_nonshy_regs + 1) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ regs->num_regs = num_nonshy_regs + 1; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ BEGIN_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~~~ RETALLOC (regs->start, regs->num_regs, regoff_t); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RETALLOC (regs->end, regs->num_regs, regoff_t); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ END_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~ RE_MATCH_RELOCATE_MOVEABLE_DATA_POINTERS (); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (regs->start == NULL || regs->end == NULL) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ FREE_VARIABLES (); ~~~~~~~~~~~~~~~~~~ return -2; ~~~~~~~~~~ } ~ } ~ } ~ else ~~~~ { ~ /* The braces fend off a "empty body in an else-statement" ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ warning under GCC when assert expands to nothing. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ assert (bufp->regs_allocated == REGS_FIXED); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* Convert the pointer data in `regstart' and `regend' to ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ indices. Register zero has to be set differently, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ since we haven't kept track of any info for it. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (regs->num_regs > 0) ~~~~~~~~~~~~~~~~~~~~~~~ { ~ regs->start[0] = pos; ~~~~~~~~~~~~~~~~~~~~~ regs->end[0] = (MATCHING_IN_FIRST_STRING ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ? ((regoff_t) (d - string1)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ : ((regoff_t) (d - string2 + size1))); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* Map over the NUM_NONSHY_REGS non-shy internal registers. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Copy each into the corresponding external register. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ MCNT indexes external registers. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (mcnt = 1; mcnt < MIN (num_nonshy_regs, regs->num_regs); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ mcnt++) ~~~~~~~ { ~ int internal_reg = bufp->external_to_internal_register[mcnt]; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if ((int)0xDEADBEEF == internal_reg ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || REG_UNSET (regstart[internal_reg]) || ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ REG_UNSET (regend[internal_reg])) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regs->start[mcnt] = regs->end[mcnt] = -1; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ else ~~~~ { ~ regs->start[mcnt] = ~~~~~~~~~~~~~~~~~~~ (regoff_t) POINTER_TO_OFFSET (regstart[internal_reg]); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regs->end[mcnt] = ~~~~~~~~~~~~~~~~~ (regoff_t) POINTER_TO_OFFSET (regend[internal_reg]); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ } ~ } /* regs && !bufp->no_sub */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* If we have regs and the regs structure has more elements than ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ were in the pattern, set the extra elements starting with ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ NUM_NONSHY_REGS to -1. If we (re)allocated the registers, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ this is the case, because we always allocate enough to have ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ at least one -1 at the end. ~~~~~~~~~~~~~~~~~~~~~~~~~~~ We do this even when no_sub is set because some applications ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (XEmacs) reuse register structures which may contain stale ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ information, and permit attempts to access those registers. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ It would be possible to require the caller to do this, but we'd ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ have to change the API for this function to reflect that, and ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ audit all callers. Note: as of 2003-04-17 callers in XEmacs ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ do clear the registers, but it's safer to leave this code in ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ because of reallocation. ~~~~~~~~~~~~~~~~~~~~~~~~ */ ~~ if (regs && regs->num_regs > 0) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (mcnt = num_nonshy_regs; mcnt < regs->num_regs; mcnt++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regs->start[mcnt] = regs->end[mcnt] = -1; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ DEBUG_MATCH_PRINT4 ("%u failure points pushed, %u popped (%u remain).\n", ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ nfailure_points_pushed, nfailure_points_popped, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ nfailure_points_pushed - nfailure_points_popped); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT2 ("%u registers pushed.\n", num_regs_pushed); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ mcnt = d - pos - (MATCHING_IN_FIRST_STRING ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ? string1 ~~~~~~~~~ : string2 - size1); ~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT2 ("Returning %d from re_match_2.\n", mcnt); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ FREE_VARIABLES (); ~~~~~~~~~~~~~~~~~~ return mcnt; ~~~~~~~~~~~~ } ~ /* Otherwise match next pattern command. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ switch ((re_opcode_t) *p++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ /* Ignore these. Used to ignore the n of succeed_n's which ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ currently have n == 0. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~ case no_op: ~~~~~~~~~~~ DEBUG_MATCH_PRINT1 ("EXECUTING no_op.\n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case succeed: ~~~~~~~~~~~~~ DEBUG_MATCH_PRINT1 ("EXECUTING succeed.\n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto succeed_label; ~~~~~~~~~~~~~~~~~~~ /* Match exactly a string of length n in the pattern. The ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ following byte in the pattern defines n, and the n bytes after ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ that make up the string to match. (Under Mule, this will be in ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the default internal format.) */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case exactn: ~~~~~~~~~~~~ mcnt = *p++; ~~~~~~~~~~~~ DEBUG_MATCH_PRINT2 ("EXECUTING exactn %d.\n", mcnt); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* This is written out as an if-else so we don't waste time ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ testing `translate' inside the loop. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (TRANSLATE_P (translate)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ do ~~ { ~ #ifdef MULE ~~~~~~~~~~~ Bytecount pat_len; ~~~~~~~~~~~~~~~~~~ REGEX_PREFETCH (); ~~~~~~~~~~~~~~~~~~ if (RE_TRANSLATE_1 (itext_ichar_fmt (d, fmt, lispobj)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ != itext_ichar (p)) ~~~~~~~~~~~~~~~~~~~ goto fail; ~~~~~~~~~~ pat_len = itext_ichar_len (p); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ p += pat_len; ~~~~~~~~~~~~~ INC_IBYTEPTR_FMT (d, fmt); ~~~~~~~~~~~~~~~~~~~~~~~~~~ mcnt -= pat_len; ~~~~~~~~~~~~~~~~ #else /* not MULE */ ~~~~~~~~~~~~~~~~~~~~ REGEX_PREFETCH (); ~~~~~~~~~~~~~~~~~~ if ((unsigned char) RE_TRANSLATE_1 (*d++) != *p++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto fail; ~~~~~~~~~~ mcnt--; ~~~~~~~ #endif ~~~~~~ } ~ while (mcnt > 0); ~~~~~~~~~~~~~~~~~ } ~ else ~~~~ { ~ #ifdef MULE ~~~~~~~~~~~ /* If buffer format is default, then we can shortcut and just ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ compare the text directly, byte by byte. Otherwise, we ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ need to go character by character. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (fmt != FORMAT_DEFAULT) ~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ do ~~ { ~ Bytecount pat_len; ~~~~~~~~~~~~~~~~~~ REGEX_PREFETCH (); ~~~~~~~~~~~~~~~~~~ if (itext_ichar_fmt (d, fmt, lispobj) != ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ itext_ichar (p)) ~~~~~~~~~~~~~~~~ goto fail; ~~~~~~~~~~ pat_len = itext_ichar_len (p); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ p += pat_len; ~~~~~~~~~~~~~ INC_IBYTEPTR_FMT (d, fmt); ~~~~~~~~~~~~~~~~~~~~~~~~~~ mcnt -= pat_len; ~~~~~~~~~~~~~~~~ } ~ while (mcnt > 0); ~~~~~~~~~~~~~~~~~ } ~ else ~~~~ #endif ~~~~~~ { ~ do ~~ { ~ REGEX_PREFETCH (); ~~~~~~~~~~~~~~~~~~ if (*d++ != *p++) goto fail; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ mcnt--; ~~~~~~~ } ~ while (mcnt > 0); ~~~~~~~~~~~~~~~~~ } ~ } ~ SET_REGS_MATCHED (); ~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ /* Match any character except possibly a newline or a null. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case anychar: ~~~~~~~~~~~~~ DEBUG_MATCH_PRINT1 ("EXECUTING anychar.\n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ REGEX_PREFETCH (); ~~~~~~~~~~~~~~~~~~ if ((!(bufp->syntax & RE_DOT_NEWLINE) && ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RE_TRANSLATE (itext_ichar_fmt (d, fmt, lispobj)) == '\n') ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || (bufp->syntax & RE_DOT_NOT_NULL && ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RE_TRANSLATE (itext_ichar_fmt (d, fmt, lispobj)) == ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ '\000')) ~~~~~~~~ goto fail; ~~~~~~~~~~ SET_REGS_MATCHED (); ~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT2 (" Matched `%c'.\n", *d); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ INC_IBYTEPTR_FMT (d, fmt); /* XEmacs change */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case charset: ~~~~~~~~~~~~~ case charset_not: ~~~~~~~~~~~~~~~~~ { ~ REGISTER Ichar c; ~~~~~~~~~~~~~~~~~ re_bool not_p = (re_opcode_t) *(p - 1) == charset_not; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT2 ("EXECUTING charset%s.\n", not_p ? "_not" : ""); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ REGEX_PREFETCH (); ~~~~~~~~~~~~~~~~~~ c = itext_ichar_fmt (d, fmt, lispobj); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ c = RE_TRANSLATE (c); /* The character to match. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Cast to `unsigned int' instead of `unsigned char' in case the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bit list is a full 32 bytes long. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if ((unsigned int)c < (unsigned int) (*p * BYTEWIDTH) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ && p[1 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH))) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ not_p = !not_p; ~~~~~~~~~~~~~~~ p += 1 + *p; ~~~~~~~~~~~~ if (!not_p) goto fail; ~~~~~~~~~~~~~~~~~~~~~~ SET_REGS_MATCHED (); ~~~~~~~~~~~~~~~~~~~~ INC_IBYTEPTR_FMT (d, fmt); /* XEmacs change */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ } ~ #ifdef MULE ~~~~~~~~~~~ case charset_mule: ~~~~~~~~~~~~~~~~~~ case charset_mule_not: ~~~~~~~~~~~~~~~~~~~~~~ { ~ REGISTER Ichar c; ~~~~~~~~~~~~~~~~~ re_bool not_p = (re_opcode_t) *(p - 1) == charset_mule_not; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Bitbyte class_bits = *p++; ~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT2 ("EXECUTING charset_mule%s.\n", ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ not_p ? "_not" : ""); ~~~~~~~~~~~~~~~~~~~~~ REGEX_PREFETCH (); ~~~~~~~~~~~~~~~~~~ c = itext_ichar_fmt (d, fmt, lispobj); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ c = RE_TRANSLATE (c); /* The character to match. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if ((class_bits && ~~~~~~~~~~~~~~~~~~ ((class_bits & BIT_WORD && ISWORD (c)) /* = ALNUM */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || (class_bits & BIT_ALPHA && ISALPHA (c)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || (class_bits & BIT_SPACE && ISSPACE (c)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || (class_bits & BIT_PUNCT && ISPUNCT (c)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || (TRANSLATE_P (translate) ? ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (class_bits & (BIT_UPPER | BIT_LOWER) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ && !NOCASEP (lispbuf, c)) ~~~~~~~~~~~~~~~~~~~~~~~~~ : ((class_bits & BIT_UPPER && ISUPPER (c)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || (class_bits & BIT_LOWER && ISLOWER (c)))))) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || EQ (Qt, unified_range_table_lookup ((void *) p, c, Qnil))) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ not_p = !not_p; ~~~~~~~~~~~~~~~ } ~ p += unified_range_table_bytes_used ((void *) p); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!not_p) goto fail; ~~~~~~~~~~~~~~~~~~~~~~ SET_REGS_MATCHED (); ~~~~~~~~~~~~~~~~~~~~ INC_IBYTEPTR_FMT (d, fmt); ~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ } ~ #endif /* MULE */ ~~~~~~~~~~~~~~~~~ /* The beginning of a group is represented by start_memory. The ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ arguments are the register number in the next two bytes, and the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ number of groups inner to this one in the two bytes thereafter. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ The text matched within the group is recorded (in the internal ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ registers data structure) under the register number. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case start_memory: ~~~~~~~~~~~~~~~~~~ { ~ regnum_t regno; ~~~~~~~~~~~~~~~ /* Find out if this group can match the empty string. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ p1 = p; /* To send to group_match_null_string_p. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ EXTRACT_NONNEGATIVE_AND_INCR (regno, p); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT3 ("EXECUTING start_memory %d (%d):\n", ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regno, extract_number (p)); ~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (REG_MATCH_NULL_STRING_P (reg_info[regno]) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ == MATCH_NULL_UNSET_VALUE) ~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ REG_MATCH_NULL_STRING_P (reg_info[regno]) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ = group_match_null_string_p (&p1, pend, reg_info); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ DEBUG_MATCH_PRINT2 (" group CAN%s match null string\n", ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ REG_MATCH_NULL_STRING_P (reg_info[regno]) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ? "NOT" : ""); ~~~~~~~~~~~~~~ /* Save the position in the string where we were the last time ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ we were at this open-group operator in case the group is ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ operated upon by a repetition operator, e.g., with `(a*)*b' ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ against `ab'; then we want to ignore where we are now in the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ string in case this attempt to match fails. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ old_regstart[regno] = REG_MATCH_NULL_STRING_P (reg_info[regno]) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ? REG_UNSET (regstart[regno]) ? d : regstart[regno] ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ : regstart[regno]; ~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT2 (" old_regstart: %zd\n", ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ POINTER_TO_OFFSET (old_regstart[regno])); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regstart[regno] = d; ~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT2 (" regstart: %zd\n", ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ POINTER_TO_OFFSET (regstart[regno])); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ IS_ACTIVE (reg_info[regno]) = 1; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ MATCHED_SOMETHING (reg_info[regno]) = 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Clear this whenever we change the register activity status. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ set_regs_matched_done = 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~ /* This is the new highest active register. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ highest_active_reg = regno; ~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* If nothing was active before, this is the new lowest active ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ register. */ ~~~~~~~~~~~~~ if (lowest_active_reg == NO_LOWEST_ACTIVE_REG) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ lowest_active_reg = regno; ~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Move past the inner group count. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ p += 2; ~~~~~~~ just_past_start_mem = p; ~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ } ~ /* The stop_memory opcode represents the end of a group. Its ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ arguments are the same as start_memory's: the register ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ number, and the number of inner groups. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case stop_memory: ~~~~~~~~~~~~~~~~~ { ~ regnum_t regno, inner_groups; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ EXTRACT_NONNEGATIVE_AND_INCR (regno, p); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ EXTRACT_NONNEGATIVE_AND_INCR (inner_groups, p); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT3 ("EXECUTING stop_memory %d (%d):\n", ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regno, inner_groups); ~~~~~~~~~~~~~~~~~~~~~ /* We need to save the string position the last time we were at ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ this close-group operator in case the group is operated ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ upon by a repetition operator, e.g., with `((a*)*(b*)*)*' ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ against `aba'; then we want to ignore where we are now in ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the string in case this attempt to match fails. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ old_regend[regno] = REG_MATCH_NULL_STRING_P (reg_info[regno]) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ? REG_UNSET (regend[regno]) ? d : regend[regno] ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ : regend[regno]; ~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT2 (" old_regend: %zd\n", ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ POINTER_TO_OFFSET (old_regend[regno])); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regend[regno] = d; ~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT2 (" regend: %zd\n", ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ POINTER_TO_OFFSET (regend[regno])); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* This register isn't active anymore. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ IS_ACTIVE (reg_info[regno]) = 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Clear this whenever we change the register activity status. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ set_regs_matched_done = 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~ /* If this was the only register active, nothing is active ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ anymore. */ ~~~~~~~~~~~~ if (lowest_active_reg == highest_active_reg) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ lowest_active_reg = NO_LOWEST_ACTIVE_REG; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ highest_active_reg = NO_HIGHEST_ACTIVE_REG; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ else ~~~~ { ~ /* We must scan for the new highest active register, since it ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ isn't necessarily one less than now: consider ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (a(b)c(d(e)f)g). When group 3 ends, after the f), the new ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ highest active register is 1. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regnum_t r = regno - 1; ~~~~~~~~~~~~~~~~~~~~~~~ while (r > 0 && !IS_ACTIVE (reg_info[r])) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ r--; ~~~~ /* If we end up at register zero, that means that we saved ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the registers as the result of an `on_failure_jump', not ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ a `start_memory', and we jumped to past the innermost ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `stop_memory'. For example, in ((.)*) we save registers ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 1 and 2 as a result of the *, but when we pop back to the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ second ), we are at the stop_memory 1. Thus, nothing is ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ active. */ ~~~~~~~~~~~ if (r == 0) ~~~~~~~~~~~ { ~ lowest_active_reg = NO_LOWEST_ACTIVE_REG; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ highest_active_reg = NO_HIGHEST_ACTIVE_REG; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ else ~~~~ { ~ highest_active_reg = r; ~~~~~~~~~~~~~~~~~~~~~~~ /* 98/9/21 jhod: We've also gotta set lowest_active_reg, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ don't we? */ ~~~~~~~~~~~~ r = 1; ~~~~~~ while (r < highest_active_reg && !IS_ACTIVE(reg_info[r])) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ r++; ~~~~ lowest_active_reg = r; ~~~~~~~~~~~~~~~~~~~~~~ } ~ } ~ /* If just failed to match something this time around with a ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ group that's operated on by a repetition operator, try to ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ force exit from the ``loop'', and restore the register ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ information for this group that we had before trying this ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ last match. */ ~~~~~~~~~~~~~~~ if ((!MATCHED_SOMETHING (reg_info[regno]) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || just_past_start_mem == p - 4) && p < pend) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ re_bool is_a_jump_n = false; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ p1 = p; ~~~~~~~ mcnt = 0; ~~~~~~~~~ switch ((re_opcode_t) *p1++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ case jump_n: ~~~~~~~~~~~~ is_a_jump_n = true; ~~~~~~~~~~~~~~~~~~~ case pop_failure_jump: ~~~~~~~~~~~~~~~~~~~~~~ case maybe_pop_jump: ~~~~~~~~~~~~~~~~~~~~ case jump: ~~~~~~~~~~ case dummy_failure_jump: ~~~~~~~~~~~~~~~~~~~~~~~~ EXTRACT_NUMBER_AND_INCR (mcnt, p1); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (is_a_jump_n) ~~~~~~~~~~~~~~~~ p1 += 2; ~~~~~~~~ break; ~~~~~~ default: ~~~~~~~~ /* do nothing */ ; ~~~~~~~~~~~~~~~~~~ } ~ p1 += mcnt; ~~~~~~~~~~~ /* If the next operation is a jump backwards in the pattern ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ to an on_failure_jump right before the start_memory ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ corresponding to this stop_memory, exit from the loop ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ by forcing a failure after pushing on the stack the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ on_failure_jump's jump in the pattern, and d. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (mcnt < 0 && (re_opcode_t) *p1 == on_failure_jump ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ && (re_opcode_t) p1[3] == start_memory && ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regno == extract_nonnegative (p1 + 4)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ /* If this group ever matched anything, then restore ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ what its registers were before trying this last ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ failed match, e.g., with `(a*)*b' against `ab' for ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regstart[1], and, e.g., with `((a*)*(b*)*)*' against ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `aba' for regend[3]. ~~~~~~~~~~~~~~~~~~~~ Also restore the registers for inner groups for, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ e.g., `((a*)(b*))*' against `aba' (register 3 would ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ otherwise get trashed). */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (EVER_MATCHED_SOMETHING (reg_info[regno])) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ int r; ~~~~~~ EVER_MATCHED_SOMETHING (reg_info[regno]) = 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Restore this and inner groups' (if any) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ registers. */ ~~~~~~~~~~~~~~ for (r = regno; r < regno + inner_groups; r++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ regstart[r] = old_regstart[r]; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* xx why this test? */ ~~~~~~~~~~~~~~~~~~~~~~~~ if (old_regend[r] >= regstart[r]) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regend[r] = old_regend[r]; ~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ } ~ p1++; ~~~~~ EXTRACT_NUMBER_AND_INCR (mcnt, p1); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PUSH_FAILURE_POINT (p1 + mcnt, d, -2); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto fail; ~~~~~~~~~~ } ~ } ~ /* We used to move past the register number and inner group count ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ here, when registers were just one byte; that's no longer ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ necessary with EXTRACT_NUMBER_AND_INCR(), above. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ } ~ /* \ has been turned into a `duplicate' command which is ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ followed by the numeric value of as the register number. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (Already passed through external-to-internal-register mapping, so ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ it refers to the actual group number, not the non-shy-only ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ numbering used in the external world.) */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case duplicate: ~~~~~~~~~~~~~~~ { ~ REGISTER re_char *d2, *dend2; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Get which register to match against. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regnum_t regno; ~~~~~~~~~~~~~~~ EXTRACT_NONNEGATIVE_AND_INCR (regno, p); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT2 ("EXECUTING duplicate %d.\n", regno); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Can't back reference a group which we've never matched. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (REG_UNSET (regstart[regno]) || REG_UNSET (regend[regno])) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto fail; ~~~~~~~~~~ /* Where in input to try to start matching. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ d2 = regstart[regno]; ~~~~~~~~~~~~~~~~~~~~~ /* Where to stop matching; if both the place to start and ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the place to stop matching are in the same string, then ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ set to the place to stop, otherwise, for now have to use ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the end of the first string. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ dend2 = ((FIRST_STRING_P (regstart[regno]) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ == FIRST_STRING_P (regend[regno])) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ? regend[regno] : end_match_1); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (;;) ~~~~~~~~ { ~ /* If necessary, advance to next segment in register ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ contents. */ ~~~~~~~~~~~~~ while (d2 == dend2) ~~~~~~~~~~~~~~~~~~~ { ~ if (dend2 == end_match_2) break; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (dend2 == regend[regno]) break; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* End of string1 => advance to string2. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ d2 = string2; ~~~~~~~~~~~~~ dend2 = regend[regno]; ~~~~~~~~~~~~~~~~~~~~~~ } ~ /* At end of register contents => success */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (d2 == dend2) break; ~~~~~~~~~~~~~~~~~~~~~~~ /* If necessary, advance to next segment in data. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ REGEX_PREFETCH (); ~~~~~~~~~~~~~~~~~~ /* How many characters left in this segment to match. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ mcnt = dend - d; ~~~~~~~~~~~~~~~~ /* Want how many consecutive characters we can match in ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ one shot, so, if necessary, adjust the count. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (mcnt > dend2 - d2) ~~~~~~~~~~~~~~~~~~~~~~ mcnt = dend2 - d2; ~~~~~~~~~~~~~~~~~~ /* Compare that many; failure if mismatch, else move ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ past them. */ ~~~~~~~~~~~~~~ if (TRANSLATE_P (translate) ~~~~~~~~~~~~~~~~~~~~~~~~~~~ ? bcmp_translate (d, d2, mcnt, translate ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef emacs ~~~~~~~~~~~~ , fmt, lispobj ~~~~~~~~~~~~~~ #endif ~~~~~~ ) ~ : memcmp (d, d2, mcnt)) ~~~~~~~~~~~~~~~~~~~~~~~ goto fail; ~~~~~~~~~~ d += mcnt, d2 += mcnt; ~~~~~~~~~~~~~~~~~~~~~~ /* Do this because we've match some characters. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ SET_REGS_MATCHED (); ~~~~~~~~~~~~~~~~~~~~ } ~ } ~ break; ~~~~~~ /* begline matches the empty string at the beginning of the string ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (unless `not_bol' is set in `bufp'), and, if ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `newline_anchor' is set, after newlines. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case begline: ~~~~~~~~~~~~~ DEBUG_MATCH_PRINT1 ("EXECUTING begline.\n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (AT_STRINGS_BEG (d)) ~~~~~~~~~~~~~~~~~~~~~~~ { ~ if (!bufp->not_bol) break; ~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ else ~~~~ { ~ re_char *d2 = d; ~~~~~~~~~~~~~~~~ DEC_IBYTEPTR (d2); ~~~~~~~~~~~~~~~~~~ if (itext_ichar_ascii_fmt (d2, fmt, lispobj) == '\n' && ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->newline_anchor) ~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ } ~ /* In all other cases, we fail. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto fail; ~~~~~~~~~~ /* endline is the dual of begline. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case endline: ~~~~~~~~~~~~~ DEBUG_MATCH_PRINT1 ("EXECUTING endline.\n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (AT_STRINGS_END (d)) ~~~~~~~~~~~~~~~~~~~~~~~ { ~ if (!bufp->not_eol) break; ~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* We have to ``prefetch'' the next character. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ else if ((d == end1 ? ~~~~~~~~~~~~~~~~~~~~~ itext_ichar_ascii_fmt (string2, fmt, lispobj) : ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ itext_ichar_ascii_fmt (d, fmt, lispobj)) == '\n' ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ && bufp->newline_anchor) ~~~~~~~~~~~~~~~~~~~~~~~~ { ~ break; ~~~~~~ } ~ goto fail; ~~~~~~~~~~ /* Match at the very beginning of the data. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case begbuf: ~~~~~~~~~~~~ DEBUG_MATCH_PRINT1 ("EXECUTING begbuf.\n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (AT_STRINGS_BEG (d)) ~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ goto fail; ~~~~~~~~~~ /* Match at the very end of the data. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case endbuf: ~~~~~~~~~~~~ DEBUG_MATCH_PRINT1 ("EXECUTING endbuf.\n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (AT_STRINGS_END (d)) ~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ goto fail; ~~~~~~~~~~ /* on_failure_keep_string_jump is used to optimize `.*\n'. It ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ pushes NULL as the value for the string on the stack. Then ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `pop_failure_point' will keep the current value for the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ string, instead of restoring it. To see why, consider ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ matching `foo\nbar' against `.*\n'. The .* matches the foo; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ then the . fails against the \n. But the next thing we want ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ to do is match the \n against the \n; if we restored the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ string value, we would be back at the foo. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Because this is used only in specific cases, we don't need to ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ check all the things that `on_failure_jump' does, to make ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ sure the right things get saved on the stack. Hence we don't ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ share its code. The only reason to push anything on the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ stack at all is that otherwise we would have to change ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `anychar's code to do something besides goto fail in this ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case; that seems worse than this. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case on_failure_keep_string_jump: ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT1 ("EXECUTING on_failure_keep_string_jump"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ EXTRACT_NUMBER_AND_INCR (mcnt, p); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT3 (" %d (to 0x%zx):\n", mcnt, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (Bytecount) (p + mcnt)); ~~~~~~~~~~~~~~~~~~~~~~~~ PUSH_FAILURE_POINT (p + mcnt, (unsigned char *) 0, -2); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ /* Uses of on_failure_jump: ~~~~~~~~~~~~~~~~~~~~~~~~~~~ Each alternative starts with an on_failure_jump that points ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ to the beginning of the next alternative. Each alternative ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ except the last ends with a jump that in effect jumps past ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the rest of the alternatives. (They really jump to the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ending jump of the following alternative, because tensioning ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ these jumps is a hassle.) ~~~~~~~~~~~~~~~~~~~~~~~~~ Repeats start with an on_failure_jump that points past both ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the repetition text and either the following jump or ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ pop_failure_jump back to this on_failure_jump. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case on_failure_jump: ~~~~~~~~~~~~~~~~~~~~~ on_failure: ~~~~~~~~~~~ DEBUG_MATCH_PRINT1 ("EXECUTING on_failure_jump"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ EXTRACT_NUMBER_AND_INCR (mcnt, p); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT3 (" %d (to 0x%zx)", mcnt, (Bytecount) (p + mcnt)); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* If this on_failure_jump comes right before a group (i.e., ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the original * applied to a group), save the information ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for that group and all inner ones, so that if we fail back ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ to this point, the group's information will be correct. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ For example, in \(a*\)*\1, we need the preceding group, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ and in \(\(a*\)b*\)\2, we need the inner group. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* We can't use `p' to check ahead because we push ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ a failure point to `p + mcnt' after we do this. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ p1 = p; ~~~~~~~ /* We need to skip no_op's before we look for the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ start_memory in case this on_failure_jump is happening as ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the result of a completed succeed_n, as in \(a\)\{1,3\}b\1 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ against aba. */ ~~~~~~~~~~~~~~~~ while (p1 < pend && (re_opcode_t) *p1 == no_op) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ p1++; ~~~~~ if (p1 < pend && (re_opcode_t) *p1 == start_memory) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ /* We have a new highest active register now. This will ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ get reset at the start_memory we are about to get to, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ but we will have saved all the registers relevant to ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ this repetition op, as described above. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ highest_active_reg = *(p1 + 1) + *(p1 + 2); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (lowest_active_reg == NO_LOWEST_ACTIVE_REG) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ lowest_active_reg = *(p1 + 1); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ DEBUG_MATCH_PRINT1 (":\n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~ PUSH_FAILURE_POINT (p + mcnt, d, -2); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regex.c:774:55: note: in definition of macro 'DEBUG_FAIL_PRINT2' if (debug_regexps & RE_DEBUG_FAILURE_POINT) printf (x1, x2) ^~ regex.c:6590:11: note: in expansion of macro 'PUSH_FAILURE_POINT' PUSH_FAILURE_POINT (p + mcnt, d, -2); ^~~~~~~~~~~~~~~~~~ regex.c:1877:26: warning: format '%zd' expects argument of type 'signed size_t', but argument 2 has type 'long int' [-Wformat=] DEBUG_FAIL_PRINT2 (" Before pop, next avail: %zd\n", \ ^ (Bytecount) fail_stack.avail); \ ~~~~~~~~~~~~~~~~~~~~~~~~ regex.c:774:55: note: in definition of macro 'DEBUG_FAIL_PRINT2' if (debug_regexps & RE_DEBUG_FAILURE_POINT) printf (x1, x2) ^~ regex.c:6767:13: note: in expansion of macro 'POP_FAILURE_POINT' POP_FAILURE_POINT (sdummy, pdummy, ^~~~~~~~~~~~~~~~~ regex.c:1879:26: warning: format '%zd' expects argument of type 'signed size_t', but argument 2 has type 'long int' [-Wformat=] DEBUG_FAIL_PRINT2 (" size: %zd\n", \ ^ (Bytecount) fail_stack.size); \ ~~~~~~~~~~~~~~~~~~~~~~~~ regex.c:774:55: note: in definition of macro 'DEBUG_FAIL_PRINT2' if (debug_regexps & RE_DEBUG_FAILURE_POINT) printf (x1, x2) ^~ regex.c:6767:13: note: in expansion of macro 'POP_FAILURE_POINT' POP_FAILURE_POINT (sdummy, pdummy, ^~~~~~~~~~~~~~~~~ --- select.o --- gcc -c -Wall -Wno-switch -Wundef -Wsign-compare -Wno-char-subscripts -Wpacked -Wpointer-arith -Wshadow -Wmissing-declarations -Wmissing-prototypes -Wstrict-prototypes -Wdeclaration-after-statement -g -O2 -Dunix -no-pie -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -DTERMINFO -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -Demacs -I. -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/src -DHAVE_CONFIG_H -I/usr/pkg/include -I/usr/X11R7/include -I/usr/X11R7/include select.c --- regex.o --- regex.c:1901:26: warning: format '%zx' expects argument of type 'size_t', but argument 2 has type 'long int' [-Wformat=] DEBUG_FAIL_PRINT2 (" Popping string 0x%zx: `", (Bytecount) str); \ ^ ~~~~~~~~~~~~~~~~~~~ DEBUG_FAIL_PRINT_DOUBLE_STRING (str, string1, size1, \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ string2, size2); \ ~~~~~~~~~~~~~~~~~~~~ DEBUG_FAIL_PRINT1 ("'\n"); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_FAIL_PRINT2 (" Popping pattern 0x%zx: ", (Bytecount) pat); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_FAIL_PRINT_COMPILED_PATTERN (bufp, pat, pend); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_FAIL_PRINT2 (" Popping high active reg: %d\n", high_reg); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_FAIL_PRINT2 (" Popping low active reg: %d\n", low_reg); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } \ ~~~~~~~~~~~ \ ~ for (this_reg = high_reg; this_reg >= low_reg; this_reg--) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { \ ~~~~~~~~~~~ reg_info[this_reg].word = POP_FAILURE_ELT (); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regend[this_reg] = POP_FAILURE_RELOCATABLE (); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regstart[this_reg] = POP_FAILURE_RELOCATABLE (); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ \ ~ if (DEBUG_RUNTIME_FLAGS & RE_DEBUG_FAILURE_POINT) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { \ ~~~~~~~~~~ DEBUG_FAIL_PRINT2 (" Popping reg: %d\n", this_reg); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_FAIL_PRINT2 (" info: 0x%zx\n", \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ * (Bytecount *) ®_info[this_reg]); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_FAIL_PRINT2 (" end: 0x%zx\n", \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (Bytecount) regend[this_reg]); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_FAIL_PRINT2 (" start: 0x%zx\n", \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (Bytecount) regstart[this_reg]); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } \ ~~~~~~~~~~ } \ ~~~~~~~~~~~ \ ~ set_regs_matched_done = 0; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_STATEMENT (nfailure_points_popped++); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } while (0) /* POP_FAILURE_POINT */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ~ /* Structure for per-register (a.k.a. per-group) information. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Other register information, such as the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ starting and ending positions (which are addresses), and the list of ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ inner groups (which is a bits list) are maintained in separate ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ variables. ~~~~~~~~~~ We are making a (strictly speaking) nonportable assumption here: that ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the compiler will pack our bit fields into something that fits into ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the type of `word', i.e., is something that fits into one item on the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ failure stack. */ ~~~~~~~~~~~~~~~~~~ typedef union ~~~~~~~~~~~~~ { ~ fail_stack_elt_t word; ~~~~~~~~~~~~~~~~~~~~~~ struct ~~~~~~ { ~ /* This field is one if this group can match the empty string, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ zero if not. If not yet determined, `MATCH_NULL_UNSET_VALUE'. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define MATCH_NULL_UNSET_VALUE 3 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ unsigned int match_null_string_p : 2; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ unsigned int is_active : 1; ~~~~~~~~~~~~~~~~~~~~~~~~~~~ unsigned int matched_something : 1; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ unsigned int ever_matched_something : 1; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } bits; ~~~~~~~ } register_info_type; ~~~~~~~~~~~~~~~~~~~~~ #define REG_MATCH_NULL_STRING_P(R) ((R).bits.match_null_string_p) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define IS_ACTIVE(R) ((R).bits.is_active) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define MATCHED_SOMETHING(R) ((R).bits.matched_something) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define EVER_MATCHED_SOMETHING(R) ((R).bits.ever_matched_something) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Call this when have matched a real character; it sets `matched' flags ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for the subexpressions which we are currently inside. Also records ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ that those subexprs have matched. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define SET_REGS_MATCHED() \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ do \ ~~~~~~~~~~~~ { \ ~~~~~~~~~~~ if (!set_regs_matched_done) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { \ ~~~~~~~~~~ int r; \ ~~~~~~~~~~~~~~ set_regs_matched_done = 1; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (r = lowest_active_reg; r <= highest_active_reg; r++) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { \ ~~~~~~~~~~ MATCHED_SOMETHING (reg_info[r]) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ = EVER_MATCHED_SOMETHING (reg_info[r]) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ = 1; \ ~~~~~~~~~~~~ } \ ~~~~~~~~~~ } \ ~~~~~~~~~~ } \ ~~~~~~~~~~~ while (0) ~~~~~~~~~ ~ /* Subroutine declarations and macros for regex_compile. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Fetch the next character in the uncompiled pattern---translating it ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if necessary. */ ~~~~~~~~~~~~~~~~~ #define PATFETCH(c) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~ do { \ ~~~~~~~~~~~~~~ PATFETCH_RAW (c); \ ~~~~~~~~~~~~~~~~~~~~~~~~~ c = RE_TRANSLATE (c); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } while (0) ~~~~~~~~~~~ /* Fetch the next character in the uncompiled pattern, with no ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ translation. */ ~~~~~~~~~~~~~~~~ #define PATFETCH_RAW(c) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ do {if (p == pend) return REG_EEND; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ assert (p < pend); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~ c = itext_ichar (p); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ INC_IBYTEPTR (p); \ ~~~~~~~~~~~~~~~~~~~~~~~~~ } while (0) ~~~~~~~~~~~ /* Go backwards one character in the pattern. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define PATUNFETCH DEC_IBYTEPTR (p) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* If `translate' is non-null, return translate[D], else just D. We ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ cast the subscript to translate because some data is declared as ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `char *', to avoid warnings when a string constant is passed. But ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ when we use a character as a subscript we must make it unsigned. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define RE_TRANSLATE(d) \ ~~~~~~~~~~~~~~~~~~~~~~~~~ (TRANSLATE_P (translate) ? RE_TRANSLATE_1 (d) : (d)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Macros for outputting the compiled pattern into `buffer'. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* If the buffer isn't allocated when it comes in, use this. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define INIT_BUF_SIZE 32 ~~~~~~~~~~~~~~~~~~~~~~~~~ /* Make sure we have at least N more bytes of space in buffer. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define GET_BUFFER_SPACE(n) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ while (buf_end - bufp->buffer + (n) > (ptrdiff_t) bufp->allocated) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ EXTEND_BUFFER () ~~~~~~~~~~~~~~~~ /* Make sure we have one more byte of buffer space and then add C to it. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define BUF_PUSH(c) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~ do { \ ~~~~~~~~~~~~~~ GET_BUFFER_SPACE (1); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ *buf_end++ = (unsigned char) (c); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } while (0) ~~~~~~~~~~~ /* Ensure we have two more bytes of buffer space and then append C1 and C2. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define BUF_PUSH_2(c1, c2) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ do { \ ~~~~~~~~~~~~~~ GET_BUFFER_SPACE (2); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ *buf_end++ = (unsigned char) (c1); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ *buf_end++ = (unsigned char) (c2); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } while (0) ~~~~~~~~~~~ /* As with BUF_PUSH_2, except for three bytes. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define BUF_PUSH_3(c1, c2, c3) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ do { \ ~~~~~~~~~~~~~~ GET_BUFFER_SPACE (3); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ *buf_end++ = (unsigned char) (c1); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ *buf_end++ = (unsigned char) (c2); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ *buf_end++ = (unsigned char) (c3); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } while (0) ~~~~~~~~~~~ /* Store a jump with opcode OP at LOC to location TO. We store a ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ relative address offset by the three bytes the jump itself occupies. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define STORE_JUMP(op, loc, to) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ store_op1 (op, loc, (to) - (loc) - 3) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Likewise, for a two-argument jump. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define STORE_JUMP2(op, loc, to, arg) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ store_op2 (op, loc, (to) - (loc) - 3, arg) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Like `STORE_JUMP', but for inserting. Assume `buf_end' is the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ buffer end. */ ~~~~~~~~~~~~~~~ #define INSERT_JUMP(op, loc, to) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ insert_op1 (op, loc, (to) - (loc) - 3, buf_end) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Like `STORE_JUMP2', but for inserting. Assume `buf_end' is the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ buffer end. */ ~~~~~~~~~~~~~~~ #define INSERT_JUMP2(op, loc, to, arg) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ insert_op2 (op, loc, (to) - (loc) - 3, arg, buf_end) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Extend the buffer by twice its current size via realloc and ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ reset the pointers that pointed into the old block to point to the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ correct places in the new one. If extending the buffer results in it ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ being larger than RE_MAX_BUF_SIZE, then flag memory exhausted. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define EXTEND_BUFFER() \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ do { \ ~~~~~~~~~~~~~~~ re_char *old_buffer = bufp->buffer; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (bufp->allocated == RE_MAX_BUF_SIZE) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return REG_ESIZE; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->allocated <<= 1; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (bufp->allocated > RE_MAX_BUF_SIZE) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->allocated = RE_MAX_BUF_SIZE; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->buffer = \ ~~~~~~~~~~~~~~~~~~~~~~~ (unsigned char *) xrealloc (bufp->buffer, bufp->allocated); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (bufp->buffer == NULL) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return REG_ESPACE; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~ /* If the buffer moved, move all the pointers into it. */ \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (old_buffer != bufp->buffer) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { \ ~~~~~~~~~~~~ buf_end = (buf_end - old_buffer) + bufp->buffer; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ begalt = (begalt - old_buffer) + bufp->buffer; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (fixup_alt_jump) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~ fixup_alt_jump = (fixup_alt_jump - old_buffer) + bufp->buffer; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (laststart) \ ~~~~~~~~~~~~~~~~~~~~~~~ laststart = (laststart - old_buffer) + bufp->buffer; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (pending_exact) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~ pending_exact = (pending_exact - old_buffer) + bufp->buffer; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } \ ~~~~~~~~~~~~ } while (0) ~~~~~~~~~~~ #define INIT_REG_TRANSLATE_SIZE 5 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Macros for the compile stack. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Since offsets can go either forwards or backwards, this type needs to be ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ able to hold values from -(RE_MAX_BUF_SIZE - 1) to RE_MAX_BUF_SIZE - 1. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ typedef int pattern_offset_t; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ typedef struct ~~~~~~~~~~~~~~ { ~ pattern_offset_t begalt_offset; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ pattern_offset_t fixup_alt_jump; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ pattern_offset_t inner_group_offset; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ pattern_offset_t laststart_offset; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regnum_t regnum; ~~~~~~~~~~~~~~~~ } compile_stack_elt_t; ~~~~~~~~~~~~~~~~~~~~~~ typedef struct ~~~~~~~~~~~~~~ { ~ compile_stack_elt_t *stack; ~~~~~~~~~~~~~~~~~~~~~~~~~~~ int size; ~~~~~~~~~ int avail; /* Offset of next open position. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } compile_stack_type; ~~~~~~~~~~~~~~~~~~~~~ #define INIT_COMPILE_STACK_SIZE 32 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define COMPILE_STACK_EMPTY (compile_stack.avail == 0) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define COMPILE_STACK_FULL (compile_stack.avail == compile_stack.size) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* The next available element. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define COMPILE_STACK_TOP (compile_stack.stack[compile_stack.avail]) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Set the bit for character C in a bit vector. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define SET_LIST_BIT(c) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (buf_end[((unsigned char) (c)) / BYTEWIDTH] \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ |= 1 << (((unsigned char) c) % BYTEWIDTH)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef MULE ~~~~~~~~~~~ /* Set the "bit" for character C in a range table. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define SET_RANGETAB_BIT(c) put_range_table (rtab, c, c, Qt) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ #ifdef emacs ~~~~~~~~~~~~ /* Parse the longest number we can, but don't produce a bignum, that can't ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ correspond to anything we're interested in and would needlessly complicate ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ code. Also avoid the silent overflow issues of the non-emacs code below. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ If the string at P is not exhausted, leave P pointing at the next ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (probable-)non-digit byte encountered. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define GET_UNSIGNED_NUMBER(num) do \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Ibyte *_gus_numend = NULL; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Lisp_Object _gus_numno; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* most-positive-fixnum on 32 bit XEmacs is 10 decimal digits, \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ nine will keep us in fixnum territory no matter our \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ architecture */ \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Bytecount limit = min (pend - p, 9); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ \ ~ /* Require that any digits are ASCII. We already require that \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the user type ASCII in order to type {,(,|, etc, and there is \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the potential for security holes in the future if we allow \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ non-ASCII digits to specify groups in regexps and other \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ code that parses regexps is not aware of this. */ \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ _gus_numno = parse_integer (p, &_gus_numend, limit, 10, 1, \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Vdigit_fixnum_ascii); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (FIXNUMP (_gus_numno) && XREALFIXNUM (_gus_numno) >= 0) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ num = XREALFIXNUM (_gus_numno); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ p = _gus_numend; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } while (0) ~~~~~~~~~~~ #else ~~~~~ /* Get the next unsigned number in the uncompiled pattern. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define GET_UNSIGNED_NUMBER(num) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { if (p != pend) \ ~~~~~~~~~~~~~~~~~~~~~~~~ { \ ~~~~~~~~~~~ int _gun_do_unfetch = 1; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PATFETCH (c); \ ~~~~~~~~~~~~~~~~~~~~~~ while (ISDIGIT (c)) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~ { \ ~~~~~~~~~~~ if (num < 0) \ ~~~~~~~~~~~~~~~~~~~~ num = 0; \ ~~~~~~~~~~~~~~~~ num = num * 10 + c - '0'; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (p == pend) \ ~~~~~~~~~~~~~~~~~~~~~~ { \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ _gun_do_unfetch = 0; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; \ ~~~~~~~~~~~~~~~ } \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PATFETCH (c); \ ~~~~~~~~~~~~~~~~~~~~ } \ ~~~~~~~~~~~ if (_gun_do_unfetch) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Make sure P points to the next non-digit character. */ \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PATUNFETCH; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ #endif ~~~~~~ /* Map a string to the char class it names (if any). BEG points to the string ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ to be parsed and LIMIT is the length, in bytes, of that string. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ XEmacs; this only handles the NAME part of the [:NAME:] specification of a ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ character class name. The GNU emacs version of this function attempts to ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ handle the string from [: onwards, and is called re_wctype_parse. Our ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ approach means the function doesn't need to be called with every character ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ class encountered. ~~~~~~~~~~~~~~~~~~ LENGTH would be a Bytecount if this function didn't need to be compiled ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ also for executables that don't include lisp.h ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Return RECC_ERROR if STRP doesn't match a known character class. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_wctype_t ~~~~~~~~~~~ re_wctype (const unsigned char *beg, int limit) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ /* Sort tests in the length=five case by frequency the classes to minimize ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ number of times we fail the comparison. The frequencies of character class ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ names used in Emacs sources as of 2016-07-27: ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ $ find \( -name \*.c -o -name \*.el \) -exec grep -h '\[:[a-z]*:]' {} + | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ sed 's/]/]\n/g' |grep -o '\[:[a-z]*:]' |sort |uniq -c |sort -nr ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 213 [:alnum:] ~~~~~~~~~~~~~ 104 [:alpha:] ~~~~~~~~~~~~~ 62 [:space:] ~~~~~~~~~~~~ 39 [:digit:] ~~~~~~~~~~~~ 36 [:blank:] ~~~~~~~~~~~~ 26 [:word:] ~~~~~~~~~~~ 26 [:upper:] ~~~~~~~~~~~~ 21 [:lower:] ~~~~~~~~~~~~ 10 [:xdigit:] ~~~~~~~~~~~~~ 10 [:punct:] ~~~~~~~~~~~~ 10 [:ascii:] ~~~~~~~~~~~~ 4 [:nonascii:] ~~~~~~~~~~~~~~ 4 [:graph:] ~~~~~~~~~~~ 2 [:print:] ~~~~~~~~~~~ 2 [:cntrl:] ~~~~~~~~~~~ 1 [:ff:] ~~~~~~~~ If you update this list, consider also updating chain of or'ed conditions ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ in execute_charset function. XEmacs; our equivalent is the condition ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ checking class_bits in the charset_mule and charset_mule_not opcodes. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ ~~ switch (limit) { ~~~~~~~~~~~~~~~~ case 4: ~~~~~~~ if (!memcmp (beg, "word", 4)) return RECC_WORD; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case 5: ~~~~~~~ if (!memcmp (beg, "alnum", 5)) return RECC_ALNUM; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!memcmp (beg, "alpha", 5)) return RECC_ALPHA; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!memcmp (beg, "space", 5)) return RECC_SPACE; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!memcmp (beg, "digit", 5)) return RECC_DIGIT; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!memcmp (beg, "blank", 5)) return RECC_BLANK; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!memcmp (beg, "upper", 5)) return RECC_UPPER; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!memcmp (beg, "lower", 5)) return RECC_LOWER; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!memcmp (beg, "punct", 5)) return RECC_PUNCT; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!memcmp (beg, "ascii", 5)) return RECC_ASCII; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!memcmp (beg, "graph", 5)) return RECC_GRAPH; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!memcmp (beg, "print", 5)) return RECC_PRINT; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!memcmp (beg, "cntrl", 5)) return RECC_CNTRL; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case 6: ~~~~~~~ if (!memcmp (beg, "xdigit", 6)) return RECC_XDIGIT; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case 7: ~~~~~~~ if (!memcmp (beg, "unibyte", 7)) return RECC_UNIBYTE; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case 8: ~~~~~~~ if (!memcmp (beg, "nonascii", 8)) return RECC_NONASCII; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case 9: ~~~~~~~ if (!memcmp (beg, "multibyte", 9)) return RECC_MULTIBYTE; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ } ~ return RECC_ERROR; ~~~~~~~~~~~~~~~~~~ } ~ /* True if CH is in the char class CC. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int ~~~ re_iswctype (int ch, re_wctype_t cc ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RE_ISWCTYPE_ARG_DECL) ~~~~~~~~~~~~~~~~~~~~~ { ~ switch (cc) ~~~~~~~~~~~ { ~ case RECC_ALNUM: return ISALNUM (ch) != 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_ALPHA: return ISALPHA (ch) != 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_BLANK: return ISBLANK (ch) != 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_CNTRL: return ISCNTRL (ch) != 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_DIGIT: return ISDIGIT (ch) != 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_GRAPH: return ISGRAPH (ch) != 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_PRINT: return ISPRINT (ch) != 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_PUNCT: return ISPUNCT (ch) != 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_SPACE: return ISSPACE (ch) != 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef emacs ~~~~~~~~~~~~ case RECC_UPPER: ~~~~~~~~~~~~~~~~ return NILP (lispbuf->case_fold_search) ? ISUPPER (ch) != 0 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ : !NOCASEP (lispbuf, ch); ~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_LOWER: ~~~~~~~~~~~~~~~~ return NILP (lispbuf->case_fold_search) ? ISLOWER (ch) != 0 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ : !NOCASEP (lispbuf, ch); ~~~~~~~~~~~~~~~~~~~~~~~~~ #else ~~~~~ case RECC_UPPER: return ISUPPER (ch) != 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_LOWER: return ISLOWER (ch) != 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ case RECC_XDIGIT: return ISXDIGIT (ch) != 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_ASCII: return ISASCII (ch) != 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_NONASCII: case RECC_MULTIBYTE: return !ISASCII (ch); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_UNIBYTE: return ISUNIBYTE (ch) != 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_WORD: return ISWORD (ch) != 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_ERROR: return false; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ assert (0); ~~~~~~~~~~~ return -1; ~~~~~~~~~~ } ~ #ifdef MULE ~~~~~~~~~~~ static re_bool ~~~~~~~~~~~~~~ re_wctype_can_match_non_ascii (re_wctype_t cc) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ switch (cc) ~~~~~~~~~~~ { ~ case RECC_ASCII: ~~~~~~~~~~~~~~~~ case RECC_UNIBYTE: ~~~~~~~~~~~~~~~~~~ case RECC_CNTRL: ~~~~~~~~~~~~~~~~ case RECC_DIGIT: ~~~~~~~~~~~~~~~~ case RECC_XDIGIT: ~~~~~~~~~~~~~~~~~ case RECC_BLANK: ~~~~~~~~~~~~~~~~ return false; ~~~~~~~~~~~~~ default: ~~~~~~~~ return true; ~~~~~~~~~~~~ } ~ } ~ #endif /* MULE */ ~~~~~~~~~~~~~~~~~ #ifdef emacs ~~~~~~~~~~~~ /* Return a bit-pattern to use in the range-table bits to match multibyte ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ chars of class CC. */ ~~~~~~~~~~~~~~~~~~~~~~ static unsigned char ~~~~~~~~~~~~~~~~~~~~ re_wctype_to_bit (re_wctype_t cc) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ switch (cc) ~~~~~~~~~~~ { ~ case RECC_PRINT: case RECC_GRAPH: ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_ALPHA: return BIT_ALPHA; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_ALNUM: case RECC_WORD: return BIT_WORD; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_LOWER: return BIT_LOWER; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_UPPER: return BIT_UPPER; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_PUNCT: return BIT_PUNCT; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_SPACE: return BIT_SPACE; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_MULTIBYTE: case RECC_NONASCII: ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_ASCII: case RECC_DIGIT: case RECC_XDIGIT: case RECC_CNTRL: ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_BLANK: case RECC_UNIBYTE: case RECC_ERROR: return 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ default: ~~~~~~~~ ABORT (); ~~~~~~~~~ return 0; ~~~~~~~~~ } ~ } ~ #endif /* emacs */ ~~~~~~~~~~~~~~~~~~ ~ static void store_op1 (re_opcode_t op, unsigned char *loc, int arg); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ static void store_op2 (re_opcode_t op, unsigned char *loc, int arg1, int arg2); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ static void insert_op1 (re_opcode_t op, unsigned char *loc, int arg, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ unsigned char *end); ~~~~~~~~~~~~~~~~~~~~ static void insert_op2 (re_opcode_t op, unsigned char *loc, int arg1, int arg2, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ unsigned char *end); ~~~~~~~~~~~~~~~~~~~~ static re_bool at_begline_loc_p (re_char *pattern, re_char *p, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ reg_syntax_t syntax); ~~~~~~~~~~~~~~~~~~~~~ static re_bool at_endline_loc_p (re_char *p, re_char *pend, int syntax); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ static re_bool group_in_compile_stack (compile_stack_type compile_stack, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regnum_t regnum); ~~~~~~~~~~~~~~~~~ static reg_errcode_t compile_range (re_char **p_ptr, re_char *pend, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RE_TRANSLATE_TYPE translate, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ reg_syntax_t syntax, ~~~~~~~~~~~~~~~~~~~~ unsigned char *b); ~~~~~~~~~~~~~~~~~~ #ifdef MULE ~~~~~~~~~~~ static reg_errcode_t compile_extended_range (re_char **p_ptr, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_char *pend, ~~~~~~~~~~~~~~ RE_TRANSLATE_TYPE translate, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ reg_syntax_t syntax, ~~~~~~~~~~~~~~~~~~~~ Lisp_Object rtab); ~~~~~~~~~~~~~~~~~~ #endif /* MULE */ ~~~~~~~~~~~~~~~~~ #ifdef emacs ~~~~~~~~~~~~ reg_errcode_t compile_char_class (re_wctype_t cc, Lisp_Object rtab, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Bitbyte *flags_out); ~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ static re_bool group_match_null_string_p (re_char **p, re_char *end, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ register_info_type *reg_info); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ static re_bool alt_match_null_string_p (re_char *p, re_char *end, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ register_info_type *reg_info); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ static re_bool common_op_match_null_string_p (re_char **p, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_char *end, ~~~~~~~~~~~~~ register_info_type *reg_info); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ static int bcmp_translate (re_char *s1, re_char *s2, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ REGISTER int len, RE_TRANSLATE_TYPE translate ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef emacs ~~~~~~~~~~~~ , Internal_Format fmt, Lisp_Object lispobj ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ ); ~~ static int re_match_2_internal (struct re_pattern_buffer *bufp, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_char *string1, int size1, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_char *string2, int size2, int pos, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ struct re_registers *regs, int stop ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RE_LISP_CONTEXT_ARGS_DECL); ~~~~~~~~~~~~~~~~~~~~~~~~~~~ ~ #ifndef MATCH_MAY_ALLOCATE ~~~~~~~~~~~~~~~~~~~~~~~~~~ /* If we cannot allocate large objects within re_match_2_internal, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ we make the fail stack and register vectors global. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ The fail stack, we grow to the maximum size when a regexp ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ is compiled. ~~~~~~~~~~~~ The register vectors, we adjust in size each time we ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ compile a regexp, according to the number of registers it needs. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ static fail_stack_type fail_stack; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Size with which the following vectors are currently allocated. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ That is so we can make them bigger as needed, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ but never make them smaller. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ static int regs_allocated_size; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ static re_char ** regstart, ** regend; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ static re_char ** old_regstart, ** old_regend; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ static re_char **best_regstart, **best_regend; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ static register_info_type *reg_info; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ static re_char **reg_dummy; ~~~~~~~~~~~~~~~~~~~~~~~~~~~ static register_info_type *reg_info_dummy; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Make the register vectors big enough for NUM_REGS registers, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ but don't make them smaller. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ static ~~~~~~ regex_grow_registers (int num_regs) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ if (num_regs > regs_allocated_size) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ RETALLOC (regstart, num_regs, re_char *); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RETALLOC (regend, num_regs, re_char *); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RETALLOC (old_regstart, num_regs, re_char *); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RETALLOC (old_regend, num_regs, re_char *); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RETALLOC (best_regstart, num_regs, re_char *); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RETALLOC (best_regend, num_regs, re_char *); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RETALLOC (reg_info, num_regs, register_info_type); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RETALLOC (reg_dummy, num_regs, re_char *); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RETALLOC (reg_info_dummy, num_regs, register_info_type); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regs_allocated_size = num_regs; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ } ~ #endif /* not MATCH_MAY_ALLOCATE */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ~ /* `regex_compile' compiles PATTERN (of length SIZE) according to SYNTAX. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Returns one of error codes defined in `regex.h', or zero for success. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Assumes the `allocated' (and perhaps `buffer') and `translate' ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fields are set in BUFP on entry. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ If it succeeds, results are put in BUFP (if it returns an error, the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ contents of BUFP are undefined): ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `buffer' is the compiled pattern; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `syntax' is set to SYNTAX; ~~~~~~~~~~~~~~~~~~~~~~~~~~ `used' is set to the length of the compiled pattern; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `fastmap_accurate' is zero; ~~~~~~~~~~~~~~~~~~~~~~~~~~~ `re_ngroups' is the number of groups/subexpressions (including shy ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ groups) in PATTERN; ~~~~~~~~~~~~~~~~~~~ `re_nsub' is the number of non-shy groups in PATTERN; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `not_bol' and `not_eol' are zero; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ The `fastmap' and `newline_anchor' fields are neither ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ examined nor set. */ ~~~~~~~~~~~~~~~~~~~~~ /* Return, freeing storage we allocated. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define FREE_STACK_RETURN(value) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ do \ ~~~~~~~~~~ { \ ~~~~~~~~~ xfree (compile_stack.stack); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return value; \ ~~~~~~~~~~~~~~~~~~~~ } while (0) ~~~~~~~~~~~ static reg_errcode_t ~~~~~~~~~~~~~~~~~~~~ regex_compile (re_char *pattern, int size, reg_syntax_t syntax, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ struct re_pattern_buffer *bufp) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ /* We fetch characters from PATTERN here. We declare these as int ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (or possibly long) so that chars above 127 can be used as ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ array indices. The macros that fetch a character from the pattern ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ make sure to coerce to unsigned char before assigning, so we won't ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ get bitten by negative numbers here. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* XEmacs change: used to be unsigned char. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ REGISTER EMACS_INT c, c1; ~~~~~~~~~~~~~~~~~~~~~~~~~ /* A random temporary spot in PATTERN. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_char *p1; ~~~~~~~~~~~~ /* Points to the end of the buffer, where we should append. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ REGISTER unsigned char *buf_end; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Keeps track of unclosed groups. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ compile_stack_type compile_stack; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Points to the current (ending) position in the pattern. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_char *p = pattern; ~~~~~~~~~~~~~~~~~~~~~ re_char *pend = pattern + size; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* How to translate the characters in the pattern. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RE_TRANSLATE_TYPE translate = bufp->translate; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Address of the count-byte of the most recently inserted `exactn' ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ command. This makes it possible to tell if a new exact-match ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ character can be added to that command or if the character requires ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ a new `exactn' command. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~ unsigned char *pending_exact = 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Address of start of the most recently finished expression. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ This tells, e.g., postfix * where to find the start of its ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ operand. Reset at the beginning of groups and alternatives. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ unsigned char *laststart = 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Address of beginning of regexp, or inside of last group. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ unsigned char *begalt; ~~~~~~~~~~~~~~~~~~~~~~ /* Place in the uncompiled pattern (i.e., the {) to ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ which to go back if the interval is invalid. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_char *beg_interval; ~~~~~~~~~~~~~~~~~~~~~~ /* Address of the place where a forward jump should go to the end of ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the containing expression. Each alternative of an `or' -- except the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ last -- ends with a forward jump of this sort. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ unsigned char *fixup_alt_jump = 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Counts open-groups as they are encountered. Remembered for the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ matching close-group on the compile stack, so the same register ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ number is put in the stop_memory as the start_memory. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regnum_t regnum = 0; ~~~~~~~~~~~~~~~~~~~~ #ifdef DEBUG ~~~~~~~~~~~~ if (debug_regexps & RE_DEBUG_COMPILATION) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ int debug_count; ~~~~~~~~~~~~~~~~ DEBUG_PRINT1 ("\nCompiling pattern: "); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (debug_count = 0; debug_count < size; debug_count++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ putchar (pattern[debug_count]); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ putchar ('\n'); ~~~~~~~~~~~~~~~ } ~ #endif /* DEBUG */ ~~~~~~~~~~~~~~~~~~ /* Initialize the compile stack. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ compile_stack.stack = TALLOC (INIT_COMPILE_STACK_SIZE, compile_stack_elt_t); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (compile_stack.stack == NULL) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return REG_ESPACE; ~~~~~~~~~~~~~~~~~~ compile_stack.size = INIT_COMPILE_STACK_SIZE; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ compile_stack.avail = 0; ~~~~~~~~~~~~~~~~~~~~~~~~ /* Initialize the pattern buffer. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->syntax = syntax; ~~~~~~~~~~~~~~~~~~~~~~ bufp->fastmap_accurate = 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->not_bol = bufp->not_eol = 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Set `used' to zero, so that if we return an error, the pattern ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ printer (for debugging) will think there's no pattern. We reset it ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ at the end. */ ~~~~~~~~~~~~~~~ bufp->used = 0; ~~~~~~~~~~~~~~~ /* Always count groups, whether or not bufp->no_sub is set. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->re_nsub = 0; ~~~~~~~~~~~~~~~~~~ bufp->re_ngroups = 0; ~~~~~~~~~~~~~~~~~~~~~ bufp->warned_about_incompatible_back_references = 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (bufp->external_to_internal_register == 0) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ bufp->external_to_internal_register_size = INIT_REG_TRANSLATE_SIZE; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RETALLOC (bufp->external_to_internal_register, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->external_to_internal_register_size, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int); ~~~~~ } ~ { ~ int i; ~~~~~~ bufp->external_to_internal_register[0] = 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (i = 1; i < bufp->external_to_internal_register_size; i++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->external_to_internal_register[i] = (int) 0xDEADBEEF; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ #if !defined (emacs) && !defined (SYNTAX_TABLE) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Initialize the syntax table. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ init_syntax_once (); ~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ if (bufp->allocated == 0) ~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ if (bufp->buffer) ~~~~~~~~~~~~~~~~~ { /* If zero allocated, but buffer is non-null, try to realloc ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ enough space. This loses if buffer's address is bogus, but ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ that is the user's responsibility. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RETALLOC (bufp->buffer, INIT_BUF_SIZE, unsigned char); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ else ~~~~ { /* Caller did not allocate a buffer. Do it for them. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->buffer = TALLOC (INIT_BUF_SIZE, unsigned char); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ if (!bufp->buffer) FREE_STACK_RETURN (REG_ESPACE); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->allocated = INIT_BUF_SIZE; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ begalt = buf_end = bufp->buffer; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Loop through the uncompiled pattern until we're at the end. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ while (p != pend) ~~~~~~~~~~~~~~~~~ { ~ PATFETCH (c); ~~~~~~~~~~~~~ switch (c) ~~~~~~~~~~ { ~ case '^': ~~~~~~~~~ { ~ if ( /* If at start of pattern, it's an operator. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ p == pattern + 1 ~~~~~~~~~~~~~~~~ /* If context independent, it's an operator. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || syntax & RE_CONTEXT_INDEP_ANCHORS ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Otherwise, depends on what's come before. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || at_begline_loc_p (pattern, p, syntax)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ BUF_PUSH (begline); ~~~~~~~~~~~~~~~~~~~ else ~~~~ goto normal_char; ~~~~~~~~~~~~~~~~~ } ~ break; ~~~~~~ case '$': ~~~~~~~~~ { ~ if ( /* If at end of pattern, it's an operator. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ p == pend ~~~~~~~~~ /* If context independent, it's an operator. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || syntax & RE_CONTEXT_INDEP_ANCHORS ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Otherwise, depends on what's next. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || at_endline_loc_p (p, pend, syntax)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ BUF_PUSH (endline); ~~~~~~~~~~~~~~~~~~~ else ~~~~ goto normal_char; ~~~~~~~~~~~~~~~~~ } ~ break; ~~~~~~ case '+': ~~~~~~~~~ case '?': ~~~~~~~~~ if ((syntax & RE_BK_PLUS_QM) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || (syntax & RE_LIMITED_OPS)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto normal_char; ~~~~~~~~~~~~~~~~~ handle_plus: ~~~~~~~~~~~~ case '*': ~~~~~~~~~ /* If there is no previous pattern... */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!laststart) ~~~~~~~~~~~~~~~ { ~ if (syntax & RE_CONTEXT_INVALID_OPS) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ FREE_STACK_RETURN (REG_BADRPT); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ else if (!(syntax & RE_CONTEXT_INDEP_OPS)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto normal_char; ~~~~~~~~~~~~~~~~~ } ~ { ~ /* true means zero/many matches are allowed. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_bool zero_times_ok = c != '+'; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_bool many_times_ok = c != '?'; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* true means match shortest string possible. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_bool minimal = false; ~~~~~~~~~~~~~~~~~~~~~~~~ /* If there is a sequence of repetition chars, collapse it ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ down to just one (the right one). We can't combine ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ interval operators with these because of, e.g., `a{2}*', ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ which should only match an even number of `a's. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ while (p != pend) ~~~~~~~~~~~~~~~~~ { ~ PATFETCH (c); ~~~~~~~~~~~~~ if (c == '*' || (!(syntax & RE_BK_PLUS_QM) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ && (c == '+' || c == '?'))) ~~~~~~~~~~~~~~~~~~~~~~~~~~~ ; ~ else if (syntax & RE_BK_PLUS_QM && c == '\\') ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ if (p == pend) FREE_STACK_RETURN (REG_EESCAPE); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PATFETCH (c1); ~~~~~~~~~~~~~~ if (!(c1 == '+' || c1 == '?')) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ PATUNFETCH; ~~~~~~~~~~~ PATUNFETCH; ~~~~~~~~~~~ break; ~~~~~~ } ~ c = c1; ~~~~~~~ } ~ else ~~~~ { ~ PATUNFETCH; ~~~~~~~~~~~ break; ~~~~~~ } ~ /* If we get here, we found another repeat character. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!(syntax & RE_NO_MINIMAL_MATCHING)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ /* "*?" and "+?" and "??" are okay (and mean match ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ minimally), but other sequences (such as "*??" and ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ "+++") are rejected (reserved for future use). */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (minimal || c != '?') ~~~~~~~~~~~~~~~~~~~~~~~~ FREE_STACK_RETURN (REG_BADRPT); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ minimal = true; ~~~~~~~~~~~~~~~ } ~ else ~~~~ { ~ zero_times_ok |= c != '+'; ~~~~~~~~~~~~~~~~~~~~~~~~~~ many_times_ok |= c != '?'; ~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ } ~ /* Star, etc. applied to an empty pattern is equivalent ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ to an empty pattern. */ ~~~~~~~~~~~~~~~~~~~~~~~~ if (!laststart) ~~~~~~~~~~~~~~~ break; ~~~~~~ /* Now we know whether zero matches is allowed ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ and whether two or more matches is allowed ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ and whether we want minimal or maximal matching. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (minimal) ~~~~~~~~~~~~ { ~ if (!many_times_ok) ~~~~~~~~~~~~~~~~~~~ { ~ /* "a??" becomes: ~~~~~~~~~~~~~~~~~ 0: /on_failure_jump to 6 ~~~~~~~~~~~~~~~~~~~~~~~~ 3: /jump to 9 ~~~~~~~~~~~~~ 6: /exactn/1/A ~~~~~~~~~~~~~~ 9: end of pattern. ~~~~~~~~~~~~~~~~~~ */ ~~ GET_BUFFER_SPACE (6); ~~~~~~~~~~~~~~~~~~~~~ INSERT_JUMP (jump, laststart, buf_end + 3); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ buf_end += 3; ~~~~~~~~~~~~~ INSERT_JUMP (on_failure_jump, laststart, laststart + 6); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ buf_end += 3; ~~~~~~~~~~~~~ } ~ else if (zero_times_ok) ~~~~~~~~~~~~~~~~~~~~~~~ { ~ /* "a*?" becomes: ~~~~~~~~~~~~~~~~~ 0: /jump to 6 ~~~~~~~~~~~~~ 3: /exactn/1/A ~~~~~~~~~~~~~~ 6: /on_failure_jump to 3 ~~~~~~~~~~~~~~~~~~~~~~~~ 9: end of pattern. ~~~~~~~~~~~~~~~~~~ */ ~~ GET_BUFFER_SPACE (6); ~~~~~~~~~~~~~~~~~~~~~ INSERT_JUMP (jump, laststart, buf_end + 3); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ buf_end += 3; ~~~~~~~~~~~~~ STORE_JUMP (on_failure_jump, buf_end, laststart + 3); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ buf_end += 3; ~~~~~~~~~~~~~ } ~ else ~~~~ { ~ /* "a+?" becomes: ~~~~~~~~~~~~~~~~~ 0: /exactn/1/A ~~~~~~~~~~~~~~ 3: /on_failure_jump to 0 ~~~~~~~~~~~~~~~~~~~~~~~~ 6: end of pattern. ~~~~~~~~~~~~~~~~~~ */ ~~ GET_BUFFER_SPACE (3); ~~~~~~~~~~~~~~~~~~~~~ STORE_JUMP (on_failure_jump, buf_end, laststart); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ buf_end += 3; ~~~~~~~~~~~~~ } ~ } ~ else ~~~~ { ~ /* Are we optimizing this jump? */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_bool keep_string_p = false; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (many_times_ok) ~~~~~~~~~~~~~~~~~~ { /* More than one repetition is allowed, so put in ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ at the end a backward relative jump from ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `buf_end' to before the next jump we're going ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ to put in below (which jumps from laststart to ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ after this jump). ~~~~~~~~~~~~~~~~~ But if we are at the `*' in the exact sequence `.*\n', ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ insert an unconditional jump backwards to the ., ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ instead of the beginning of the loop. This way we only ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ push a failure point once, instead of every time ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ through the loop. */ ~~~~~~~~~~~~~~~~~~~~~ assert (p - 1 > pattern); ~~~~~~~~~~~~~~~~~~~~~~~~~ /* Allocate the space for the jump. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ GET_BUFFER_SPACE (3); ~~~~~~~~~~~~~~~~~~~~~ /* We know we are not at the first character of the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ pattern, because laststart was nonzero. And we've ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ already incremented `p', by the way, to be the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ character after the `*'. Do we have to do something ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ analogous here for null bytes, because of ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RE_DOT_NOT_NULL? */ ~~~~~~~~~~~~~~~~~~~ if (*(p - 2) == '.' ~~~~~~~~~~~~~~~~~~~ && zero_times_ok ~~~~~~~~~~~~~~~~ && p < pend && *p == '\n' ~~~~~~~~~~~~~~~~~~~~~~~~~ && !(syntax & RE_DOT_NEWLINE)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { /* We have .*\n. */ ~~~~~~~~~~~~~~~~~~~~~~ STORE_JUMP (jump, buf_end, laststart); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ keep_string_p = true; ~~~~~~~~~~~~~~~~~~~~~ } ~ else ~~~~ /* Anything else. */ ~~~~~~~~~~~~~~~~~~~~~ STORE_JUMP (maybe_pop_jump, buf_end, laststart - 3); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* We've added more stuff to the buffer. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ buf_end += 3; ~~~~~~~~~~~~~ } ~ /* On failure, jump from laststart to buf_end + 3, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ which will be the end of the buffer after this jump ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ is inserted. */ ~~~~~~~~~~~~~~~~ GET_BUFFER_SPACE (3); ~~~~~~~~~~~~~~~~~~~~~ INSERT_JUMP (keep_string_p ? on_failure_keep_string_jump ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ : on_failure_jump, ~~~~~~~~~~~~~~~~~~ laststart, buf_end + 3); ~~~~~~~~~~~~~~~~~~~~~~~~ buf_end += 3; ~~~~~~~~~~~~~ if (!zero_times_ok) ~~~~~~~~~~~~~~~~~~~ { ~ /* At least one repetition is required, so insert a ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `dummy_failure_jump' before the initial ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `on_failure_jump' instruction of the loop. This ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ effects a skip over that instruction the first time ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ we hit that loop. */ ~~~~~~~~~~~~~~~~~~~~~ GET_BUFFER_SPACE (3); ~~~~~~~~~~~~~~~~~~~~~ INSERT_JUMP (dummy_failure_jump, laststart, laststart + 6); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ buf_end += 3; ~~~~~~~~~~~~~ } ~ } ~ pending_exact = 0; ~~~~~~~~~~~~~~~~~~ } ~ break; ~~~~~~ case '.': ~~~~~~~~~ laststart = buf_end; ~~~~~~~~~~~~~~~~~~~~ BUF_PUSH (anychar); ~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ #ifdef MULE ~~~~~~~~~~~ #define MAYBE_START_OVER_WITH_EXTENDED(ch) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (ch >= 0x80) do \ ~~~~~~~~~~~~~~~~~~~~~~ { \ ~~~~~~~ goto start_over_with_extended; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } while (0) ~~~~~~~~~~~ #else ~~~~~ #define MAYBE_START_OVER_WITH_EXTENDED(ch) (void)(ch) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ case '[': ~~~~~~~~~ { ~ /* XEmacs change: this whole section */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_bool had_char_class = false; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (p == pend) FREE_STACK_RETURN (REG_EBRACK); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Ensure that we have enough space to push a charset: the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ opcode, the length count, and the bitset; 34 bytes in all. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ GET_BUFFER_SPACE (34); ~~~~~~~~~~~~~~~~~~~~~~ laststart = buf_end; ~~~~~~~~~~~~~~~~~~~~ /* We test `*p == '^' twice, instead of using an if ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ statement, so we only need one BUF_PUSH. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ BUF_PUSH (*p == '^' ? charset_not : charset); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (*p == '^') ~~~~~~~~~~~~~~ p++; ~~~~ /* Remember the first position in the bracket expression. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ p1 = p; ~~~~~~~ /* Push the number of bytes in the bitmap. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ BUF_PUSH ((1 << BYTEWIDTH) / BYTEWIDTH); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Clear the whole map. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~ memset (buf_end, 0, (1 << BYTEWIDTH) / BYTEWIDTH); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* charset_not matches newline according to a syntax bit. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if ((re_opcode_t) buf_end[-2] == charset_not ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ && (syntax & RE_HAT_LISTS_NOT_NEWLINE)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ SET_LIST_BIT ('\n'); ~~~~~~~~~~~~~~~~~~~~ /* Read in characters and ranges, setting map bits. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (;;) ~~~~~~~~ { ~ if (p == pend) FREE_STACK_RETURN (REG_EBRACK); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PATFETCH (c); ~~~~~~~~~~~~~ /* Frumble-bumble, we may have found some extended chars. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Need to start over, process everything using the general ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ extended-char mechanism, and need to use charset_mule and ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ charset_mule_not instead of charset and charset_not. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ MAYBE_START_OVER_WITH_EXTENDED (c); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* \ might escape characters inside [...] and [^...]. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if ((syntax & RE_BACKSLASH_ESCAPE_IN_LISTS) && c == '\\') ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ if (p == pend) FREE_STACK_RETURN (REG_EESCAPE); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PATFETCH (c1); ~~~~~~~~~~~~~~ MAYBE_START_OVER_WITH_EXTENDED (c1); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ SET_LIST_BIT (c1); ~~~~~~~~~~~~~~~~~~ continue; ~~~~~~~~~ } ~ /* Could be the end of the bracket expression. If it's ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ not (i.e., when the bracket expression is `[]' so ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ far), the ']' character bit gets set way below. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (c == ']' && p != p1 + 1) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ /* Look ahead to see if it's a range when the last thing ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ was a character class. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~ if (had_char_class && c == '-' && *p != ']') ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ FREE_STACK_RETURN (REG_ERANGE); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Look ahead to see if it's a range when the last thing ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ was a character: if this is a hyphen not at the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ beginning or the end of a list, then it's the range ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ operator. */ ~~~~~~~~~~~~~ if (c == '-' ~~~~~~~~~~~~ && !(p - 2 >= pattern && p[-2] == '[') ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ && !(p - 3 >= pattern && p[-3] == '[' && p[-2] == '^') ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ && *p != ']') ~~~~~~~~~~~~~ { ~ reg_errcode_t ret; ~~~~~~~~~~~~~~~~~~ MAYBE_START_OVER_WITH_EXTENDED (*(unsigned char *)p); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ret = compile_range (&p, pend, translate, syntax, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ buf_end); ~~~~~~~~~ if (ret != REG_NOERROR) FREE_STACK_RETURN (ret); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ else if (p[0] == '-' && p[1] != ']') ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { /* This handles ranges made up of characters only. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ reg_errcode_t ret; ~~~~~~~~~~~~~~~~~~ /* Move past the `-'. */ ~~~~~~~~~~~~~~~~~~~~~~~~~ PATFETCH (c1); ~~~~~~~~~~~~~~ MAYBE_START_OVER_WITH_EXTENDED (*(unsigned char *)p); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ret = compile_range (&p, pend, translate, syntax, buf_end); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (ret != REG_NOERROR) FREE_STACK_RETURN (ret); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* See if we're at the beginning of a possible character ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ class. */ ~~~~~~~~~~ else if (syntax & RE_CHAR_CLASSES && c == '[' && *p == ':') ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ re_char *str = p + 1; ~~~~~~~~~~~~~~~~~~~~~ PATFETCH (c); ~~~~~~~~~~~~~ c1 = 0; ~~~~~~~ /* If pattern is `[[:'. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (p == pend) FREE_STACK_RETURN (REG_EBRACK); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (;;) ~~~~~~~~ { ~ PATFETCH (c); ~~~~~~~~~~~~~ if ((c == ':' && *p == ']') || p == pend) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ break; ~~~~~~ } ~ c1++; ~~~~~ } ~ /* If isn't a word bracketed by `[:' and `:]': ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ undo the ending character, the letters, and leave ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the leading `:' and `[' (but set bits for them). */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (c == ':' && *p == ']') ~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ re_wctype_t cc = re_wctype (str, c1); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int ch; ~~~~~~~ if (cc == RECC_ERROR) ~~~~~~~~~~~~~~~~~~~~~ FREE_STACK_RETURN (REG_ECTYPE); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Throw away the ] at the end of the character ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ class. */ ~~~~~~~~~~ PATFETCH (c); ~~~~~~~~~~~~~ if (p == pend) FREE_STACK_RETURN (REG_EBRACK); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef MULE ~~~~~~~~~~~ if (re_wctype_can_match_non_ascii (cc)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ goto start_over_with_extended; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ #endif /* MULE */ ~~~~~~~~~~~~~~~~~ for (ch = 0; ch < (1 << BYTEWIDTH); ++ch) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ if (re_iswctype (ch, cc ~~~~~~~~~~~~~~~~~~~~~~~ RE_ISWCTYPE_ARG (current_buffer))) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ SET_LIST_BIT (ch); ~~~~~~~~~~~~~~~~~~ } ~ } ~ had_char_class = true; ~~~~~~~~~~~~~~~~~~~~~~ } ~ else ~~~~ { ~ c1++; ~~~~~ while (c1--) ~~~~~~~~~~~~ PATUNFETCH; ~~~~~~~~~~~ SET_LIST_BIT ('['); ~~~~~~~~~~~~~~~~~~~ SET_LIST_BIT (':'); ~~~~~~~~~~~~~~~~~~~ had_char_class = false; ~~~~~~~~~~~~~~~~~~~~~~~ } ~ } ~ else ~~~~ { ~ had_char_class = false; ~~~~~~~~~~~~~~~~~~~~~~~ SET_LIST_BIT (c); ~~~~~~~~~~~~~~~~~ } ~ } ~ /* Discard any (non)matching list bytes that are all 0 at the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ end of the map. Decrease the map-length byte too. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ while ((int) buf_end[-1] > 0 && buf_end[buf_end[-1] - 1] == 0) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ buf_end[-1]--; ~~~~~~~~~~~~~~ buf_end += buf_end[-1]; ~~~~~~~~~~~~~~~~~~~~~~~ } ~ break; ~~~~~~ #ifdef MULE ~~~~~~~~~~~ start_over_with_extended: ~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ REGISTER Lisp_Object rtab = Qnil; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Bitbyte flags = 0; ~~~~~~~~~~~~~~~~~~ int bytes_needed = sizeof (flags); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_bool had_char_class = false; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* There are extended chars here, which means we need to use the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ unified range-table format. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (buf_end[-2] == charset) ~~~~~~~~~~~~~~~~~~~~~~~~~~~ buf_end[-2] = charset_mule; ~~~~~~~~~~~~~~~~~~~~~~~~~~~ else ~~~~ buf_end[-2] = charset_mule_not; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ buf_end--; ~~~~~~~~~~ p = p1; /* go back to the beginning of the charset, after ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ a possible ^. */ ~~~~~~~~~~~~~~~~ rtab = Vthe_lisp_rangetab; ~~~~~~~~~~~~~~~~~~~~~~~~~~ Fclear_range_table (rtab); ~~~~~~~~~~~~~~~~~~~~~~~~~~ /* charset_not matches newline according to a syntax bit. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if ((re_opcode_t) buf_end[-1] == charset_mule_not ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ && (syntax & RE_HAT_LISTS_NOT_NEWLINE)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ SET_RANGETAB_BIT ('\n'); ~~~~~~~~~~~~~~~~~~~~~~~~ /* Read in characters and ranges, setting map bits. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (;;) ~~~~~~~~ { ~ if (p == pend) FREE_STACK_RETURN (REG_EBRACK); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PATFETCH (c); ~~~~~~~~~~~~~ /* \ might escape characters inside [...] and [^...]. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if ((syntax & RE_BACKSLASH_ESCAPE_IN_LISTS) && c == '\\') ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ if (p == pend) FREE_STACK_RETURN (REG_EESCAPE); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PATFETCH (c1); ~~~~~~~~~~~~~~ SET_RANGETAB_BIT (c1); ~~~~~~~~~~~~~~~~~~~~~~ continue; ~~~~~~~~~ } ~ /* Could be the end of the bracket expression. If it's ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ not (i.e., when the bracket expression is `[]' so ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ far), the ']' character bit gets set way below. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (c == ']' && p != p1 + 1) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ /* Look ahead to see if it's a range when the last thing ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ was a character class. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~ if (had_char_class && c == '-' && *p != ']') ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ FREE_STACK_RETURN (REG_ERANGE); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Look ahead to see if it's a range when the last thing ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ was a character: if this is a hyphen not at the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ beginning or the end of a list, then it's the range ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ operator. */ ~~~~~~~~~~~~~ if (c == '-' ~~~~~~~~~~~~ && !(p - 2 >= pattern && p[-2] == '[') ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ && !(p - 3 >= pattern && p[-3] == '[' && p[-2] == '^') ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ && *p != ']') ~~~~~~~~~~~~~ { ~ reg_errcode_t ret; ~~~~~~~~~~~~~~~~~~ ret = compile_extended_range (&p, pend, translate, syntax, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ rtab); ~~~~~~ if (ret != REG_NOERROR) FREE_STACK_RETURN (ret); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ else if (p[0] == '-' && p[1] != ']') ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { /* This handles ranges made up of characters only. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ reg_errcode_t ret; ~~~~~~~~~~~~~~~~~~ /* Move past the `-'. */ ~~~~~~~~~~~~~~~~~~~~~~~~~ PATFETCH (c1); ~~~~~~~~~~~~~~ ret = compile_extended_range (&p, pend, translate, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ syntax, rtab); ~~~~~~~~~~~~~~ if (ret != REG_NOERROR) FREE_STACK_RETURN (ret); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* See if we're at the beginning of a possible character ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ class. */ ~~~~~~~~~~ else if (syntax & RE_CHAR_CLASSES && c == '[' && *p == ':') ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ re_char *str = p + 1; ~~~~~~~~~~~~~~~~~~~~~ PATFETCH (c); ~~~~~~~~~~~~~ c1 = 0; ~~~~~~~ /* If pattern is `[[:'. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (p == pend) FREE_STACK_RETURN (REG_EBRACK); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (;;) ~~~~~~~~ { ~ PATFETCH (c); ~~~~~~~~~~~~~ if ((c == ':' && *p == ']') || p == pend) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ break; ~~~~~~ } ~ c1++; ~~~~~ } ~ /* If isn't a word bracketed by `[:' and `:]': ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ undo the ending character, the letters, and leave ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the leading `:' and `[' (but set bits for them). */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (c == ':' && *p == ']') ~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ re_wctype_t cc = re_wctype (str, c1); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ reg_errcode_t ret = REG_NOERROR; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (cc == RECC_ERROR) ~~~~~~~~~~~~~~~~~~~~~ FREE_STACK_RETURN (REG_ECTYPE); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Throw away the ] at the end of the character ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ class. */ ~~~~~~~~~~ PATFETCH (c); ~~~~~~~~~~~~~ if (p == pend) FREE_STACK_RETURN (REG_EBRACK); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ret = compile_char_class (cc, rtab, &flags); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (ret != REG_NOERROR) FREE_STACK_RETURN (ret); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ had_char_class = true; ~~~~~~~~~~~~~~~~~~~~~~ } ~ else ~~~~ { ~ c1++; ~~~~~ while (c1--) ~~~~~~~~~~~~ PATUNFETCH; ~~~~~~~~~~~ SET_RANGETAB_BIT ('['); ~~~~~~~~~~~~~~~~~~~~~~~ SET_RANGETAB_BIT (':'); ~~~~~~~~~~~~~~~~~~~~~~~ had_char_class = false; ~~~~~~~~~~~~~~~~~~~~~~~ } ~ } ~ else ~~~~ { ~ had_char_class = false; ~~~~~~~~~~~~~~~~~~~~~~~ SET_RANGETAB_BIT (c); ~~~~~~~~~~~~~~~~~~~~~ } ~ } ~ bytes_needed += unified_range_table_bytes_needed (rtab); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ GET_BUFFER_SPACE (bytes_needed); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ *buf_end++ = flags; ~~~~~~~~~~~~~~~~~~~ unified_range_table_copy_data (rtab, buf_end); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ buf_end += unified_range_table_bytes_used (buf_end); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ } ~ #endif /* MULE */ ~~~~~~~~~~~~~~~~~ case '(': ~~~~~~~~~ if (syntax & RE_NO_BK_PARENS) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto handle_open; ~~~~~~~~~~~~~~~~~ else ~~~~ goto normal_char; ~~~~~~~~~~~~~~~~~ case ')': ~~~~~~~~~ if (syntax & RE_NO_BK_PARENS) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto handle_close; ~~~~~~~~~~~~~~~~~~ else ~~~~ goto normal_char; ~~~~~~~~~~~~~~~~~ case '\n': ~~~~~~~~~~ if (syntax & RE_NEWLINE_ALT) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto handle_alt; ~~~~~~~~~~~~~~~~ else ~~~~ goto normal_char; ~~~~~~~~~~~~~~~~~ case '|': ~~~~~~~~~ if (syntax & RE_NO_BK_VBAR) ~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto handle_alt; ~~~~~~~~~~~~~~~~ else ~~~~ goto normal_char; ~~~~~~~~~~~~~~~~~ case '{': ~~~~~~~~~ if (syntax & RE_INTERVALS && syntax & RE_NO_BK_BRACES) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto handle_interval; ~~~~~~~~~~~~~~~~~~~~~ else ~~~~ goto normal_char; ~~~~~~~~~~~~~~~~~ case '\\': ~~~~~~~~~~ if (p == pend) FREE_STACK_RETURN (REG_EESCAPE); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Do not translate the character after the \, so that we can ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ distinguish, e.g., \B from \b, even if we normally would ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ translate, e.g., B to b. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PATFETCH_RAW (c); ~~~~~~~~~~~~~~~~~ switch (c) ~~~~~~~~~~ { ~ case '(': ~~~~~~~~~ if (syntax & RE_NO_BK_PARENS) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto normal_backslash; ~~~~~~~~~~~~~~~~~~~~~~ handle_open: ~~~~~~~~~~~~ { ~ regnum_t r = 0; ~~~~~~~~~~~~~~~ re_bool shy = 0, named_nonshy = 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!(syntax & RE_NO_SHY_GROUPS) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ && p != pend && itext_ichar_eql (p, '?')) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ INC_IBYTEPTR (p); /* Gobble up the '?'. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PATFETCH (c); /* Fetch the next character, which may be a ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ digit. */ ~~~~~~~~~ switch (c) ~~~~~~~~~~ { ~ case ':': /* shy groups */ ~~~~~~~~~~~~~~~~~~~~~~~~~~ shy = 1; ~~~~~~~~ break; ~~~~~~ case '1': case '2': case '3': case '4': ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case '5': case '6': case '7': case '8': case '9': ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PATUNFETCH; ~~~~~~~~~~~ GET_UNSIGNED_NUMBER (r); ~~~~~~~~~~~~~~~~~~~~~~~~ if (itext_ichar_eql (p, ':')) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ named_nonshy = 1; ~~~~~~~~~~~~~~~~~ INC_IBYTEPTR (p); /* Gobble up the ':'. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ /* Otherwise, fall through and error. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* An explicitly specified regnum must start with ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ non-0. */ ~~~~~~~~~ case '0': ~~~~~~~~~ default: ~~~~~~~~ FREE_STACK_RETURN (REG_BADPAT); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ } ~ ++regnum; ~~~~~~~~~ bufp->re_ngroups++; ~~~~~~~~~~~~~~~~~~~ if (bufp->re_ngroups > MAX_REGNUM) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ FREE_STACK_RETURN (REG_ESUBREG); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ if (!shy) ~~~~~~~~~ { ~ if (named_nonshy) ~~~~~~~~~~~~~~~~~ { ~ if (r < bufp->external_to_internal_register_size) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ if (group_in_compile_stack ~~~~~~~~~~~~~~~~~~~~~~~~~~ (compile_stack, ~~~~~~~~~~~~~~~ bufp->external_to_internal_register[r])) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ /* GNU errors in this context, which is ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ inconsistent; it otherwise has no problem ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ with named non-shy groups overriding ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ previously-assigned group numbers. I choose ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ to error here for consistency with GNU for ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ those writing code that should target ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ both. */ ~~~~~~~~ FREE_STACK_RETURN (REG_ESUBREG); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ } ~ if (r > bufp->re_nsub) ~~~~~~~~~~~~~~~~~~~~~~ { ~ bufp->re_nsub = r; ~~~~~~~~~~~~~~~~~~ } ~ } ~ else ~~~~ { ~ r = ++(bufp->re_nsub); ~~~~~~~~~~~~~~~~~~~~~~ } ~ while (bufp->external_to_internal_register_size <= ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->re_nsub) ~~~~~~~~~~~~~~ { ~ int i; ~~~~~~ int old_size = ~~~~~~~~~~~~~~ bufp->external_to_internal_register_size; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->external_to_internal_register_size ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ += max (old_size + 5, bufp->re_nsub + 5); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RETALLOC (bufp->external_to_internal_register, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->external_to_internal_register_size, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int); ~~~~~ for (i = old_size; ~~~~~~~~~~~~~~~~~~ i < bufp->external_to_internal_register_size; i++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->external_to_internal_register[i] = ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (int) 0xDEADBEEF; ~~~~~~~~~~~~~~~~~ } ~ /* This is explicitly [r] rather than [bufp->re_nsub] for ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the case that the named nonshy group references an ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ unused register number less than bufp->re_nsub. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->external_to_internal_register[r] = ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->re_ngroups; ~~~~~~~~~~~~~~~~~ } ~ if (COMPILE_STACK_FULL) ~~~~~~~~~~~~~~~~~~~~~~~ { ~ RETALLOC (compile_stack.stack, compile_stack.size << 1, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ compile_stack_elt_t); ~~~~~~~~~~~~~~~~~~~~~ if (compile_stack.stack == NULL) return REG_ESPACE; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ compile_stack.size <<= 1; ~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* These are the values to restore when we hit end of this ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ group. They are all relative offsets, so that if the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ whole pattern moves because of realloc, they will still ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ be valid. */ ~~~~~~~~~~~~~ COMPILE_STACK_TOP.begalt_offset = begalt - bufp->buffer; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ COMPILE_STACK_TOP.fixup_alt_jump ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ = fixup_alt_jump ? fixup_alt_jump - bufp->buffer + 1 : 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ COMPILE_STACK_TOP.laststart_offset = buf_end - bufp->buffer; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ COMPILE_STACK_TOP.regnum = bufp->re_ngroups; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ COMPILE_STACK_TOP.inner_group_offset ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ = buf_end - bufp->buffer + 3; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* We will eventually replace the 0 with the number of ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ groups inner to this one, using inner_group_offset, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ above. */ ~~~~~~~~~ GET_BUFFER_SPACE (5); ~~~~~~~~~~~~~~~~~~~~~ store_op2 (start_memory, buf_end, bufp->re_ngroups, 0); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ buf_end += 5; ~~~~~~~~~~~~~ compile_stack.avail++; ~~~~~~~~~~~~~~~~~~~~~~ fixup_alt_jump = 0; ~~~~~~~~~~~~~~~~~~~ laststart = 0; ~~~~~~~~~~~~~~ begalt = buf_end; ~~~~~~~~~~~~~~~~~ pending_exact = 0; ~~~~~~~~~~~~~~~~~~ } ~ break; ~~~~~~ case ')': ~~~~~~~~~ if (syntax & RE_NO_BK_PARENS) goto normal_backslash; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (COMPILE_STACK_EMPTY) ~~~~~~~~~~~~~~~~~~~~~~~~ { ~ if (syntax & RE_UNMATCHED_RIGHT_PAREN_ORD) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto normal_backslash; ~~~~~~~~~~~~~~~~~~~~~~ else ~~~~ FREE_STACK_RETURN (REG_ERPAREN); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ handle_close: ~~~~~~~~~~~~~ if (fixup_alt_jump) ~~~~~~~~~~~~~~~~~~~ { /* Push a dummy failure point at the end of the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ alternative for a possible future ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `pop_failure_jump' to pop. See comments at ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `push_dummy_failure' in `re_match_2'. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ BUF_PUSH (push_dummy_failure); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* We allocated space for this jump when we assigned ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ to `fixup_alt_jump', in the `handle_alt' case below. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ STORE_JUMP (jump_past_alt, fixup_alt_jump, buf_end - 1); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* See similar code for backslashed left paren above. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (COMPILE_STACK_EMPTY) ~~~~~~~~~~~~~~~~~~~~~~~~ { ~ if (syntax & RE_UNMATCHED_RIGHT_PAREN_ORD) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto normal_char; ~~~~~~~~~~~~~~~~~ else ~~~~ FREE_STACK_RETURN (REG_ERPAREN); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* Since we just checked for an empty stack above, this ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ``can't happen''. */ ~~~~~~~~~~~~~~~~~~~~~ assert (compile_stack.avail != 0); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ /* We don't just want to restore into `regnum', because ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ later groups should continue to be numbered higher, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ as in `(ab)c(de)' -- the second group is #2. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regnum_t this_group_regnum; ~~~~~~~~~~~~~~~~~~~~~~~~~~~ unsigned char *inner_group_loc; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ compile_stack.avail--; ~~~~~~~~~~~~~~~~~~~~~~ begalt = bufp->buffer + COMPILE_STACK_TOP.begalt_offset; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fixup_alt_jump ~~~~~~~~~~~~~~ = COMPILE_STACK_TOP.fixup_alt_jump ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ? bufp->buffer + COMPILE_STACK_TOP.fixup_alt_jump - 1 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ : 0; ~~~~ laststart = bufp->buffer + COMPILE_STACK_TOP.laststart_offset; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ this_group_regnum = COMPILE_STACK_TOP.regnum; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ pending_exact = 0; ~~~~~~~~~~~~~~~~~~ /* We're at the end of the group, so now we know how many ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ groups were inside this one. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ inner_group_loc ~~~~~~~~~~~~~~~ = bufp->buffer + COMPILE_STACK_TOP.inner_group_offset; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ STORE_NUMBER (inner_group_loc, regnum - this_group_regnum); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ GET_BUFFER_SPACE (5); ~~~~~~~~~~~~~~~~~~~~~ store_op2 (stop_memory, buf_end, this_group_regnum, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regnum - this_group_regnum); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ buf_end += 5; ~~~~~~~~~~~~~ } ~ break; ~~~~~~ case '|': /* `\|'. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~ if (syntax & RE_LIMITED_OPS || syntax & RE_NO_BK_VBAR) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto normal_backslash; ~~~~~~~~~~~~~~~~~~~~~~ handle_alt: ~~~~~~~~~~~ if (syntax & RE_LIMITED_OPS) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto normal_char; ~~~~~~~~~~~~~~~~~ /* Insert before the previous alternative a jump which ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ jumps to this alternative if the former fails. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ GET_BUFFER_SPACE (3); ~~~~~~~~~~~~~~~~~~~~~ INSERT_JUMP (on_failure_jump, begalt, buf_end + 6); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ pending_exact = 0; ~~~~~~~~~~~~~~~~~~ buf_end += 3; ~~~~~~~~~~~~~ /* The alternative before this one has a jump after it ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ which gets executed if it gets matched. Adjust that ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ jump so it will jump to this alternative's analogous ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ jump (put in below, which in turn will jump to the next ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (if any) alternative's such jump, etc.). The last such ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ jump jumps to the correct final destination. A picture: ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ _____ _____ ~~~~~~~~~~~ | | | | ~~~~~~~~~~~ | v | v ~~~~~~~~~~~ a | b | c ~~~~~~~~~~~ If we are at `b', then fixup_alt_jump right now points to a ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ three-byte space after `a'. We'll put in the jump, set ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fixup_alt_jump to right after `b', and leave behind three ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bytes which we'll fill in when we get to after `c'. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (fixup_alt_jump) ~~~~~~~~~~~~~~~~~~~ STORE_JUMP (jump_past_alt, fixup_alt_jump, buf_end); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Mark and leave space for a jump after this alternative, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ to be filled in later either by next alternative or ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ when know we're at the end of a series of alternatives. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fixup_alt_jump = buf_end; ~~~~~~~~~~~~~~~~~~~~~~~~~ GET_BUFFER_SPACE (3); ~~~~~~~~~~~~~~~~~~~~~ buf_end += 3; ~~~~~~~~~~~~~ laststart = 0; ~~~~~~~~~~~~~~ begalt = buf_end; ~~~~~~~~~~~~~~~~~ break; ~~~~~~ case '{': ~~~~~~~~~ /* If \{ is a literal. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!(syntax & RE_INTERVALS) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* If we're at `\{' and it's not the open-interval ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ operator. */ ~~~~~~~~~~~~~ || ((syntax & RE_INTERVALS) && (syntax & RE_NO_BK_BRACES)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || (p - 2 == pattern && p == pend)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto normal_backslash; ~~~~~~~~~~~~~~~~~~~~~~ #define BAD_INTERVAL(errnum) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ do \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (syntax & RE_NO_BK_BRACES) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto unfetch_interval; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ else \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ FREE_STACK_RETURN (errnum); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } while (0) ~~~~~~~~~~~ handle_interval: ~~~~~~~~~~~~~~~~ { ~ /* If got here, then the syntax allows intervals. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* At least (most) this many matches must be made. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int lower_bound = 0, upper_bound = -1; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ beg_interval = p - 1; ~~~~~~~~~~~~~~~~~~~~~ if (p == pend || itext_ichar_eql (p, '+')) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ BAD_INTERVAL (REG_EBRACE); ~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ GET_UNSIGNED_NUMBER (lower_bound); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (p == pend) BAD_INTERVAL (REG_EBRACE); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PATFETCH (c); ~~~~~~~~~~~~~ if (c == ',') ~~~~~~~~~~~~~ { ~ if (p == pend || itext_ichar_eql (p, '+')) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ BAD_INTERVAL (REG_EBRACE); ~~~~~~~~~~~~~~~~~~~~~~~~~~ GET_UNSIGNED_NUMBER (upper_bound); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (upper_bound < 0) upper_bound = RE_DUP_MAX; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (p == pend) BAD_INTERVAL (REG_EBRACE); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PATFETCH (c); ~~~~~~~~~~~~~ } ~ else ~~~~ { ~ /* Interval such as `{1}' => match exactly once. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ upper_bound = lower_bound; ~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ if (lower_bound > upper_bound) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ BAD_INTERVAL (REG_EBRACE); ~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ if (upper_bound > RE_DUP_MAX) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ BAD_INTERVAL (REG_ESIZEBR); ~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ if (!(syntax & RE_NO_BK_BRACES)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ if (c != '\\') ~~~~~~~~~~~~~~ FREE_STACK_RETURN (REG_BADBR); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (p == pend) ~~~~~~~~~~~~~~ FREE_STACK_RETURN (REG_EESCAPE); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PATFETCH (c); ~~~~~~~~~~~~~ } ~ if (c != '}') ~~~~~~~~~~~~~ { ~ BAD_INTERVAL (REG_EBRACE); ~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* We just parsed a valid interval. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* It's invalid to have no preceding RE. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!laststart) ~~~~~~~~~~~~~~~ { ~ if (syntax & RE_CONTEXT_INVALID_OPS) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ FREE_STACK_RETURN (REG_BADRPT); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ else if (syntax & RE_CONTEXT_INDEP_OPS) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ laststart = buf_end; ~~~~~~~~~~~~~~~~~~~~ else ~~~~ goto unfetch_interval; ~~~~~~~~~~~~~~~~~~~~~~ } ~ /* If the upper bound is zero, don't want to succeed at ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ all; jump from `laststart' to `b + 3', which will be ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the end of the buffer after we insert the jump. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (upper_bound == 0) ~~~~~~~~~~~~~~~~~~~~~ { ~ GET_BUFFER_SPACE (3); ~~~~~~~~~~~~~~~~~~~~~ INSERT_JUMP (jump, laststart, buf_end + 3); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ buf_end += 3; ~~~~~~~~~~~~~ } ~ /* Otherwise, we have a nontrivial interval. When ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ we're all done, the pattern will look like: ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ set_number_at ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ set_number_at ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ succeed_n ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ~~~~~~~~~~~~~~ jump_n ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (The upper bound and `jump_n' are omitted if ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `upper_bound' is 1, though.) */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ else ~~~~ { /* If the upper bound is > 1, we need to insert ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ more at the end of the loop. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int nbytes = 10 + (upper_bound > 1) * 10; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ GET_BUFFER_SPACE (nbytes); ~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Initialize lower bound of the `succeed_n', even ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ though it will be set during matching by its ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ attendant `set_number_at' (inserted next), ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ because `re_compile_fastmap' needs to know. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Jump to the `jump_n' we might insert below. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ INSERT_JUMP2 (succeed_n, laststart, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ buf_end + 5 + (upper_bound > 1) * 5, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ lower_bound); ~~~~~~~~~~~~~ buf_end += 5; ~~~~~~~~~~~~~ /* Code to initialize the lower bound. Insert ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ before the `succeed_n'. The `5' is the last two ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bytes of this `set_number_at', plus 3 bytes of ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the following `succeed_n'. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ insert_op2 (set_number_at, laststart, 5, lower_bound, buf_end); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ buf_end += 5; ~~~~~~~~~~~~~ if (upper_bound > 1) ~~~~~~~~~~~~~~~~~~~~ { /* More than one repetition is allowed, so ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ append a backward jump to the `succeed_n' ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ that starts this interval. ~~~~~~~~~~~~~~~~~~~~~~~~~~ When we've reached this during matching, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ we'll have matched the interval once, so ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ jump back only `upper_bound - 1' times. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ STORE_JUMP2 (jump_n, buf_end, laststart + 5, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ upper_bound - 1); ~~~~~~~~~~~~~~~~~ buf_end += 5; ~~~~~~~~~~~~~ /* The location we want to set is the second ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ parameter of the `jump_n'; that is `b-2' as ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ an absolute address. `laststart' will be ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the `set_number_at' we're about to insert; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `laststart+3' the number to set, the source ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for the relative address. But we are ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ inserting into the middle of the pattern -- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ so everything is getting moved up by 5. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Conclusion: (b - 2) - (laststart + 3) + 5, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ i.e., b - laststart. ~~~~~~~~~~~~~~~~~~~~ We insert this at the beginning of the loop ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ so that if we fail during matching, we'll ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ reinitialize the bounds. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ insert_op2 (set_number_at, laststart, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ buf_end - laststart, ~~~~~~~~~~~~~~~~~~~~ upper_bound - 1, buf_end); ~~~~~~~~~~~~~~~~~~~~~~~~~~ buf_end += 5; ~~~~~~~~~~~~~ } ~ } ~ pending_exact = 0; ~~~~~~~~~~~~~~~~~~ beg_interval = NULL; ~~~~~~~~~~~~~~~~~~~~ } ~ break; ~~~~~~ #undef BAD_INTERVAL ~~~~~~~~~~~~~~~~~~~ unfetch_interval: ~~~~~~~~~~~~~~~~~ /* If an invalid interval, match the characters as literals. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ assert (beg_interval); ~~~~~~~~~~~~~~~~~~~~~~ p = beg_interval; ~~~~~~~~~~~~~~~~~ beg_interval = NULL; ~~~~~~~~~~~~~~~~~~~~ /* normal_char and normal_backslash need `c'. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PATFETCH (c); ~~~~~~~~~~~~~ if (!(syntax & RE_NO_BK_BRACES)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ if (p > pattern && p[-1] == '\\') ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto normal_backslash; ~~~~~~~~~~~~~~~~~~~~~~ } ~ goto normal_char; ~~~~~~~~~~~~~~~~~ #ifdef emacs ~~~~~~~~~~~~ /* There is no way to specify the before_dot and after_dot ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ operators. rms says this is ok. --karl */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case '=': ~~~~~~~~~ BUF_PUSH (at_dot); ~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case 's': ~~~~~~~~~ laststart = buf_end; ~~~~~~~~~~~~~~~~~~~~ PATFETCH (c); ~~~~~~~~~~~~~ /* XEmacs addition */ ~~~~~~~~~~~~~~~~~~~~~ if (c >= 0x80 || syntax_spec_code[c] == 0377) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ FREE_STACK_RETURN (REG_ESYNTAX); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ BUF_PUSH_2 (syntaxspec, syntax_spec_code[c]); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case 'S': ~~~~~~~~~ laststart = buf_end; ~~~~~~~~~~~~~~~~~~~~ PATFETCH (c); ~~~~~~~~~~~~~ /* XEmacs addition */ ~~~~~~~~~~~~~~~~~~~~~ if (c >= 0x80 || syntax_spec_code[c] == 0377) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ FREE_STACK_RETURN (REG_ESYNTAX); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ BUF_PUSH_2 (notsyntaxspec, syntax_spec_code[c]); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ #ifdef MULE ~~~~~~~~~~~ /* 97.2.17 jhod merged in to XEmacs from mule-2.3 */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case 'c': ~~~~~~~~~ laststart = buf_end; ~~~~~~~~~~~~~~~~~~~~ PATFETCH_RAW (c); ~~~~~~~~~~~~~~~~~ if (c < 32 || c > 127) ~~~~~~~~~~~~~~~~~~~~~~ FREE_STACK_RETURN (REG_ECATEGORY); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ BUF_PUSH_2 (categoryspec, c); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case 'C': ~~~~~~~~~ laststart = buf_end; ~~~~~~~~~~~~~~~~~~~~ PATFETCH_RAW (c); ~~~~~~~~~~~~~~~~~ if (c < 32 || c > 127) ~~~~~~~~~~~~~~~~~~~~~~ FREE_STACK_RETURN (REG_ECATEGORY); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ BUF_PUSH_2 (notcategoryspec, c); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ /* end of category patch */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif /* MULE */ ~~~~~~~~~~~~~~~~~ #endif /* emacs */ ~~~~~~~~~~~~~~~~~~ case 'w': ~~~~~~~~~ laststart = buf_end; ~~~~~~~~~~~~~~~~~~~~ BUF_PUSH (wordchar); ~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case 'W': ~~~~~~~~~ laststart = buf_end; ~~~~~~~~~~~~~~~~~~~~ BUF_PUSH (notwordchar); ~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case '<': ~~~~~~~~~ BUF_PUSH (wordbeg); ~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case '>': ~~~~~~~~~ BUF_PUSH (wordend); ~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case 'b': ~~~~~~~~~ BUF_PUSH (wordbound); ~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case 'B': ~~~~~~~~~ BUF_PUSH (notwordbound); ~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case '`': ~~~~~~~~~ BUF_PUSH (begbuf); ~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case '\'': ~~~~~~~~~~ BUF_PUSH (endbuf); ~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case '1': case '2': case '3': case '4': case '5': ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case '6': case '7': case '8': case '9': ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ regnum_t reg = -1, regint; ~~~~~~~~~~~~~~~~~~~~~~~~~~ if (syntax & RE_NO_BK_REFS) ~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto normal_char; ~~~~~~~~~~~~~~~~~ PATUNFETCH; ~~~~~~~~~~~ GET_UNSIGNED_NUMBER (reg); ~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Progressively divide down the backreference until we find ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ one that corresponds to an existing register. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ while (reg > 10 && ~~~~~~~~~~~~~~~~~~ (syntax & RE_NO_MULTI_DIGIT_BK_REFS ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || reg > bufp->re_nsub ~~~~~~~~~~~~~~~~~~~~~~ || (bufp->external_to_internal_register[reg] ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ == (int) 0xDEADBEEF))) ~~~~~~~~~~~~~~~~~~~~~~ { ~ PATUNFETCH; ~~~~~~~~~~~ reg /= 10; ~~~~~~~~~~ } ~ if (reg > bufp->re_nsub ~~~~~~~~~~~~~~~~~~~~~~~ || (bufp->external_to_internal_register[reg] ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ == (int) 0xDEADBEEF)) ~~~~~~~~~~~~~~~~~~~~~ { ~ /* \N with one digit with a non-existing group has always ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ been a syntax error. ~~~~~~~~~~~~~~~~~~~~ GNU as of Fr 27 Mär 2020 16:24:07 GMT do not accept ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ multidigit backreferences; if they did there would be ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ an argument for this not being an error for those ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ backreferences that are less than some known named ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ backreference. As it is currently we should error, this ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ will give those writing code for XEmacs better ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ feedback. */ ~~~~~~~~~~~~ FREE_STACK_RETURN (REG_ESUBREG); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ regint = bufp->external_to_internal_register[reg]; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Can't back reference to a subexpression if inside of it. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (group_in_compile_stack (compile_stack, regint)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ /* Check REG, not REGINT. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ while (reg > 10) ~~~~~~~~~~~~~~~~ { ~ PATUNFETCH; ~~~~~~~~~~~ reg = reg / 10; ~~~~~~~~~~~~~~~ } ~ goto normal_char; ~~~~~~~~~~~~~~~~~ } ~ #ifdef emacs ~~~~~~~~~~~~ if (reg > 9 && ~~~~~~~~~~~~~~ bufp->warned_about_incompatible_back_references == 0) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ bufp->warned_about_incompatible_back_references = 1; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ warn_when_safe (intern ("regex"), Qinfo, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ "Back reference \\%d now has new " ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ "semantics in %s", reg, pattern); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ #endif ~~~~~~ laststart = buf_end; ~~~~~~~~~~~~~~~~~~~~ GET_BUFFER_SPACE (3); ~~~~~~~~~~~~~~~~~~~~~ store_op1 (duplicate, buf_end, regint); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ buf_end += 3; ~~~~~~~~~~~~~ } ~ break; ~~~~~~ case '+': ~~~~~~~~~ case '?': ~~~~~~~~~ if (syntax & RE_BK_PLUS_QM) ~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto handle_plus; ~~~~~~~~~~~~~~~~~ else ~~~~ goto normal_backslash; ~~~~~~~~~~~~~~~~~~~~~~ default: ~~~~~~~~ normal_backslash: ~~~~~~~~~~~~~~~~~ /* You might think it would be useful for \ to mean ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ not to translate; but if we don't translate it, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ it will never match anything. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ c = RE_TRANSLATE (c); ~~~~~~~~~~~~~~~~~~~~~ goto normal_char; ~~~~~~~~~~~~~~~~~ } ~ break; ~~~~~~ default: ~~~~~~~~ /* Expects the character in `c'. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* `p' points to the location after where `c' came from. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ normal_char: ~~~~~~~~~~~~ { ~ /* The following conditional synced to GNU Emacs 22.1. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* If no exactn currently being built. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!pending_exact ~~~~~~~~~~~~~~~~~~ /* If last exactn not at current position. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || pending_exact + *pending_exact + 1 != buf_end ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* We have only one byte following the exactn for the count. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || *pending_exact >= (1 << BYTEWIDTH) - MAX_ICHAR_LEN ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* If followed by a repetition operator. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ If the lookahead fails because of end of pattern, any ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ trailing backslash will get caught later. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || (p != pend && (*p == '*' || *p == '^')) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || ((syntax & RE_BK_PLUS_QM) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ? p + 1 < pend && *p == '\\' && (p[1] == '+' || p[1] == '?') ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ : p != pend && (*p == '+' || *p == '?')) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || ((syntax & RE_INTERVALS) ~~~~~~~~~~~~~~~~~~~~~~~~~~~ && ((syntax & RE_NO_BK_BRACES) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ? p != pend && *p == '{' ~~~~~~~~~~~~~~~~~~~~~~~~ : p + 1 < pend && (p[0] == '\\' && p[1] == '{')))) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ /* Start building a new exactn. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ laststart = buf_end; ~~~~~~~~~~~~~~~~~~~~ BUF_PUSH_2 (exactn, 0); ~~~~~~~~~~~~~~~~~~~~~~~ pending_exact = buf_end - 1; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ #ifndef MULE ~~~~~~~~~~~~ BUF_PUSH (c); ~~~~~~~~~~~~~ (*pending_exact)++; ~~~~~~~~~~~~~~~~~~~ #else ~~~~~ { ~ Bytecount bt_count; ~~~~~~~~~~~~~~~~~~~ Ibyte tmp_buf[MAX_ICHAR_LEN]; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int i; ~~~~~~ bt_count = set_itext_ichar (tmp_buf, c); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (i = 0; i < bt_count; i++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ BUF_PUSH (tmp_buf[i]); ~~~~~~~~~~~~~~~~~~~~~~ (*pending_exact)++; ~~~~~~~~~~~~~~~~~~~ } ~ } ~ #endif ~~~~~~ break; ~~~~~~ } ~ } /* switch (c) */ ~~~~~~~~~~~~~~~~~~ } /* while p != pend */ ~~~~~~~~~~~~~~~~~~~~~~~ /* Through the pattern now. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (fixup_alt_jump) ~~~~~~~~~~~~~~~~~~~ STORE_JUMP (jump_past_alt, fixup_alt_jump, buf_end); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!COMPILE_STACK_EMPTY) ~~~~~~~~~~~~~~~~~~~~~~~~~ FREE_STACK_RETURN (REG_EPAREN); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* If we don't want backtracking, force success ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the first time we reach the end of the compiled pattern. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (syntax & RE_NO_POSIX_BACKTRACKING) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ BUF_PUSH (succeed); ~~~~~~~~~~~~~~~~~~~ xfree (compile_stack.stack); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* We have succeeded; set the length of the buffer. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->used = buf_end - bufp->buffer; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef DEBUG ~~~~~~~~~~~~ if (debug_regexps & RE_DEBUG_COMPILATION) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ DEBUG_PRINT1 ("\nCompiled pattern: \n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ print_compiled_pattern (bufp); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ #endif /* DEBUG */ ~~~~~~~~~~~~~~~~~~ #ifndef MATCH_MAY_ALLOCATE ~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Initialize the failure stack to the largest possible stack. This ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ isn't necessary unless we're trying to avoid calling alloca in ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the search and match routines. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ int num_regs = bufp->re_ngroups + 1; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Since DOUBLE_FAIL_STACK refuses to double only if the current size ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ is strictly greater than re_max_failures, the largest possible stack ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ is 2 * re_max_failures failure points. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (fail_stack.size < (2 * re_max_failures * MAX_FAILURE_ITEMS)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ fail_stack.size = (2 * re_max_failures * MAX_FAILURE_ITEMS); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (! fail_stack.stack) ~~~~~~~~~~~~~~~~~~~~~~~ fail_stack.stack ~~~~~~~~~~~~~~~~ = (fail_stack_elt_t *) xmalloc (fail_stack.size ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ * sizeof (fail_stack_elt_t)); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ else ~~~~ fail_stack.stack ~~~~~~~~~~~~~~~~ = (fail_stack_elt_t *) xrealloc (fail_stack.stack, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (fail_stack.size ~~~~~~~~~~~~~~~~ * sizeof (fail_stack_elt_t))); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ regex_grow_registers (num_regs); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ #endif /* not MATCH_MAY_ALLOCATE */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return REG_NOERROR; ~~~~~~~~~~~~~~~~~~~ } /* regex_compile */ ~~~~~~~~~~~~~~~~~~~~~ ~ /* Subroutines for `regex_compile'. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Store OP at LOC followed by two-byte integer parameter ARG. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ static void ~~~~~~~~~~~ store_op1 (re_opcode_t op, unsigned char *loc, int arg) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ *loc = (unsigned char) op; ~~~~~~~~~~~~~~~~~~~~~~~~~~ STORE_NUMBER (loc + 1, arg); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* Like `store_op1', but for two two-byte parameters ARG1 and ARG2. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ static void ~~~~~~~~~~~ store_op2 (re_opcode_t op, unsigned char *loc, int arg1, int arg2) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ *loc = (unsigned char) op; ~~~~~~~~~~~~~~~~~~~~~~~~~~ STORE_NUMBER (loc + 1, arg1); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ STORE_NUMBER (loc + 3, arg2); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* Copy the bytes from LOC to END to open up three bytes of space at LOC ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for OP followed by two-byte integer parameter ARG. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ static void ~~~~~~~~~~~ insert_op1 (re_opcode_t op, unsigned char *loc, int arg, unsigned char *end) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ REGISTER unsigned char *pfrom = end; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ REGISTER unsigned char *pto = end + 3; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ while (pfrom != loc) ~~~~~~~~~~~~~~~~~~~~ *--pto = *--pfrom; ~~~~~~~~~~~~~~~~~~ store_op1 (op, loc, arg); ~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* Like `insert_op1', but for two two-byte parameters ARG1 and ARG2. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ static void ~~~~~~~~~~~ insert_op2 (re_opcode_t op, unsigned char *loc, int arg1, int arg2, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ unsigned char *end) ~~~~~~~~~~~~~~~~~~~ { ~ REGISTER unsigned char *pfrom = end; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ REGISTER unsigned char *pto = end + 5; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ while (pfrom != loc) ~~~~~~~~~~~~~~~~~~~~ *--pto = *--pfrom; ~~~~~~~~~~~~~~~~~~ store_op2 (op, loc, arg1, arg2); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* P points to just after a ^ in PATTERN. Return true if that ^ comes ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ after an alternative or a begin-subexpression. We assume there is at ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ least one character before the ^. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ static re_bool ~~~~~~~~~~~~~~ at_begline_loc_p (re_char *pattern, re_char *p, reg_syntax_t syntax) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ re_char *prev = p - 2; ~~~~~~~~~~~~~~~~~~~~~~ re_bool prev_prev_backslash = prev > pattern && prev[-1] == '\\'; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return ~~~~~~ /* After a subexpression? */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (*prev == '(' && (syntax & RE_NO_BK_PARENS || prev_prev_backslash)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* After an alternative? */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || (*prev == '|' && (syntax & RE_NO_BK_VBAR || prev_prev_backslash)); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* The dual of at_begline_loc_p. This one is for $. We assume there is ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ at least one character after the $, i.e., `P < PEND'. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ static re_bool ~~~~~~~~~~~~~~ at_endline_loc_p (re_char *p, re_char *pend, int syntax) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ re_char *next = p; ~~~~~~~~~~~~~~~~~~ re_bool next_backslash = *next == '\\'; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_char *next_next = p + 1 < pend ? p + 1 : 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return ~~~~~~ /* Before a subexpression? */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (syntax & RE_NO_BK_PARENS ? *next == ')' ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ : next_backslash && next_next && *next_next == ')') ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Before an alternative? */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || (syntax & RE_NO_BK_VBAR ? *next == '|' ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ : next_backslash && next_next && *next_next == '|'); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* Returns true if REGNUM is in one of COMPILE_STACK's elements and ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ false if it's not. */ ~~~~~~~~~~~~~~~~~~~~~~ static re_bool ~~~~~~~~~~~~~~ group_in_compile_stack (compile_stack_type compile_stack, regnum_t regnum) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ int this_element; ~~~~~~~~~~~~~~~~~ for (this_element = compile_stack.avail - 1; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ this_element >= 0; ~~~~~~~~~~~~~~~~~~ this_element--) ~~~~~~~~~~~~~~~ if (compile_stack.stack[this_element].regnum == regnum) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return true; ~~~~~~~~~~~~ return false; ~~~~~~~~~~~~~ } ~ /* Read the ending character of a range (in a bracket expression) from the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ uncompiled pattern *P_PTR (which ends at PEND). We assume the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ starting character is in `P[-2]'. (`P[-1]' is the character `-'.) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Then we set the translation of all bits between the starting and ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ending characters (inclusive) in the compiled pattern B. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Return an error code. ~~~~~~~~~~~~~~~~~~~~~ We use these short variable names so we can use the same macros as ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `regex_compile' itself. ~~~~~~~~~~~~~~~~~~~~~~~ Under Mule, this is only called when both chars of the range are ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ASCII. */ ~~~~~~~~~ static reg_errcode_t ~~~~~~~~~~~~~~~~~~~~ compile_range (re_char **p_ptr, re_char *pend, RE_TRANSLATE_TYPE translate, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ reg_syntax_t syntax, unsigned char *buf_end) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ Ichar this_char; ~~~~~~~~~~~~~~~~ re_char *p = *p_ptr; ~~~~~~~~~~~~~~~~~~~~ int range_start, range_end; ~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (p == pend) ~~~~~~~~~~~~~~ return REG_ERANGE; ~~~~~~~~~~~~~~~~~~ /* Even though the pattern is a signed `char *', we need to fetch ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ with unsigned char *'s; if the high bit of the pattern character ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ is set, the range endpoints will be negative if we fetch using a ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ signed char *. ~~~~~~~~~~~~~~ We also want to fetch the endpoints without translating them; the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ appropriate translation is done in the bit-setting loop below. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* The SVR4 compiler on the 3B2 had trouble with unsigned const char *. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ range_start = ((const unsigned char *) p)[-2]; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ range_end = ((const unsigned char *) p)[0]; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Have to increment the pointer into the pattern string, so the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ caller isn't still at the ending character. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (*p_ptr)++; ~~~~~~~~~~~ /* If the start is after the end, the range is empty. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (range_start > range_end) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return syntax & RE_NO_EMPTY_RANGES ? REG_ERANGE : REG_NOERROR; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Here we see why `this_char' has to be larger than an `unsigned ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ char' -- the range is inclusive, so if `range_end' == 0xff ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (assuming 8-bit characters), we would otherwise go into an infinite ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ loop, since all characters <= 0xff. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (this_char = range_start; this_char <= range_end; this_char++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ SET_LIST_BIT (RE_TRANSLATE (this_char)); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ return REG_NOERROR; ~~~~~~~~~~~~~~~~~~~ } ~ #ifdef MULE ~~~~~~~~~~~ static reg_errcode_t ~~~~~~~~~~~~~~~~~~~~ compile_extended_range (re_char **p_ptr, re_char *pend, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RE_TRANSLATE_TYPE translate, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ reg_syntax_t syntax, Lisp_Object rtab) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ Ichar this_char, range_start, range_end; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ const Ibyte *p; ~~~~~~~~~~~~~~~ if (*p_ptr == pend) ~~~~~~~~~~~~~~~~~~~ return REG_ERANGE; ~~~~~~~~~~~~~~~~~~ p = (const Ibyte *) *p_ptr; ~~~~~~~~~~~~~~~~~~~~~~~~~~~ range_end = itext_ichar (p); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ p--; /* back to '-' */ ~~~~~~~~~~~~~~~~~~~~~~ DEC_IBYTEPTR (p); /* back to start of range */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* We also want to fetch the endpoints without translating them; the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ appropriate translation is done in the bit-setting loop below. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ range_start = itext_ichar (p); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ INC_IBYTEPTR (*p_ptr); ~~~~~~~~~~~~~~~~~~~~~~ /* If the start is after the end, the range is empty. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (range_start > range_end) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return syntax & RE_NO_EMPTY_RANGES ? REG_ERANGE : REG_NOERROR; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifndef UNICODE_INTERNAL ~~~~~~~~~~~~~~~~~~~~~~~~ /* Can't have ranges spanning different charsets, except maybe for ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ranges entirely within the first 256 chars. (The intent of this is that ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the effect of such a range would be unpredictable, since there is no ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ well-defined ordering over charsets and the particular assignment of ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ charset ID's is arbitrary.) This does not apply to Unicode, with ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ well-defined character values. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if ((range_start >= 0x100 || range_end >= 0x100) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ && !EQ (old_mule_ichar_charset (range_start), ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ old_mule_ichar_charset (range_end))) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return REG_ERANGESPAN; ~~~~~~~~~~~~~~~~~~~~~~ #endif /* not UNICODE_INTERNAL */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (TRANSLATE_P (translate)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ /* #### This might be way inefficient if the range encompasses 10,000 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ chars or something. To be efficient, you'd have to do something like ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ this: ~~~~~ range_table a ~~~~~~~~~~~~~ range_table b; ~~~~~~~~~~~~~~ map_char_table (translation table, [range_start, range_end]) of ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ lambda (ch, translation): ~~~~~~~~~~~~~~~~~~~~~~~~~ put (ch, Qt) in a ~~~~~~~~~~~~~~~~~ put (translation, Qt) in b ~~~~~~~~~~~~~~~~~~~~~~~~~~ invert the range in a and truncate to [range_start, range_end] ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ put the union of a, b in rtab ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ This is to say, we want to map every character that has a translation ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ to its translation, and other characters to themselves. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ This assumes, as is reasonable in practice, that a translation ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ table maps individual characters to their translation, and does ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ not generally map multiple characters to the same translation. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ ~~ for (this_char = range_start; this_char <= range_end; this_char++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ SET_RANGETAB_BIT (RE_TRANSLATE (this_char)); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ } ~ else ~~~~ put_range_table (rtab, range_start, range_end, Qt); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return REG_NOERROR; ~~~~~~~~~~~~~~~~~~~ } ~ #endif /* MULE */ ~~~~~~~~~~~~~~~~~ #ifdef emacs ~~~~~~~~~~~~ reg_errcode_t ~~~~~~~~~~~~~ compile_char_class (re_wctype_t cc, Lisp_Object rtab, Bitbyte *flags_out) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ *flags_out |= re_wctype_to_bit (cc); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ switch (cc) ~~~~~~~~~~~ { ~ case RECC_ASCII: ~~~~~~~~~~~~~~~~ put_range_table (rtab, 0, 0x7f, Qt); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case RECC_XDIGIT: ~~~~~~~~~~~~~~~~~ put_range_table (rtab, 'a', 'f', Qt); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ put_range_table (rtab, 'A', 'f', Qt); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* fallthrough */ ~~~~~~~~~~~~~~~~~ case RECC_DIGIT: ~~~~~~~~~~~~~~~~ put_range_table (rtab, '0', '9', Qt); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case RECC_BLANK: ~~~~~~~~~~~~~~~~ put_range_table (rtab, ' ', ' ', Qt); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ put_range_table (rtab, '\t', '\t', Qt); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case RECC_PRINT: ~~~~~~~~~~~~~~~~ put_range_table (rtab, ' ', 0x7e, Qt); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ put_range_table (rtab, 0x80, CHAR_CODE_LIMIT, Qt); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case RECC_GRAPH: ~~~~~~~~~~~~~~~~ put_range_table (rtab, '!', 0x7e, Qt); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ put_range_table (rtab, 0x80, CHAR_CODE_LIMIT, Qt); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case RECC_NONASCII: ~~~~~~~~~~~~~~~~~~~ case RECC_MULTIBYTE: ~~~~~~~~~~~~~~~~~~~~ put_range_table (rtab, 0x80, CHAR_CODE_LIMIT, Qt); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case RECC_CNTRL: ~~~~~~~~~~~~~~~~ put_range_table (rtab, 0x00, 0x1f, Qt); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case RECC_UNIBYTE: ~~~~~~~~~~~~~~~~~~ /* Never true in XEmacs. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ /* The following all have their own bits in the class_bits argument to ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ charset_mule and charset_mule_not, they don't use the range table ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ information. */ ~~~~~~~~~~~~~~~ case RECC_ALPHA: ~~~~~~~~~~~~~~~~ case RECC_WORD: ~~~~~~~~~~~~~~~ case RECC_ALNUM: /* Equivalent to RECC_WORD */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_LOWER: ~~~~~~~~~~~~~~~~ case RECC_PUNCT: ~~~~~~~~~~~~~~~~ case RECC_SPACE: ~~~~~~~~~~~~~~~~ case RECC_UPPER: ~~~~~~~~~~~~~~~~ break; ~~~~~~ } ~ return REG_NOERROR; ~~~~~~~~~~~~~~~~~~~ } ~ #endif /* MULE */ ~~~~~~~~~~~~~~~~~ ~ /* re_compile_fastmap computes a ``fastmap'' for the compiled pattern in ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ BUFP. A fastmap records which of the (1 << BYTEWIDTH) possible ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ characters can start a string that matches the pattern. This fastmap ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ is used by re_search to skip quickly over impossible starting points. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ The caller must supply the address of a (1 << BYTEWIDTH)-byte data ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ area as BUFP->fastmap. ~~~~~~~~~~~~~~~~~~~~~~ We set the `fastmap', `fastmap_accurate', and `can_be_null' fields in ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the pattern buffer. ~~~~~~~~~~~~~~~~~~~ Returns 0 if we succeed, -2 if an internal error. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int ~~~ re_compile_fastmap (struct re_pattern_buffer *bufp ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RE_LISP_SHORT_CONTEXT_ARGS_DECL) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ int j, k; ~~~~~~~~~ #ifdef MATCH_MAY_ALLOCATE ~~~~~~~~~~~~~~~~~~~~~~~~~ fail_stack_type fail_stack; ~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ /* We don't push any register information onto the failure stack. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* &&#### this should be changed for 8-bit-fixed, for efficiency. see ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ comment marked with &&#### in re_search_2. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ REGISTER char *fastmap = bufp->fastmap; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ unsigned char *pattern = bufp->buffer; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ long size = bufp->used; ~~~~~~~~~~~~~~~~~~~~~~~ re_char *p = pattern; ~~~~~~~~~~~~~~~~~~~~~ REGISTER re_char *pend = pattern + size; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef REGEX_REL_ALLOC ~~~~~~~~~~~~~~~~~~~~~~ /* This holds the pointer to the failure stack, when ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ it is allocated relocatably. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fail_stack_elt_t *failure_stack_ptr; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ /* Assume that each path through the pattern can be null until ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ proven otherwise. We set this false at the bottom of switch ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ statement, to which we get only if a particular path doesn't ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ match the empty string. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_bool path_can_be_null = true; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* We aren't doing a `succeed_n' to begin with. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_bool succeed_n_p = false; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef ERROR_CHECK_MALLOC ~~~~~~~~~~~~~~~~~~~~~~~~~ /* The pattern comes from string data, not buffer data. We don't access ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ any buffer data, so we don't have to worry about malloc() (but the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ disallowed flag may have been set by a caller). */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int depth = bind_regex_malloc_disallowed (0); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ assert (fastmap != NULL && p != NULL); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ INIT_FAIL_STACK (); ~~~~~~~~~~~~~~~~~~~ memset (fastmap, 0, 1 << BYTEWIDTH); /* Assume nothing's valid. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->fastmap_accurate = 1; /* It will be when we're done. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->can_be_null = 0; ~~~~~~~~~~~~~~~~~~~~~~ while (1) ~~~~~~~~~ { ~ if (p == pend || *p == succeed) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ /* We have reached the (effective) end of pattern. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!FAIL_STACK_EMPTY ()) ~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ bufp->can_be_null |= path_can_be_null; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Reset for next path. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~ path_can_be_null = true; ~~~~~~~~~~~~~~~~~~~~~~~~ p = (unsigned char *) fail_stack.stack[--fail_stack.avail].pointer; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ continue; ~~~~~~~~~ } ~ else ~~~~ break; ~~~~~~ } ~ /* We should never be about to go beyond the end of the pattern. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ assert (p < pend); ~~~~~~~~~~~~~~~~~~ switch ((re_opcode_t) *p++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ /* I guess the idea here is to simply not bother with a fastmap ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if a backreference is used, since it's too hard to figure out ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the fastmap for the corresponding group. Setting ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `can_be_null' stops `re_search_2' from using the fastmap, so ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ that is all we do. */ ~~~~~~~~~~~~~~~~~~~~~~ case duplicate: ~~~~~~~~~~~~~~~ bufp->can_be_null = 1; ~~~~~~~~~~~~~~~~~~~~~~ goto done; ~~~~~~~~~~ /* Following are the cases which match a character. These end ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ with `break'. */ ~~~~~~~~~~~~~~~~~ case exactn: ~~~~~~~~~~~~ fastmap[p[1]] = 1; ~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case charset: ~~~~~~~~~~~~~ /* XEmacs: Under Mule, these bit vectors will ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ only contain values for characters below 0x80. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH))) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[j] = 1; ~~~~~~~~~~~~~~~ break; ~~~~~~ case charset_not: ~~~~~~~~~~~~~~~~~ /* Chars beyond end of map must be allowed. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef MULE ~~~~~~~~~~~ for (j = *p * BYTEWIDTH; j < 0x80; j++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[j] = 1; ~~~~~~~~~~~~~~~ /* And all extended characters must be allowed, too. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (j = 0x80; j < 0x100; j++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (ibyte_first_byte_p (j)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[j] = 1; ~~~~~~~~~~~~~~~ #else /* not MULE */ ~~~~~~~~~~~~~~~~~~~~ for (j = *p * BYTEWIDTH; j < (1 << BYTEWIDTH); j++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[j] = 1; ~~~~~~~~~~~~~~~ #endif /* MULE */ ~~~~~~~~~~~~~~~~~ for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!(p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH)))) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[j] = 1; ~~~~~~~~~~~~~~~ break; ~~~~~~ #ifdef MULE ~~~~~~~~~~~ case charset_mule: ~~~~~~~~~~~~~~~~~~ { ~ int nentries; ~~~~~~~~~~~~~ Bitbyte flags = *p++; ~~~~~~~~~~~~~~~~~~~~~ if (flags) ~~~~~~~~~~ { ~ /* We need to consult the syntax table, fastmap won't ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ work. */ ~~~~~~~~ bufp->can_be_null = 1; ~~~~~~~~~~~~~~~~~~~~~~ goto done; ~~~~~~~~~~ } ~ nentries = unified_range_table_nentries ((void *) p); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (j = 0; j < nentries; j++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ EMACS_INT first, last; ~~~~~~~~~~~~~~~~~~~~~~ Lisp_Object dummy_val; ~~~~~~~~~~~~~~~~~~~~~~ int jj; ~~~~~~~ Ibyte strr[MAX_ICHAR_LEN]; ~~~~~~~~~~~~~~~~~~~~~~~~~~ unified_range_table_get_range ((void *) p, j, &first, &last, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ &dummy_val); ~~~~~~~~~~~~ #ifndef UNICODE_INTERNAL ~~~~~~~~~~~~~~~~~~~~~~~~ for (jj = first; jj <= last && jj < 0x80; jj++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[jj] = 1; ~~~~~~~~~~~~~~~~ /* Ranges below 0x100 can span charsets, but there ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ are only two (Control-1 and Latin-1), and ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ either first or last has to be in them. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ set_itext_ichar (strr, first); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[*strr] = 1; ~~~~~~~~~~~~~~~~~~~ if (last < 0x100) ~~~~~~~~~~~~~~~~~ { ~ set_itext_ichar (strr, last); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[*strr] = 1; ~~~~~~~~~~~~~~~~~~~ } ~ else if (CHAR_CODE_LIMIT == last) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ /* This is RECC_MULTIBYTE or RECC_NONASCII; true for all ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ non-ASCII characters. */ ~~~~~~~~~~~~~~~~~~~~~~~~ jj = 0x80; ~~~~~~~~~~ while (jj < 0xA0) ~~~~~~~~~~~~~~~~~ { ~ fastmap[jj++] = 1; ~~~~~~~~~~~~~~~~~~ } ~ } ~ #else ~~~~~ /* Ranges can span charsets. We depend on the fact that ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ lead bytes are monotonically non-decreasing as ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ character values increase. @@#### This is a fairly ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ reasonable assumption in general (but DOES NOT WORK in ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ old Mule due to the ordering of private dimension-1 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ chars before official dimension-2 chars), and introduces ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ a dependency on the particular representation. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ Ibyte strrlast[MAX_ICHAR_LEN]; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ set_itext_ichar (strr, first); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ set_itext_ichar (strrlast, min (last, CHAR_CODE_LIMIT - 1)); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (jj = *strr; jj <= *strrlast; jj++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[jj] = 1; ~~~~~~~~~~~~~~~~ } ~ #endif /* not UNICODE_INTERNAL */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* If it's not a possible first byte, it can't be in the fastmap. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ In UTF-8, lead bytes are not contiguous with ASCII, so a ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ range spanning the ASCII/non-ASCII boundary will put ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ extraneous bytes in the range [0x80 - 0xBF] in the fastmap. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (j = 0x80; j < 0x100; j++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!ibyte_first_byte_p (j)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[j] = 0; ~~~~~~~~~~~~~~~ } ~ break; ~~~~~~ case charset_mule_not: ~~~~~~~~~~~~~~~~~~~~~~ { ~ int nentries; ~~~~~~~~~~~~~ int smallest_prev = 0; ~~~~~~~~~~~~~~~~~~~~~~ Bitbyte flags = *p++; ~~~~~~~~~~~~~~~~~~~~~ if (flags) ~~~~~~~~~~ { ~ /* We need to consult the syntax table, fastmap won't ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ work. */ ~~~~~~~~ bufp->can_be_null = 1; ~~~~~~~~~~~~~~~~~~~~~~ goto done; ~~~~~~~~~~ } ~ nentries = unified_range_table_nentries ((void *) p); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifndef UNICODE_INTERNAL ~~~~~~~~~~~~~~~~~~~~~~~~ for (j = 0; j < nentries; j++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ EMACS_INT first, last; ~~~~~~~~~~~~~~~~~~~~~~ Lisp_Object dummy_val; ~~~~~~~~~~~~~~~~~~~~~~ int jj; ~~~~~~~ unified_range_table_get_range ((void *) p, j, &first, &last, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ &dummy_val); ~~~~~~~~~~~~ for (jj = smallest_prev; jj < first && jj < 0x80; jj++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[jj] = 1; ~~~~~~~~~~~~~~~~ smallest_prev = last + 1; ~~~~~~~~~~~~~~~~~~~~~~~~~ if (smallest_prev >= 0x80) ~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ } ~ /* Also set lead bytes after the end */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (j = smallest_prev; j < 0x80; j++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[j] = 1; ~~~~~~~~~~~~~~~ /* Calculating which lead bytes are actually allowed ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ here is rather difficult, so we just punt and allow ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ all of them. ~~~~~~~~~~~~ */ ~~ for (j = 0x80; j < 0x100; j++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (ibyte_first_byte_p (j)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[j] = 1; ~~~~~~~~~~~~~~~ #else ~~~~~ for (j = 0; j < nentries; j++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ EMACS_INT first, last; ~~~~~~~~~~~~~~~~~~~~~~ /* This denotes a range of lead bytes that are not ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ in the fastmap. */ ~~~~~~~~~~~~~~~~~~ int firstlead, lastlead; ~~~~~~~~~~~~~~~~~~~~~~~~ Lisp_Object dummy_val; ~~~~~~~~~~~~~~~~~~~~~~ int jj; ~~~~~~~ unified_range_table_get_range ((void *) p, j, &first, &last, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ &dummy_val); ~~~~~~~~~~~~ /* With Unicode-internal, lead bytes that are entirely ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ within the range and not including the beginning or end ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ are definitely not in the fastmap. Leading bytes that ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ include the beginning or ending characters will be in ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the fastmap unless the beginning or ending characters ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ are the first or last character, respectively, that uses ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ this lead byte. ~~~~~~~~~~~~~~~ @@#### WARNING! In order to determine whether we are the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ first or last character using a lead byte we use and ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ embed in the code some knowledge of how UTF-8 works -- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ at least, the fact that the the first character using a ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ particular lead byte has the minimum-numbered trailing ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ byte in all its trailing bytes, and the last character ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ using a particular lead byte has the maximum-numbered ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ trailing byte in all its trailing bytes. We abstract ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ away the actual minimum/maximum trailing byte numbers, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ at least. We could perhaps do this more portably by ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ just looking at the representation of the character one ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ higher or lower and seeing if the lead byte changes, but ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ you'd run into the problem of invalid characters, e.g. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if you're at the edge of the range of surrogates or are ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the top-most allowed character. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ ~~ if (first < 0x80) ~~~~~~~~~~~~~~~~~ firstlead = first; ~~~~~~~~~~~~~~~~~~ else ~~~~ { ~ Ibyte strr[MAX_ICHAR_LEN]; ~~~~~~~~~~~~~~~~~~~~~~~~~~ Bytecount slen = set_itext_ichar (strr, first); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int kk; ~~~~~~~ /* Determine if we're the first character using our ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ leading byte. */ ~~~~~~~~~~~~~~~~ for (kk = 1; kk < slen; kk++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (strr[kk] != FIRST_TRAILING_BYTE) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ /* If not, this leading byte might occur, so ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ make sure it gets added to the fastmap. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ firstlead = *strr + 1; ~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ } ~ /* Otherwise, we're the first character using our ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ leading byte, and we don't need to add the leading ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ byte to the fastmap. (If our range doesn't ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ completely cover the leading byte, it will get added ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ anyway by the code handling the other end of the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ range.) */ ~~~~~~~~~~ firstlead = *strr; ~~~~~~~~~~~~~~~~~~ } ~ if (last < 0x80) ~~~~~~~~~~~~~~~~ lastlead = last; ~~~~~~~~~~~~~~~~ else ~~~~ { ~ Ibyte strr[MAX_ICHAR_LEN]; ~~~~~~~~~~~~~~~~~~~~~~~~~~ Bytecount slen ~~~~~~~~~~~~~~ = set_itext_ichar (strr, ~~~~~~~~~~~~~~~~~~~~~~~~ min (last, ~~~~~~~~~~ CHAR_CODE_LIMIT - 1)); ~~~~~~~~~~~~~~~~~~~~~~ int kk; ~~~~~~~ /* Same as above but for the last character using ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ our leading byte. */ ~~~~~~~~~~~~~~~~~~~~ for (kk = 1; kk < slen; kk++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (strr[kk] != LAST_TRAILING_BYTE) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ lastlead = *strr - 1; ~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ } ~ lastlead = *strr; ~~~~~~~~~~~~~~~~~ } ~ /* Now, FIRSTLEAD and LASTLEAD are set to the beginning and ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ end, inclusive, of a range of lead bytes that cannot be ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ in the fastmap. Essentially, we want to set all the other ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bytes to be in the fastmap. Here we handle those after ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the previous range and before this one. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (jj = smallest_prev; jj < firstlead; jj++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[jj] = 1; ~~~~~~~~~~~~~~~~ smallest_prev = lastlead + 1; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* Also set lead bytes after the end of the final range. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (j = smallest_prev; j < 0x100; j++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[j] = 1; ~~~~~~~~~~~~~~~ /* If it's not a possible first byte, it can't be in the fastmap. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ In UTF-8, lead bytes are not contiguous with ASCII, so a ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ range spanning the ASCII/non-ASCII boundary will put ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ extraneous bytes in the range [0x80 - 0xBF] in the fastmap. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (j = 0x80; j < 0x100; j++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!ibyte_first_byte_p (j)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[j] = 0; ~~~~~~~~~~~~~~~ #endif /* UNICODE_INTERNAL */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ break; ~~~~~~ #endif /* MULE */ ~~~~~~~~~~~~~~~~~ case anychar: ~~~~~~~~~~~~~ { ~ int fastmap_newline = fastmap['\n']; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* `.' matches anything ... */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef MULE ~~~~~~~~~~~ /* "anything" only includes bytes that can be the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ first byte of a character. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (j = 0; j < 0x100; j++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (ibyte_first_byte_p (j)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[j] = 1; ~~~~~~~~~~~~~~~ #else ~~~~~ for (j = 0; j < (1 << BYTEWIDTH); j++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[j] = 1; ~~~~~~~~~~~~~~~ #endif ~~~~~~ /* ... except perhaps newline. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!(bufp->syntax & RE_DOT_NEWLINE)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap['\n'] = fastmap_newline; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Return if we have already set `can_be_null'; if we have, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ then the fastmap is irrelevant. Something's wrong here. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ else if (bufp->can_be_null) ~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto done; ~~~~~~~~~~ /* Otherwise, have to check alternative paths. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ } ~ #ifndef emacs ~~~~~~~~~~~~~ case wordchar: ~~~~~~~~~~~~~~ for (j = 0; j < (1 << BYTEWIDTH); j++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (SYNTAX (ignored, j) == Sword) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[j] = 1; ~~~~~~~~~~~~~~~ break; ~~~~~~ case notwordchar: ~~~~~~~~~~~~~~~~~ for (j = 0; j < (1 << BYTEWIDTH); j++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (SYNTAX (ignored, j) != Sword) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[j] = 1; ~~~~~~~~~~~~~~~ break; ~~~~~~ #else /* emacs */ ~~~~~~~~~~~~~~~~~ case wordchar: ~~~~~~~~~~~~~~ case notwordchar: ~~~~~~~~~~~~~~~~~ case wordbound: ~~~~~~~~~~~~~~~ case notwordbound: ~~~~~~~~~~~~~~~~~~ case wordbeg: ~~~~~~~~~~~~~ case wordend: ~~~~~~~~~~~~~ case notsyntaxspec: ~~~~~~~~~~~~~~~~~~~ case syntaxspec: ~~~~~~~~~~~~~~~~ /* This match depends on text properties. These end with ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ aborting optimizations. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->can_be_null = 1; ~~~~~~~~~~~~~~~~~~~~~~ goto done; ~~~~~~~~~~ #if 0 /* all of the following code is unused now that the `syntax-table' ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ property exists -- it's trickier to do this than just look in ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the buffer. &&#### but we could just use the syntax-cache stuff ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ instead; why don't we? --ben */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case wordchar: ~~~~~~~~~~~~~~ k = (int) Sword; ~~~~~~~~~~~~~~~~ goto matchsyntax; ~~~~~~~~~~~~~~~~~ case notwordchar: ~~~~~~~~~~~~~~~~~ k = (int) Sword; ~~~~~~~~~~~~~~~~ goto matchnotsyntax; ~~~~~~~~~~~~~~~~~~~~ case syntaxspec: ~~~~~~~~~~~~~~~~ k = *p++; ~~~~~~~~~ matchsyntax: ~~~~~~~~~~~~ #ifdef MULE ~~~~~~~~~~~ for (j = 0; j < 0x80; j++) ~~~~~~~~~~~~~~~~~~~~~~~~~~ if (SYNTAX ~~~~~~~~~~ (XCHAR_TABLE (BUFFER_MIRROR_SYNTAX_TABLE (lispbuf)), j) == ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (enum syntaxcode) k) ~~~~~~~~~~~~~~~~~~~~ fastmap[j] = 1; ~~~~~~~~~~~~~~~ /* @@#### To be correct, we need to set the fastmap for any ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ lead byte any of whose characters can have this syntax code. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ This is hard to calculate so we just punt for now. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (j = 0x80; j < 0x100; j++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (ibyte_first_byte_p (j)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[j] = 1; ~~~~~~~~~~~~~~~ #else /* not MULE */ ~~~~~~~~~~~~~~~~~~~~ for (j = 0; j < (1 << BYTEWIDTH); j++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (SYNTAX ~~~~~~~~~~ (XCHAR_TABLE (BUFFER_MIRROR_SYNTAX_TABLE (lispbuf)), j) == ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (enum syntaxcode) k) ~~~~~~~~~~~~~~~~~~~~ fastmap[j] = 1; ~~~~~~~~~~~~~~~ #endif /* MULE */ ~~~~~~~~~~~~~~~~~ break; ~~~~~~ case notsyntaxspec: ~~~~~~~~~~~~~~~~~~~ k = *p++; ~~~~~~~~~ matchnotsyntax: ~~~~~~~~~~~~~~~ #ifdef MULE ~~~~~~~~~~~ for (j = 0; j < 0x80; j++) ~~~~~~~~~~~~~~~~~~~~~~~~~~ if (SYNTAX ~~~~~~~~~~ (XCHAR_TABLE ~~~~~~~~~~~~ (BUFFER_MIRROR_SYNTAX_TABLE (lispbuf)), j) != ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (enum syntaxcode) k) ~~~~~~~~~~~~~~~~~~~~ fastmap[j] = 1; ~~~~~~~~~~~~~~~ /* @@#### To be correct, we need to set the fastmap for any ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ lead byte all of whose characters do not have this syntax code. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ This is hard to calculate so we just punt for now. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (j = 0x80; j < 0x100; j++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (ibyte_first_byte_p (j)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[j] = 1; ~~~~~~~~~~~~~~~ #else /* not MULE */ ~~~~~~~~~~~~~~~~~~~~ for (j = 0; j < (1 << BYTEWIDTH); j++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (SYNTAX ~~~~~~~~~~ (XCHAR_TABLE ~~~~~~~~~~~~ (BUFFER_MIRROR_SYNTAX_TABLE (lispbuf)), j) != ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (enum syntaxcode) k) ~~~~~~~~~~~~~~~~~~~~ fastmap[j] = 1; ~~~~~~~~~~~~~~~ #endif /* MULE */ ~~~~~~~~~~~~~~~~~ break; ~~~~~~ #endif /* 0 */ ~~~~~~~~~~~~~~ #ifdef MULE ~~~~~~~~~~~ /* 97/2/17 jhod category patch */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case categoryspec: ~~~~~~~~~~~~~~~~~~ case notcategoryspec: ~~~~~~~~~~~~~~~~~~~~~ bufp->can_be_null = 1; ~~~~~~~~~~~~~~~~~~~~~~ UNBIND_REGEX_MALLOC_CHECK (); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return 0; ~~~~~~~~~ /* end if category patch */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif /* MULE */ ~~~~~~~~~~~~~~~~~ /* All cases after this match the empty string. These end with ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `continue'. */ ~~~~~~~~~~~~~~~ case before_dot: ~~~~~~~~~~~~~~~~ case at_dot: ~~~~~~~~~~~~ case after_dot: ~~~~~~~~~~~~~~~ continue; ~~~~~~~~~ #endif /* emacs */ ~~~~~~~~~~~~~~~~~~ case no_op: ~~~~~~~~~~~ case begline: ~~~~~~~~~~~~~ case endline: ~~~~~~~~~~~~~ case begbuf: ~~~~~~~~~~~~ case endbuf: ~~~~~~~~~~~~ #ifndef emacs ~~~~~~~~~~~~~ case wordbound: ~~~~~~~~~~~~~~~ case notwordbound: ~~~~~~~~~~~~~~~~~~ case wordbeg: ~~~~~~~~~~~~~ case wordend: ~~~~~~~~~~~~~ #endif ~~~~~~ case push_dummy_failure: ~~~~~~~~~~~~~~~~~~~~~~~~ continue; ~~~~~~~~~ case jump_n: ~~~~~~~~~~~~ case pop_failure_jump: ~~~~~~~~~~~~~~~~~~~~~~ case maybe_pop_jump: ~~~~~~~~~~~~~~~~~~~~ case jump: ~~~~~~~~~~ case jump_past_alt: ~~~~~~~~~~~~~~~~~~~ case dummy_failure_jump: ~~~~~~~~~~~~~~~~~~~~~~~~ EXTRACT_NUMBER_AND_INCR (j, p); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ p += j; ~~~~~~~ if (j > 0) ~~~~~~~~~~ continue; ~~~~~~~~~ /* Jump backward implies we just went through the body of a ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ loop and matched nothing. Opcode jumped to should be ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `on_failure_jump' or `succeed_n'. Just treat it like an ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ordinary jump. For a * loop, it has pushed its failure ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ point already; if so, discard that as redundant. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if ((re_opcode_t) *p != on_failure_jump ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ && (re_opcode_t) *p != succeed_n) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ continue; ~~~~~~~~~ p++; ~~~~ EXTRACT_NUMBER_AND_INCR (j, p); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ p += j; ~~~~~~~ /* If what's on the stack is where we are now, pop it. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!FAIL_STACK_EMPTY () ~~~~~~~~~~~~~~~~~~~~~~~~ && fail_stack.stack[fail_stack.avail - 1].pointer == p) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fail_stack.avail--; ~~~~~~~~~~~~~~~~~~~ continue; ~~~~~~~~~ case on_failure_jump: ~~~~~~~~~~~~~~~~~~~~~ case on_failure_keep_string_jump: ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ handle_on_failure_jump: ~~~~~~~~~~~~~~~~~~~~~~~ EXTRACT_NUMBER_AND_INCR (j, p); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* For some patterns, e.g., `(a?)?', `p+j' here points to the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ end of the pattern. We don't want to push such a point, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ since when we restore it above, entering the switch will ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ increment `p' past the end of the pattern. We don't need ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ to push such a point since we obviously won't find any more ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap entries beyond `pend'. Such a pattern can match ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the null string, though. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (p + j < pend) ~~~~~~~~~~~~~~~~~ { ~ if (!PUSH_PATTERN_OP (p + j, fail_stack)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ RESET_FAIL_STACK (); ~~~~~~~~~~~~~~~~~~~~ UNBIND_REGEX_MALLOC_CHECK (); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return -2; ~~~~~~~~~~ } ~ } ~ else ~~~~ bufp->can_be_null = 1; ~~~~~~~~~~~~~~~~~~~~~~ if (succeed_n_p) ~~~~~~~~~~~~~~~~ { ~ EXTRACT_NUMBER_AND_INCR (k, p); /* Skip the n. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ succeed_n_p = false; ~~~~~~~~~~~~~~~~~~~~ } ~ continue; ~~~~~~~~~ case succeed_n: ~~~~~~~~~~~~~~~ /* Get to the number of times to succeed. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ p += 2; ~~~~~~~ /* Increment p past the n for when k != 0. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ EXTRACT_NONNEGATIVE_AND_INCR (k, p); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (k == 0) ~~~~~~~~~~~ { ~ p -= 4; ~~~~~~~ succeed_n_p = true; /* Spaghetti code alert. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto handle_on_failure_jump; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ continue; ~~~~~~~~~ case set_number_at: ~~~~~~~~~~~~~~~~~~~ p += 4; ~~~~~~~ continue; ~~~~~~~~~ case start_memory: ~~~~~~~~~~~~~~~~~~ case stop_memory: ~~~~~~~~~~~~~~~~~ p += 4; ~~~~~~~ continue; ~~~~~~~~~ default: ~~~~~~~~ ABORT (); /* We have listed all the cases. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } /* switch *p++ */ ~~~~~~~~~~~~~~~~~~~ /* Getting here means we have found the possible starting ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ characters for one path of the pattern -- and that the empty ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ string does not match. We need not follow this path further. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Instead, look at the next alternative (remembered on the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ stack), or quit if no more. The test at the top of the loop ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ does these things. */ ~~~~~~~~~~~~~~~~~~~~~~ path_can_be_null = false; ~~~~~~~~~~~~~~~~~~~~~~~~~ p = pend; ~~~~~~~~~ } /* while p */ ~~~~~~~~~~~~~~~ /* Set `can_be_null' for the last path (also the first path, if the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ pattern is empty). */ ~~~~~~~~~~~~~~~~~~~~~~ bufp->can_be_null |= path_can_be_null; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ done: ~~~~~ RESET_FAIL_STACK (); ~~~~~~~~~~~~~~~~~~~~ UNBIND_REGEX_MALLOC_CHECK (); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return 0; ~~~~~~~~~ } /* re_compile_fastmap */ ~~~~~~~~~~~~~~~~~~~~~~~~~~ ~ /* Set REGS to hold NUM_REGS registers, storing them in STARTS and ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ENDS. Subsequent matches using PATTERN_BUFFER and REGS will use ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ this memory for recording register information. STARTS and ENDS ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ must be allocated using the malloc library routine, and must each ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ be at least NUM_REGS * sizeof (regoff_t) bytes long. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ If NUM_REGS == 0, then subsequent matches should allocate their own ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ register data. ~~~~~~~~~~~~~~ Unless this function is called, the first search or match using ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PATTERN_BUFFER will allocate its own register data, without ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ freeing the old data. */ ~~~~~~~~~~~~~~~~~~~~~~~~~ void ~~~~ re_set_registers (struct re_pattern_buffer *bufp, struct re_registers *regs, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int num_regs, regoff_t *starts, regoff_t *ends) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ if (num_regs) ~~~~~~~~~~~~~ { ~ bufp->regs_allocated = REGS_REALLOCATE; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regs->num_regs = num_regs; ~~~~~~~~~~~~~~~~~~~~~~~~~~ regs->start = starts; ~~~~~~~~~~~~~~~~~~~~~ regs->end = ends; ~~~~~~~~~~~~~~~~~ } ~ else ~~~~ { ~ bufp->regs_allocated = REGS_UNALLOCATED; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regs->num_regs = 0; ~~~~~~~~~~~~~~~~~~~ regs->start = regs->end = (regoff_t *) 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ } ~ ~ /* Searching routines. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Like re_search_2, below, but only one string is specified, and ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ doesn't let you say where to stop matching. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int ~~~ re_search (struct re_pattern_buffer *bufp, const char *string, int size, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int startpos, int range, struct re_registers *regs ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RE_LISP_CONTEXT_ARGS_DECL) ~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ return re_search_2 (bufp, NULL, 0, string, size, startpos, range, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regs, size RE_LISP_CONTEXT_ARGS); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* Using the compiled pattern in BUFP->buffer, first tries to match the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ virtual concatenation of STRING1 and STRING2, starting first at index ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ STARTPOS, then at STARTPOS + 1, and so on. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ STRING1 and STRING2 have length SIZE1 and SIZE2, respectively. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RANGE is how far to scan while trying to match. RANGE = 0 means try ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ only at STARTPOS; in general, the last start tried is STARTPOS + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RANGE. ~~~~~~ All sizes and positions refer to bytes (not chars); under Mule, the code ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ knows about the format of the text and will only check at positions ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ where a character starts. ~~~~~~~~~~~~~~~~~~~~~~~~~ With MULE, RANGE is a byte position, not a char position. The last ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ start tried is the character starting <= STARTPOS + RANGE. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ In REGS, return the indices of the virtual concatenation of STRING1 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ and STRING2 that matched the entire BUFP->buffer and its contained ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ subexpressions. ~~~~~~~~~~~~~~~ Do not consider matching one past the index STOP in the virtual ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ concatenation of STRING1 and STRING2. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ We return either the position in the strings at which the match was ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ found, -1 if no match, or -2 if error (such as failure ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ stack overflow). */ ~~~~~~~~~~~~~~~~~~~~ int ~~~ re_search_2 (struct re_pattern_buffer *bufp, const char *str1, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int size1, const char *str2, int size2, int startpos, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int range, struct re_registers *regs, int stop ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RE_LISP_CONTEXT_ARGS_DECL) ~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ int val; ~~~~~~~~ re_char *string1 = (re_char *) str1; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_char *string2 = (re_char *) str2; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ REGISTER char *fastmap = bufp->fastmap; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ REGISTER RE_TRANSLATE_TYPE translate = bufp->translate; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int total_size = size1 + size2; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int endpos = startpos + range; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef REGEX_BEGLINE_CHECK ~~~~~~~~~~~~~~~~~~~~~~~~~~ int anchored_at_begline = 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ re_char *d; ~~~~~~~~~~~ #ifdef emacs ~~~~~~~~~~~~ Internal_Format fmt = buffer_or_other_internal_format (lispobj); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef REL_ALLOC ~~~~~~~~~~~~~~~~ const Ibyte *orig_buftext = ~~~~~~~~~~~~~~~~~~~~~~~~~~~ BUFFERP (lispobj) ? ~~~~~~~~~~~~~~~~~~~ BYTE_BUF_BYTE_ADDRESS (XBUFFER (lispobj), ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ BYTE_BUF_BEG (XBUFFER (lispobj))) : ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 0; ~~ #endif ~~~~~~ #ifdef ERROR_CHECK_MALLOC ~~~~~~~~~~~~~~~~~~~~~~~~~ int depth; ~~~~~~~~~~ #endif ~~~~~~ #endif /* emacs */ ~~~~~~~~~~~~~~~~~~ int forward_search_p; ~~~~~~~~~~~~~~~~~~~~~ /* Check for out-of-range STARTPOS. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (startpos < 0 || startpos > total_size) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return -1; ~~~~~~~~~~ /* Fix up RANGE if it might eventually take us outside ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the virtual concatenation of STRING1 and STRING2. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (endpos < 0) ~~~~~~~~~~~~~~~ range = 0 - startpos; ~~~~~~~~~~~~~~~~~~~~~ else if (endpos > total_size) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ range = total_size - startpos; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ forward_search_p = range > 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (void) (forward_search_p); /* This is only used with assertions, silence the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ compiler warning when they're turned off. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* If the search isn't to be a backwards one, don't waste time in a ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ search for a pattern that must be anchored. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (bufp->used > 0 && (re_opcode_t) bufp->buffer[0] == begbuf && range > 0) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ if (startpos > 0) ~~~~~~~~~~~~~~~~~ return -1; ~~~~~~~~~~ else ~~~~ { ~ d = ((const unsigned char *) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (startpos >= size1 ? string2 - size1 : string1) + startpos); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ range = itext_ichar_len_fmt (d, fmt); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ } ~ #ifdef emacs ~~~~~~~~~~~~ /* In a forward search for something that starts with \=. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ don't keep searching past point. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (bufp->used > 0 && (re_opcode_t) bufp->buffer[0] == at_dot && range > 0) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ if (!BUFFERP (lispobj)) ~~~~~~~~~~~~~~~~~~~~~~~ return -1; ~~~~~~~~~~ range = (BYTE_BUF_PT (XBUFFER (lispobj)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - BYTE_BUF_BEGV (XBUFFER (lispobj)) - startpos); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (range < 0) ~~~~~~~~~~~~~~ return -1; ~~~~~~~~~~ } ~ #endif /* emacs */ ~~~~~~~~~~~~~~~~~~ #ifdef ERROR_CHECK_MALLOC ~~~~~~~~~~~~~~~~~~~~~~~~~ /* Do this after the above return()s. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ depth = bind_regex_malloc_disallowed (1); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ /* Update the fastmap now if not correct already. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ BEGIN_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~~~ if (fastmap && !bufp->fastmap_accurate) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (re_compile_fastmap (bufp RE_LISP_SHORT_CONTEXT_ARGS) == -2) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ END_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~ UNBIND_REGEX_MALLOC_CHECK (); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return -2; ~~~~~~~~~~ } ~ END_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~ RE_SEARCH_RELOCATE_MOVEABLE_DATA_POINTERS (); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef REGEX_BEGLINE_CHECK ~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ long i = 0; ~~~~~~~~~~~ while (i < bufp->used) ~~~~~~~~~~~~~~~~~~~~~~ { ~ if (bufp->buffer[i] == start_memory || ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->buffer[i] == stop_memory) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ i += 4; ~~~~~~~ else ~~~~ break; ~~~~~~ } ~ anchored_at_begline = i < bufp->used && bufp->buffer[i] == begline; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ #endif ~~~~~~ #ifdef emacs ~~~~~~~~~~~~ BEGIN_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~~~ /* Update the mirror syntax table if it's used and dirty. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ SYNTAX_CODE (BUFFER_MIRROR_SYNTAX_TABLE (lispbuf), 'a'); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ scache = setup_syntax_cache (scache, lispobj, lispbuf, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ offset_to_bytexpos (lispobj, startpos), ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 1); ~~~ END_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~ RE_SEARCH_RELOCATE_MOVEABLE_DATA_POINTERS (); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ /* Loop through the string, looking for a place to start matching. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (;;) ~~~~~~~~ { ~ #ifdef REGEX_BEGLINE_CHECK ~~~~~~~~~~~~~~~~~~~~~~~~~~ /* If the regex is anchored at the beginning of a line (i.e. with a ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ^), then we can speed things up by skipping to the next ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ beginning-of-line. However, to determine "beginning of line" we ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ need to look at the previous char, so can't do this check if at ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ beginning of either string. (Well, we could if at the beginning of ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the second string, but it would require additional code, and this ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ is just an optimization.) */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (anchored_at_begline && startpos > 0 && startpos != size1) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ if (range > 0) ~~~~~~~~~~~~~~ { ~ /* whose stupid idea was it anyway to make this ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ function take two strings to match?? */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int lim = 0; ~~~~~~~~~~~~ re_char *orig_d; ~~~~~~~~~~~~~~~~ re_char *stop_d; ~~~~~~~~~~~~~~~~ /* Compute limit as below in fastmap code, so we are guaranteed ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ to remain within a single string. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (startpos < size1 && startpos + range >= size1) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ lim = range - (size1 - startpos); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ d = ((const unsigned char *) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (startpos >= size1 ? string2 - size1 : string1) + startpos); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ orig_d = d; ~~~~~~~~~~~ stop_d = d + range - lim; ~~~~~~~~~~~~~~~~~~~~~~~~~ /* We want to find the next location (including the current ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ one) where the previous char is a newline, so back up one ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ and search forward for a newline. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEC_IBYTEPTR_FMT (d, fmt); /* Ok, since startpos != size1. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Written out as an if-else to avoid testing `translate' ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ inside the loop. */ ~~~~~~~~~~~~~~~~~~~~ if (TRANSLATE_P (translate)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ while (d < stop_d && ~~~~~~~~~~~~~~~~~~~~ RE_TRANSLATE_1 (itext_ichar_fmt (d, fmt, lispobj)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ != '\n') ~~~~~~~~ INC_IBYTEPTR_FMT (d, fmt); ~~~~~~~~~~~~~~~~~~~~~~~~~~ else ~~~~ while (d < stop_d && ~~~~~~~~~~~~~~~~~~~~ itext_ichar_ascii_fmt (d, fmt, lispobj) != '\n') ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ INC_IBYTEPTR_FMT (d, fmt); ~~~~~~~~~~~~~~~~~~~~~~~~~~ /* If we were stopped by a newline, skip forward over it. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Otherwise we will get in an infloop when our start position ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ was at begline. */ ~~~~~~~~~~~~~~~~~~ if (d < stop_d) ~~~~~~~~~~~~~~~ INC_IBYTEPTR_FMT (d, fmt); ~~~~~~~~~~~~~~~~~~~~~~~~~~ range -= d - orig_d; ~~~~~~~~~~~~~~~~~~~~ startpos += d - orig_d; ~~~~~~~~~~~~~~~~~~~~~~~ assert (!forward_search_p || range >= 0); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ else if (range < 0) ~~~~~~~~~~~~~~~~~~~ { ~ /* We're lazy, like in the fastmap code below */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Ichar c; ~~~~~~~~ d = ((const unsigned char *) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (startpos >= size1 ? string2 - size1 : string1) + startpos); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEC_IBYTEPTR_FMT (d, fmt); ~~~~~~~~~~~~~~~~~~~~~~~~~~ c = itext_ichar_fmt (d, fmt, lispobj); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ c = RE_TRANSLATE (c); ~~~~~~~~~~~~~~~~~~~~~ if (c != '\n') ~~~~~~~~~~~~~~ goto advance; ~~~~~~~~~~~~~ } ~ } ~ #endif /* REGEX_BEGLINE_CHECK */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* If a fastmap is supplied, skip quickly over characters that ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ cannot be the start of a match. If the pattern can match the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ null string, however, we don't need to skip characters; we want ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the first null string. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~ if (fastmap && startpos < total_size && !bufp->can_be_null) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ /* For the moment, fastmap always works as if buffer ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ is in default format, so convert chars in the search strings ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ into default format as we go along, if necessary. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ &&#### fastmap needs rethinking for 8-bit-fixed so ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ it's faster. We need it to reflect the raw ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 8-bit-fixed values. That isn't so hard if we assume ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ that the top 96 bytes represent a single 1-byte ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ charset. For 16-bit/32-bit stuff it's probably not ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ worth it to make the fastmap represent the raw, due to ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ its nature -- we'd have to use the LSB for the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap, and that causes lots of problems with Mule ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ chars, where it essentially wipes out the usefulness ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ of the fastmap entirely. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (range > 0) /* Searching forwards. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ int lim = 0; ~~~~~~~~~~~~ int irange = range; ~~~~~~~~~~~~~~~~~~~ if (startpos < size1 && startpos + range >= size1) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ lim = range - (size1 - startpos); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ d = ((const unsigned char *) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (startpos >= size1 ? string2 - size1 : string1) + startpos); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Written out as an if-else to avoid testing `translate' ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ inside the loop. */ ~~~~~~~~~~~~~~~~~~~~ if (TRANSLATE_P (translate)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ while (range > lim) ~~~~~~~~~~~~~~~~~~~ { ~ re_char *old_d = d; ~~~~~~~~~~~~~~~~~~~ #ifdef MULE ~~~~~~~~~~~ Ibyte tempch[MAX_ICHAR_LEN]; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Ichar buf_ch = ~~~~~~~~~~~~~~ RE_TRANSLATE_1 (itext_ichar_fmt (d, fmt, lispobj)); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ set_itext_ichar (tempch, buf_ch); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (fastmap[*tempch]) ~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ #else ~~~~~ if (fastmap[(unsigned char) RE_TRANSLATE_1 (*d)]) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ #endif /* MULE */ ~~~~~~~~~~~~~~~~~ INC_IBYTEPTR_FMT (d, fmt); ~~~~~~~~~~~~~~~~~~~~~~~~~~ range -= (d - old_d); ~~~~~~~~~~~~~~~~~~~~~ assert (!forward_search_p || range >= 0); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ } ~ #ifdef MULE ~~~~~~~~~~~ else if (fmt != FORMAT_DEFAULT) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ while (range > lim) ~~~~~~~~~~~~~~~~~~~ { ~ re_char *old_d = d; ~~~~~~~~~~~~~~~~~~~ Ibyte tempch[MAX_ICHAR_LEN]; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Ichar buf_ch = itext_ichar_fmt (d, fmt, lispobj); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ set_itext_ichar (tempch, buf_ch); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (fastmap[*tempch]) ~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ INC_IBYTEPTR_FMT (d, fmt); ~~~~~~~~~~~~~~~~~~~~~~~~~~ range -= (d - old_d); ~~~~~~~~~~~~~~~~~~~~~ assert (!forward_search_p || range >= 0); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ } ~ #endif /* MULE */ ~~~~~~~~~~~~~~~~~ else ~~~~ { ~ while (range > lim && !fastmap[*d]) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ re_char *old_d = d; ~~~~~~~~~~~~~~~~~~~ INC_IBYTEPTR (d); ~~~~~~~~~~~~~~~~~ range -= (d - old_d); ~~~~~~~~~~~~~~~~~~~~~ assert (!forward_search_p || range >= 0); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ } ~ startpos += irange - range; ~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ else /* Searching backwards. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ /* #### It's not clear why we don't just write a loop, like ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for the moving-forward case. Perhaps the writer got lazy, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ since backward searches aren't so common. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ d = ((const unsigned char *) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (startpos >= size1 ? string2 - size1 : string1) + startpos); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef MULE ~~~~~~~~~~~ { ~ Ibyte tempch[MAX_ICHAR_LEN]; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Ichar buf_ch = ~~~~~~~~~~~~~~ RE_TRANSLATE (itext_ichar_fmt (d, fmt, lispobj)); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ set_itext_ichar (tempch, buf_ch); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!fastmap[*tempch]) ~~~~~~~~~~~~~~~~~~~~~~ goto advance; ~~~~~~~~~~~~~ } ~ #else ~~~~~ if (!fastmap[(unsigned char) RE_TRANSLATE (*d)]) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto advance; ~~~~~~~~~~~~~ #endif /* MULE */ ~~~~~~~~~~~~~~~~~ } ~ } ~ /* If can't match the null string, and that's all we have left, fail. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (range >= 0 && startpos == total_size && fastmap ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ && !bufp->can_be_null) ~~~~~~~~~~~~~~~~~~~~~~ { ~ UNBIND_REGEX_MALLOC_CHECK (); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return -1; ~~~~~~~~~~ } ~ #ifdef emacs /* XEmacs added, w/removal of immediate_quit */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!no_quit_in_re_search) ~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ BEGIN_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~~~ QUIT; ~~~~~ END_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~ RE_SEARCH_RELOCATE_MOVEABLE_DATA_POINTERS (); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ #endif ~~~~~~ BEGIN_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~~~ val = re_match_2_internal (bufp, string1, size1, string2, size2, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ startpos, regs, stop ~~~~~~~~~~~~~~~~~~~~ RE_LISP_CONTEXT_ARGS); ~~~~~~~~~~~~~~~~~~~~~~ #ifndef REGEX_MALLOC ~~~~~~~~~~~~~~~~~~~~ ALLOCA_GARBAGE_COLLECT (); ~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ END_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~ RE_SEARCH_RELOCATE_MOVEABLE_DATA_POINTERS (); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (val >= 0) ~~~~~~~~~~~~~ { ~ UNBIND_REGEX_MALLOC_CHECK (); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return startpos; ~~~~~~~~~~~~~~~~ } ~ if (val == -2) ~~~~~~~~~~~~~~ { ~ UNBIND_REGEX_MALLOC_CHECK (); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return -2; ~~~~~~~~~~ } ~ RE_SEARCH_RELOCATE_MOVEABLE_DATA_POINTERS (); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ advance: ~~~~~~~~ if (!range) ~~~~~~~~~~~ break; ~~~~~~ else if (range > 0) ~~~~~~~~~~~~~~~~~~~ { ~ Bytecount d_size; ~~~~~~~~~~~~~~~~~ d = ((const unsigned char *) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (startpos >= size1 ? string2 - size1 : string1) + startpos); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ d_size = itext_ichar_len_fmt (d, fmt); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ range -= d_size; ~~~~~~~~~~~~~~~~ assert (!forward_search_p || range >= 0); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ startpos += d_size; ~~~~~~~~~~~~~~~~~~~ } ~ else ~~~~ { ~ Bytecount d_size; ~~~~~~~~~~~~~~~~~ /* Note startpos > size1 not >=. If we are on the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ string1/string2 boundary, we want to backup into string1. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ d = ((const unsigned char *) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (startpos > size1 ? string2 - size1 : string1) + startpos); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEC_IBYTEPTR_FMT (d, fmt); ~~~~~~~~~~~~~~~~~~~~~~~~~~ d_size = itext_ichar_len_fmt (d, fmt); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ range += d_size; ~~~~~~~~~~~~~~~~ assert (!forward_search_p || range >= 0); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ startpos -= d_size; ~~~~~~~~~~~~~~~~~~~ } ~ } ~ UNBIND_REGEX_MALLOC_CHECK (); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return -1; ~~~~~~~~~~ } /* re_search_2 */ ~~~~~~~~~~~~~~~~~~~ ~ /* Declarations and macros for re_match_2. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* This converts PTR, a pointer into one of the search strings `string1' ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ and `string2' into an offset from the beginning of that string. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define POINTER_TO_OFFSET(ptr) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (FIRST_STRING_P (ptr) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~ ? ((regoff_t) ((ptr) - string1)) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ : ((regoff_t) ((ptr) - string2 + size1))) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Macros for dealing with the split strings in re_match_2. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define MATCHING_IN_FIRST_STRING (dend == end_match_1) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Call before fetching a character with *d. This switches over to ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ string2 if necessary. */ ~~~~~~~~~~~~~~~~~~~~~~~~~ #define REGEX_PREFETCH() \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ while (d == dend) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { \ ~~~~~~~~~~~ /* End of string2 => fail. */ \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (dend == end_match_2) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto fail; \ ~~~~~~~~~~~~~~~~~~ /* End of string1 => advance to string2. */ \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ d = string2; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~ dend = end_match_2; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* Test if at very beginning or at very end of the virtual concatenation ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ of `string1' and `string2'. If only one string, it's `string2'. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define AT_STRINGS_BEG(d) ((d) == (size1 ? string1 : string2) || !size2) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define AT_STRINGS_END(d) ((d) == end2) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* XEmacs change: ~~~~~~~~~~~~~~~~~ If the given position straddles the string gap, return the equivalent ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ position that is before or after the gap, respectively; otherwise, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return the same position. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define POS_BEFORE_GAP_UNSAFE(d) ((d) == string2 ? end1 : (d)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define POS_AFTER_GAP_UNSAFE(d) ((d) == end1 ? string2 : (d)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Test if CH is a word-constituent character. (XEmacs change) */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define WORDCHAR_P(ch) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (SYNTAX (BUFFER_MIRROR_SYNTAX_TABLE (lispbuf), ch) == Sword) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Free everything we malloc. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef MATCH_MAY_ALLOCATE ~~~~~~~~~~~~~~~~~~~~~~~~~ #define FREE_VAR(var,type) if (var) REGEX_FREE (var, type); var = NULL ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define FREE_VARIABLES() \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ do { \ ~~~~~~~~~~~~~~ UNBIND_REGEX_MALLOC_CHECK (); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ REGEX_FREE_STACK (fail_stack.stack); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ FREE_VAR (regstart, re_char **); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ FREE_VAR (regend, re_char **); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ FREE_VAR (old_regstart, re_char **); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ FREE_VAR (old_regend, re_char **); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ FREE_VAR (best_regstart, re_char **); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ FREE_VAR (best_regend, re_char **); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ FREE_VAR (reg_info, register_info_type *); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ FREE_VAR (reg_dummy, re_char **); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ FREE_VAR (reg_info_dummy, register_info_type *); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } while (0) ~~~~~~~~~~~ #else /* not MATCH_MAY_ALLOCATE */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define FREE_VARIABLES() \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ do { \ ~~~~~~~~~~~ UNBIND_REGEX_MALLOC_CHECK (); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } while (0) ~~~~~~~~~~~ #endif /* MATCH_MAY_ALLOCATE */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* These values must meet several constraints. They must not be valid ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ register values, which means we can use numbers larger than MAX_REGNUM. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ They must differ by 1, because of NUM_FAILURE_ITEMS above. And the value ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for the lowest register must be larger than the value for the highest ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ register, so we do not try to actually save any registers when none are ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ active. */ ~~~~~~~~~~~ #define NO_HIGHEST_ACTIVE_REG (MAX_REGNUM + 1) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define NO_LOWEST_ACTIVE_REG (NO_HIGHEST_ACTIVE_REG + 1) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ~ /* Matching routines. */ ~~~~~~~~~~~~~~~~~~~~~~~~~ #ifndef emacs /* XEmacs never uses this. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* re_match is like re_match_2 except it takes only a single string. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int ~~~ re_match (struct re_pattern_buffer *bufp, const char *string, int size, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int pos, struct re_registers *regs ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RE_LISP_CONTEXT_ARGS_DECL) ~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ int result = re_match_2_internal (bufp, NULL, 0, (re_char *) string, size, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ pos, regs, size ~~~~~~~~~~~~~~~ RE_LISP_CONTEXT_ARGS); ~~~~~~~~~~~~~~~~~~~~~~ ALLOCA_GARBAGE_COLLECT (); ~~~~~~~~~~~~~~~~~~~~~~~~~~ return result; ~~~~~~~~~~~~~~ } ~ #endif /* not emacs */ ~~~~~~~~~~~~~~~~~~~~~~ /* re_match_2 matches the compiled pattern in BUFP against the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (virtual) concatenation of STRING1 and STRING2 (of length SIZE1 and ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ SIZE2, respectively). We start matching at POS, and stop matching ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ at STOP. ~~~~~~~~ If REGS is non-null and the `no_sub' field of BUFP is nonzero, we ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ store offsets for the substring each group matched in REGS. See the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ documentation for exactly how many groups we fill. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ We return -1 if no match, -2 if an internal error (such as the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ failure stack overflowing). Otherwise, we return the length of the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ matched substring. */ ~~~~~~~~~~~~~~~~~~~~~~ int ~~~ re_match_2 (struct re_pattern_buffer *bufp, const char *string1, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int size1, const char *string2, int size2, int pos, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ struct re_registers *regs, int stop ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RE_LISP_CONTEXT_ARGS_DECL) ~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ int result; ~~~~~~~~~~~ #ifdef emacs ~~~~~~~~~~~~ /* Update the mirror syntax table if it's dirty now, this would otherwise ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ cause a malloc() in charset_mule in re_match_2_internal() when checking ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ characters' syntax. */ ~~~~~~~~~~~~~~~~~~~~~~ SYNTAX_CODE (BUFFER_MIRROR_SYNTAX_TABLE (lispbuf), 'a'); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ scache = setup_syntax_cache (scache, lispobj, lispbuf, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ offset_to_bytexpos (lispobj, pos), ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 1); ~~~ #endif ~~~~~~ result = re_match_2_internal (bufp, (re_char *) string1, size1, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (re_char *) string2, size2, ~~~~~~~~~~~~~~~~~~~~~~~~~~~ pos, regs, stop ~~~~~~~~~~~~~~~ RE_LISP_CONTEXT_ARGS); ~~~~~~~~~~~~~~~~~~~~~~ ALLOCA_GARBAGE_COLLECT (); ~~~~~~~~~~~~~~~~~~~~~~~~~~ return result; ~~~~~~~~~~~~~~ } ~ /* This is a separate function so that we can force an alloca cleanup ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ afterwards. */ ~~~~~~~~~~~~~~~ static int ~~~~~~~~~~ re_match_2_internal (struct re_pattern_buffer *bufp, re_char *string1, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int size1, re_char *string2, int size2, int pos, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ struct re_registers *regs, int stop ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RE_LISP_CONTEXT_ARGS_MULE_DECL) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ /* General temporaries. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~ int mcnt; ~~~~~~~~~ re_char *p1; ~~~~~~~~~~~~ int should_succeed; /* XEmacs change */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Just past the end of the corresponding string. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_char *end1, *end2; ~~~~~~~~~~~~~~~~~~~~~ /* Pointers into string1 and string2, just past the last characters in ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ each to consider matching. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_char *end_match_1, *end_match_2; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Where we are in the data, and the end of the current string. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_char *d, *dend; ~~~~~~~~~~~~~~~~~~ /* Where we are in the pattern, and the end of the pattern. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ unsigned char *p; ~~~~~~~~~~~~~~~~~ re_char *pstart; ~~~~~~~~~~~~~~~~ REGISTER re_char *pend; ~~~~~~~~~~~~~~~~~~~~~~~ /* Mark the opcode just after a start_memory, so we can test for an ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ empty subpattern when we get to the stop_memory. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_char *just_past_start_mem = 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* We use this to map every character in the string. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RE_TRANSLATE_TYPE translate = bufp->translate; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Failure point stack. Each place that can handle a failure further ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ down the line pushes a failure point on this stack. It consists of ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ restart, regend, and reg_info for all registers corresponding to ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the subexpressions we're currently inside, plus the number of such ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ registers, and, finally, two char *'s. The first char * is where ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ to resume scanning the pattern; the second one is where to resume ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ scanning the strings. If the latter is zero, the failure point is ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ a ``dummy''; if a failure happens and the failure point is a dummy, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ it gets discarded and the next one is tried. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef MATCH_MAY_ALLOCATE /* otherwise, this is global. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fail_stack_type fail_stack; ~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ #ifdef DEBUG ~~~~~~~~~~~~ static int failure_id; ~~~~~~~~~~~~~~~~~~~~~~ int nfailure_points_pushed = 0, nfailure_points_popped = 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ #ifdef REGEX_REL_ALLOC ~~~~~~~~~~~~~~~~~~~~~~ /* This holds the pointer to the failure stack, when ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ it is allocated relocatably. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fail_stack_elt_t *failure_stack_ptr; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ /* We fill all the registers internally, independent of what we ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return, for use in backreferences. The number here includes ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ an element for register zero. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regnum_t num_regs = bufp->re_ngroups + 1; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* The currently active registers. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regnum_t lowest_active_reg = NO_LOWEST_ACTIVE_REG; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regnum_t highest_active_reg = NO_HIGHEST_ACTIVE_REG; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Information on the contents of registers. These are pointers into ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the input strings; they record just what was matched (on this ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ attempt) by a subexpression part of the pattern, that is, the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regnum-th regstart pointer points to where in the pattern we began ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ matching and the regnum-th regend points to right after where we ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ stopped matching the regnum-th subexpression. (The zeroth register ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ keeps track of what the whole pattern matches.) */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_char **regstart, **regend; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ /* If a group that's operated upon by a repetition operator fails to ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ match anything, then the register for its start will need to be ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ restored because it will have been set to wherever in the string we ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ are when we last see its open-group operator. Similarly for a ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ register's end. */ ~~~~~~~~~~~~~~~~~~~ #ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_char **old_regstart, **old_regend; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ /* The is_active field of reg_info helps us keep track of which (possibly ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ nested) subexpressions we are currently in. The matched_something ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ field of reg_info[reg_num] helps us tell whether or not we have ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ matched any of the pattern so far this time through the reg_num-th ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ subexpression. These two fields get reset each time through any ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ loop their register is in. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef MATCH_MAY_ALLOCATE /* otherwise, this is global. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ register_info_type *reg_info; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ /* The following record the register info as found in the above ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ variables when we find a match better than any we've seen before. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ This happens as we backtrack through the failure points, which in ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ turn happens only if we have not yet matched the entire string. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int best_regs_set = false; ~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_char **best_regstart, **best_regend; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ /* Logically, this is `best_regend[0]'. But we don't want to have to ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ allocate space for that if we're not allocating space for anything ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ else (see below). Also, we never need info about register 0 for ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ any of the other register vectors, and it seems rather a kludge to ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ treat `best_regend' differently than the rest. So we keep track of ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the end of the best match so far in a separate variable. We ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ initialize this to NULL so that when we backtrack the first time ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ and need to test it, it's not garbage. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_char *match_end = NULL; ~~~~~~~~~~~~~~~~~~~~~~~~~~ /* This helps SET_REGS_MATCHED avoid doing redundant work. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int set_regs_matched_done = 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Used when we pop values we don't care about. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_char **reg_dummy; ~~~~~~~~~~~~~~~~~~~~ register_info_type *reg_info_dummy; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ #ifdef DEBUG ~~~~~~~~~~~~ /* Counts the total number of registers pushed. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int num_regs_pushed = 0; ~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ /* 1 if this match ends in the same string (string1 or string2) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ as the best previous match. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_bool same_str_p; ~~~~~~~~~~~~~~~~~~~ /* 1 if this match is the best seen so far. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_bool best_match_p; ~~~~~~~~~~~~~~~~~~~~~ #ifdef emacs ~~~~~~~~~~~~ Internal_Format fmt = buffer_or_other_internal_format (lispobj); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef REL_ALLOC ~~~~~~~~~~~~~~~~ const Ibyte *orig_buftext = ~~~~~~~~~~~~~~~~~~~~~~~~~~~ BUFFERP (lispobj) ? ~~~~~~~~~~~~~~~~~~~ BYTE_BUF_BYTE_ADDRESS (XBUFFER (lispobj), ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ BYTE_BUF_BEG (XBUFFER (lispobj))) : ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 0; ~~ #endif ~~~~~~ #ifdef ERROR_CHECK_MALLOC ~~~~~~~~~~~~~~~~~~~~~~~~~ int depth = bind_regex_malloc_disallowed (1); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ #endif /* emacs */ ~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT1 ("\n\nEntering re_match_2.\n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ BEGIN_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~~~ INIT_FAIL_STACK (); ~~~~~~~~~~~~~~~~~~~ p = (unsigned char *) ALLOCA (bufp->used); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ END_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~ /* re_match_2_internal() modifies the compiled pattern (see the succeed_n, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ jump_n, set_number_at opcodes), make it re-entrant by working on a ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ copy. This should also give better locality of reference. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ memcpy (p, bufp->buffer, bufp->used); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ pstart = (re_char *) p; ~~~~~~~~~~~~~~~~~~~~~~~ pend = pstart + bufp->used; ~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef MATCH_MAY_ALLOCATE ~~~~~~~~~~~~~~~~~~~~~~~~~ /* Do not bother to initialize all the register variables if there are ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ no groups in the pattern, as it takes a fair amount of time. If ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ there are groups, we include space for register 0 (the whole ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ pattern), even though we never use it, since it simplifies the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ array indexing. We should fix this. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (bufp->re_ngroups) ~~~~~~~~~~~~~~~~~~~~~ { ~ BEGIN_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~~~ regstart = REGEX_TALLOC (num_regs, re_char *); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regend = REGEX_TALLOC (num_regs, re_char *); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ old_regstart = REGEX_TALLOC (num_regs, re_char *); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ old_regend = REGEX_TALLOC (num_regs, re_char *); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ best_regstart = REGEX_TALLOC (num_regs, re_char *); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ best_regend = REGEX_TALLOC (num_regs, re_char *); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ reg_info = REGEX_TALLOC (num_regs, register_info_type); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ reg_dummy = REGEX_TALLOC (num_regs, re_char *); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ reg_info_dummy = REGEX_TALLOC (num_regs, register_info_type); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ END_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~ if (!(regstart && regend && old_regstart && old_regend && reg_info ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ && best_regstart && best_regend && reg_dummy && reg_info_dummy)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ FREE_VARIABLES (); ~~~~~~~~~~~~~~~~~~ return -2; ~~~~~~~~~~ } ~ } ~ else ~~~~ { ~ /* We must initialize all our variables to NULL, so that ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `FREE_VARIABLES' doesn't try to free them. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regstart = regend = old_regstart = old_regend = best_regstart ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ = best_regend = reg_dummy = NULL; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ reg_info = reg_info_dummy = (register_info_type *) NULL; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ #endif /* MATCH_MAY_ALLOCATE */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #if defined (emacs) && defined (REL_ALLOC) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ /* If the allocations above (or the call to setup_syntax_cache() in ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_match_2) caused a rel-alloc relocation, then fix up the data ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ pointers */ ~~~~~~~~~~~ Bytecount offset = offset_post_relocation (lispobj, orig_buftext); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (offset) ~~~~~~~~~~~ { ~ string1 += offset; ~~~~~~~~~~~~~~~~~~ string2 += offset; ~~~~~~~~~~~~~~~~~~ } ~ } ~ #endif /* defined (emacs) && defined (REL_ALLOC) */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* The starting position is bogus. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (pos < 0 || pos > size1 + size2) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ FREE_VARIABLES (); ~~~~~~~~~~~~~~~~~~ return -1; ~~~~~~~~~~ } ~ /* Initialize subexpression text positions to our sentinel to mark ones that ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ no start_memory/stop_memory has been seen for. Also initialize the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ register information struct. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (mcnt = 1; mcnt < num_regs; mcnt++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ regstart[mcnt] = regend[mcnt] = old_regstart[mcnt] = old_regend[mcnt] ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ = best_regstart[mcnt] = best_regend[mcnt] = REG_UNSET_VALUE; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ REG_MATCH_NULL_STRING_P (reg_info[mcnt]) = MATCH_NULL_UNSET_VALUE; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ IS_ACTIVE (reg_info[mcnt]) = 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ MATCHED_SOMETHING (reg_info[mcnt]) = 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ EVER_MATCHED_SOMETHING (reg_info[mcnt]) = 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* We move `string1' into `string2' if the latter's empty -- but not if ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `string1' is null. */ ~~~~~~~~~~~~~~~~~~~~~~ if (size2 == 0 && string1 != NULL) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ string2 = string1; ~~~~~~~~~~~~~~~~~~ size2 = size1; ~~~~~~~~~~~~~~ string1 = 0; ~~~~~~~~~~~~ size1 = 0; ~~~~~~~~~~ } ~ end1 = string1 + size1; ~~~~~~~~~~~~~~~~~~~~~~~ end2 = string2 + size2; ~~~~~~~~~~~~~~~~~~~~~~~ /* Compute where to stop matching, within the two strings. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (stop <= size1) ~~~~~~~~~~~~~~~~~~ { ~ end_match_1 = string1 + stop; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ end_match_2 = string2; ~~~~~~~~~~~~~~~~~~~~~~ } ~ else ~~~~ { ~ end_match_1 = end1; ~~~~~~~~~~~~~~~~~~~ end_match_2 = string2 + stop - size1; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* `p' scans through the pattern as `d' scans through the data. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `dend' is the end of the input string that `d' points within. `d' ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ is advanced into the following input string whenever necessary, but ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ this happens before fetching; therefore, at the beginning of the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ loop, `d' can be pointing at the end of a string, but it cannot ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ equal `string2'. */ ~~~~~~~~~~~~~~~~~~~~ if (size1 > 0 && pos <= size1) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ d = string1 + pos; ~~~~~~~~~~~~~~~~~~ dend = end_match_1; ~~~~~~~~~~~~~~~~~~~ } ~ else ~~~~ { ~ d = string2 + pos - size1; ~~~~~~~~~~~~~~~~~~~~~~~~~~ dend = end_match_2; ~~~~~~~~~~~~~~~~~~~ } ~ DEBUG_MATCH_PRINT1 ("The compiled pattern is: \n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT_COMPILED_PATTERN (bufp, p, pend); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT1 ("The string to match is: `"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT_DOUBLE_STRING (d, string1, size1, string2, size2); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT1 ("'\n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* This loops over pattern commands. It exits by returning from the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ function if the match is complete, or it drops through if the match ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fails at this starting point in the input data. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (;;) ~~~~~~~~ { ~ DEBUG_MATCH_PRINT2 ("\n0x%zx: ", (Bytecount) p); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef emacs /* XEmacs added, w/removal of immediate_quit */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!no_quit_in_re_search) ~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ BEGIN_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~~~ QUIT; ~~~~~ END_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~ RE_MATCH_RELOCATE_MOVEABLE_DATA_POINTERS (); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ #endif ~~~~~~ if (p == pend) ~~~~~~~~~~~~~~ { /* End of pattern means we might have succeeded. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT1 ("end of pattern ... "); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* If we haven't matched the entire string, and we want the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ longest match, try backtracking. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (d != end_match_2) ~~~~~~~~~~~~~~~~~~~~~ { ~ same_str_p = (FIRST_STRING_P (match_end) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ == MATCHING_IN_FIRST_STRING); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* AIX compiler got confused when this was combined ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ with the previous declaration. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (same_str_p) ~~~~~~~~~~~~~~~ best_match_p = d > match_end; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ else ~~~~ best_match_p = !MATCHING_IN_FIRST_STRING; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT1 ("backtracking.\n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!FAIL_STACK_EMPTY ()) ~~~~~~~~~~~~~~~~~~~~~~~~~ { /* More failure points to try. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* If exceeds best match so far, save it. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!best_regs_set || best_match_p) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ best_regs_set = true; ~~~~~~~~~~~~~~~~~~~~~ match_end = d; ~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT1 ("\nSAVING match as best so far.\n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (mcnt = 1; mcnt < num_regs; mcnt++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ best_regstart[mcnt] = regstart[mcnt]; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ best_regend[mcnt] = regend[mcnt]; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ } ~ goto fail; ~~~~~~~~~~ } ~ /* If no failure points, don't restore garbage. And if ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ last match is real best match, don't restore second ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ best one. */ ~~~~~~~~~~~~ else if (best_regs_set && !best_match_p) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ restore_best_regs: ~~~~~~~~~~~~~~~~~~ /* Restore best match. It may happen that `dend == ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ end_match_1' while the restored d is in string2. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ For example, the pattern `x.*y.*z' against the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ strings `x-' and `y-z-', if the two strings are ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ not consecutive in memory. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT1 ("Restoring best registers.\n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ d = match_end; ~~~~~~~~~~~~~~ dend = ((d >= string1 && d <= end1) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ? end_match_1 : end_match_2); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (mcnt = 1; mcnt < num_regs; mcnt++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ regstart[mcnt] = best_regstart[mcnt]; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regend[mcnt] = best_regend[mcnt]; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ } ~ } /* d != end_match_2 */ ~~~~~~~~~~~~~~~~~~~~~~~~ succeed_label: ~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT1 ("Accepting match.\n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* If caller wants register contents data back, do it. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ int num_nonshy_regs = bufp->re_nsub + 1; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (regs && !bufp->no_sub) ~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ /* Have the register data arrays been allocated? */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (bufp->regs_allocated == REGS_UNALLOCATED) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { /* No. So allocate them with malloc. We need one ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ extra element beyond `num_regs' for the `-1' marker ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ GNU code uses. */ ~~~~~~~~~~~~~~~~~~ regs->num_regs = MAX (RE_NREGS, num_nonshy_regs + 1); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ BEGIN_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~~~ regs->start = TALLOC (regs->num_regs, regoff_t); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regs->end = TALLOC (regs->num_regs, regoff_t); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ END_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~ RE_MATCH_RELOCATE_MOVEABLE_DATA_POINTERS (); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (regs->start == NULL || regs->end == NULL) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ FREE_VARIABLES (); ~~~~~~~~~~~~~~~~~~ return -2; ~~~~~~~~~~ } ~ bufp->regs_allocated = REGS_REALLOCATE; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ else if (bufp->regs_allocated == REGS_REALLOCATE) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { /* Yes. If we need more elements than were already ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ allocated, reallocate them. If we need fewer, just ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ leave it alone. */ ~~~~~~~~~~~~~~~~~~~ if (regs->num_regs < num_nonshy_regs + 1) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ regs->num_regs = num_nonshy_regs + 1; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ BEGIN_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~~~ RETALLOC (regs->start, regs->num_regs, regoff_t); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RETALLOC (regs->end, regs->num_regs, regoff_t); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ END_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~ RE_MATCH_RELOCATE_MOVEABLE_DATA_POINTERS (); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (regs->start == NULL || regs->end == NULL) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ FREE_VARIABLES (); ~~~~~~~~~~~~~~~~~~ return -2; ~~~~~~~~~~ } ~ } ~ } ~ else ~~~~ { ~ /* The braces fend off a "empty body in an else-statement" ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ warning under GCC when assert expands to nothing. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ assert (bufp->regs_allocated == REGS_FIXED); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* Convert the pointer data in `regstart' and `regend' to ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ indices. Register zero has to be set differently, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ since we haven't kept track of any info for it. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (regs->num_regs > 0) ~~~~~~~~~~~~~~~~~~~~~~~ { ~ regs->start[0] = pos; ~~~~~~~~~~~~~~~~~~~~~ regs->end[0] = (MATCHING_IN_FIRST_STRING ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ? ((regoff_t) (d - string1)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ : ((regoff_t) (d - string2 + size1))); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* Map over the NUM_NONSHY_REGS non-shy internal registers. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Copy each into the corresponding external register. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ MCNT indexes external registers. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (mcnt = 1; mcnt < MIN (num_nonshy_regs, regs->num_regs); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ mcnt++) ~~~~~~~ { ~ int internal_reg = bufp->external_to_internal_register[mcnt]; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if ((int)0xDEADBEEF == internal_reg ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || REG_UNSET (regstart[internal_reg]) || ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ REG_UNSET (regend[internal_reg])) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regs->start[mcnt] = regs->end[mcnt] = -1; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ else ~~~~ { ~ regs->start[mcnt] = ~~~~~~~~~~~~~~~~~~~ (regoff_t) POINTER_TO_OFFSET (regstart[internal_reg]); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regs->end[mcnt] = ~~~~~~~~~~~~~~~~~ (regoff_t) POINTER_TO_OFFSET (regend[internal_reg]); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ } ~ } /* regs && !bufp->no_sub */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* If we have regs and the regs structure has more elements than ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ were in the pattern, set the extra elements starting with ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ NUM_NONSHY_REGS to -1. If we (re)allocated the registers, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ this is the case, because we always allocate enough to have ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ at least one -1 at the end. ~~~~~~~~~~~~~~~~~~~~~~~~~~~ We do this even when no_sub is set because some applications ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (XEmacs) reuse register structures which may contain stale ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ information, and permit attempts to access those registers. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ It would be possible to require the caller to do this, but we'd ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ have to change the API for this function to reflect that, and ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ audit all callers. Note: as of 2003-04-17 callers in XEmacs ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ do clear the registers, but it's safer to leave this code in ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ because of reallocation. ~~~~~~~~~~~~~~~~~~~~~~~~ */ ~~ if (regs && regs->num_regs > 0) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (mcnt = num_nonshy_regs; mcnt < regs->num_regs; mcnt++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regs->start[mcnt] = regs->end[mcnt] = -1; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ DEBUG_MATCH_PRINT4 ("%u failure points pushed, %u popped (%u remain).\n", ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ nfailure_points_pushed, nfailure_points_popped, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ nfailure_points_pushed - nfailure_points_popped); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT2 ("%u registers pushed.\n", num_regs_pushed); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ mcnt = d - pos - (MATCHING_IN_FIRST_STRING ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ? string1 ~~~~~~~~~ : string2 - size1); ~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT2 ("Returning %d from re_match_2.\n", mcnt); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ FREE_VARIABLES (); ~~~~~~~~~~~~~~~~~~ return mcnt; ~~~~~~~~~~~~ } ~ /* Otherwise match next pattern command. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ switch ((re_opcode_t) *p++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ /* Ignore these. Used to ignore the n of succeed_n's which ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ currently have n == 0. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~ case no_op: ~~~~~~~~~~~ DEBUG_MATCH_PRINT1 ("EXECUTING no_op.\n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case succeed: ~~~~~~~~~~~~~ DEBUG_MATCH_PRINT1 ("EXECUTING succeed.\n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto succeed_label; ~~~~~~~~~~~~~~~~~~~ /* Match exactly a string of length n in the pattern. The ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ following byte in the pattern defines n, and the n bytes after ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ that make up the string to match. (Under Mule, this will be in ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the default internal format.) */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case exactn: ~~~~~~~~~~~~ mcnt = *p++; ~~~~~~~~~~~~ DEBUG_MATCH_PRINT2 ("EXECUTING exactn %d.\n", mcnt); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* This is written out as an if-else so we don't waste time ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ testing `translate' inside the loop. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (TRANSLATE_P (translate)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ do ~~ { ~ #ifdef MULE ~~~~~~~~~~~ Bytecount pat_len; ~~~~~~~~~~~~~~~~~~ REGEX_PREFETCH (); ~~~~~~~~~~~~~~~~~~ if (RE_TRANSLATE_1 (itext_ichar_fmt (d, fmt, lispobj)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ != itext_ichar (p)) ~~~~~~~~~~~~~~~~~~~ goto fail; ~~~~~~~~~~ pat_len = itext_ichar_len (p); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ p += pat_len; ~~~~~~~~~~~~~ INC_IBYTEPTR_FMT (d, fmt); ~~~~~~~~~~~~~~~~~~~~~~~~~~ mcnt -= pat_len; ~~~~~~~~~~~~~~~~ #else /* not MULE */ ~~~~~~~~~~~~~~~~~~~~ REGEX_PREFETCH (); ~~~~~~~~~~~~~~~~~~ if ((unsigned char) RE_TRANSLATE_1 (*d++) != *p++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto fail; ~~~~~~~~~~ mcnt--; ~~~~~~~ #endif ~~~~~~ } ~ while (mcnt > 0); ~~~~~~~~~~~~~~~~~ } ~ else ~~~~ { ~ #ifdef MULE ~~~~~~~~~~~ /* If buffer format is default, then we can shortcut and just ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ compare the text directly, byte by byte. Otherwise, we ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ need to go character by character. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (fmt != FORMAT_DEFAULT) ~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ do ~~ { ~ Bytecount pat_len; ~~~~~~~~~~~~~~~~~~ REGEX_PREFETCH (); ~~~~~~~~~~~~~~~~~~ if (itext_ichar_fmt (d, fmt, lispobj) != ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ itext_ichar (p)) ~~~~~~~~~~~~~~~~ goto fail; ~~~~~~~~~~ pat_len = itext_ichar_len (p); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ p += pat_len; ~~~~~~~~~~~~~ INC_IBYTEPTR_FMT (d, fmt); ~~~~~~~~~~~~~~~~~~~~~~~~~~ mcnt -= pat_len; ~~~~~~~~~~~~~~~~ } ~ while (mcnt > 0); ~~~~~~~~~~~~~~~~~ } ~ else ~~~~ #endif ~~~~~~ { ~ do ~~ { ~ REGEX_PREFETCH (); ~~~~~~~~~~~~~~~~~~ if (*d++ != *p++) goto fail; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ mcnt--; ~~~~~~~ } ~ while (mcnt > 0); ~~~~~~~~~~~~~~~~~ } ~ } ~ SET_REGS_MATCHED (); ~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ /* Match any character except possibly a newline or a null. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case anychar: ~~~~~~~~~~~~~ DEBUG_MATCH_PRINT1 ("EXECUTING anychar.\n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ REGEX_PREFETCH (); ~~~~~~~~~~~~~~~~~~ if ((!(bufp->syntax & RE_DOT_NEWLINE) && ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RE_TRANSLATE (itext_ichar_fmt (d, fmt, lispobj)) == '\n') ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || (bufp->syntax & RE_DOT_NOT_NULL && ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RE_TRANSLATE (itext_ichar_fmt (d, fmt, lispobj)) == ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ '\000')) ~~~~~~~~ goto fail; ~~~~~~~~~~ SET_REGS_MATCHED (); ~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT2 (" Matched `%c'.\n", *d); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ INC_IBYTEPTR_FMT (d, fmt); /* XEmacs change */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case charset: ~~~~~~~~~~~~~ case charset_not: ~~~~~~~~~~~~~~~~~ { ~ REGISTER Ichar c; ~~~~~~~~~~~~~~~~~ re_bool not_p = (re_opcode_t) *(p - 1) == charset_not; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT2 ("EXECUTING charset%s.\n", not_p ? "_not" : ""); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ REGEX_PREFETCH (); ~~~~~~~~~~~~~~~~~~ c = itext_ichar_fmt (d, fmt, lispobj); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ c = RE_TRANSLATE (c); /* The character to match. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Cast to `unsigned int' instead of `unsigned char' in case the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bit list is a full 32 bytes long. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if ((unsigned int)c < (unsigned int) (*p * BYTEWIDTH) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ && p[1 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH))) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ not_p = !not_p; ~~~~~~~~~~~~~~~ p += 1 + *p; ~~~~~~~~~~~~ if (!not_p) goto fail; ~~~~~~~~~~~~~~~~~~~~~~ SET_REGS_MATCHED (); ~~~~~~~~~~~~~~~~~~~~ INC_IBYTEPTR_FMT (d, fmt); /* XEmacs change */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ } ~ #ifdef MULE ~~~~~~~~~~~ case charset_mule: ~~~~~~~~~~~~~~~~~~ case charset_mule_not: ~~~~~~~~~~~~~~~~~~~~~~ { ~ REGISTER Ichar c; ~~~~~~~~~~~~~~~~~ re_bool not_p = (re_opcode_t) *(p - 1) == charset_mule_not; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Bitbyte class_bits = *p++; ~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT2 ("EXECUTING charset_mule%s.\n", ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ not_p ? "_not" : ""); ~~~~~~~~~~~~~~~~~~~~~ REGEX_PREFETCH (); ~~~~~~~~~~~~~~~~~~ c = itext_ichar_fmt (d, fmt, lispobj); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ c = RE_TRANSLATE (c); /* The character to match. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if ((class_bits && ~~~~~~~~~~~~~~~~~~ ((class_bits & BIT_WORD && ISWORD (c)) /* = ALNUM */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || (class_bits & BIT_ALPHA && ISALPHA (c)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || (class_bits & BIT_SPACE && ISSPACE (c)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || (class_bits & BIT_PUNCT && ISPUNCT (c)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || (TRANSLATE_P (translate) ? ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (class_bits & (BIT_UPPER | BIT_LOWER) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ && !NOCASEP (lispbuf, c)) ~~~~~~~~~~~~~~~~~~~~~~~~~ : ((class_bits & BIT_UPPER && ISUPPER (c)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || (class_bits & BIT_LOWER && ISLOWER (c)))))) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || EQ (Qt, unified_range_table_lookup ((void *) p, c, Qnil))) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ not_p = !not_p; ~~~~~~~~~~~~~~~ } ~ p += unified_range_table_bytes_used ((void *) p); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!not_p) goto fail; ~~~~~~~~~~~~~~~~~~~~~~ SET_REGS_MATCHED (); ~~~~~~~~~~~~~~~~~~~~ INC_IBYTEPTR_FMT (d, fmt); ~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ } ~ #endif /* MULE */ ~~~~~~~~~~~~~~~~~ /* The beginning of a group is represented by start_memory. The ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ arguments are the register number in the next two bytes, and the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ number of groups inner to this one in the two bytes thereafter. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ The text matched within the group is recorded (in the internal ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ registers data structure) under the register number. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case start_memory: ~~~~~~~~~~~~~~~~~~ { ~ regnum_t regno; ~~~~~~~~~~~~~~~ /* Find out if this group can match the empty string. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ p1 = p; /* To send to group_match_null_string_p. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ EXTRACT_NONNEGATIVE_AND_INCR (regno, p); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT3 ("EXECUTING start_memory %d (%d):\n", ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regno, extract_number (p)); ~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (REG_MATCH_NULL_STRING_P (reg_info[regno]) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ == MATCH_NULL_UNSET_VALUE) ~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ REG_MATCH_NULL_STRING_P (reg_info[regno]) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ = group_match_null_string_p (&p1, pend, reg_info); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ DEBUG_MATCH_PRINT2 (" group CAN%s match null string\n", ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ REG_MATCH_NULL_STRING_P (reg_info[regno]) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ? "NOT" : ""); ~~~~~~~~~~~~~~ /* Save the position in the string where we were the last time ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ we were at this open-group operator in case the group is ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ operated upon by a repetition operator, e.g., with `(a*)*b' ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ against `ab'; then we want to ignore where we are now in the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ string in case this attempt to match fails. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ old_regstart[regno] = REG_MATCH_NULL_STRING_P (reg_info[regno]) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ? REG_UNSET (regstart[regno]) ? d : regstart[regno] ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ : regstart[regno]; ~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT2 (" old_regstart: %zd\n", ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ POINTER_TO_OFFSET (old_regstart[regno])); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regstart[regno] = d; ~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT2 (" regstart: %zd\n", ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ POINTER_TO_OFFSET (regstart[regno])); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ IS_ACTIVE (reg_info[regno]) = 1; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ MATCHED_SOMETHING (reg_info[regno]) = 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Clear this whenever we change the register activity status. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ set_regs_matched_done = 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~ /* This is the new highest active register. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ highest_active_reg = regno; ~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* If nothing was active before, this is the new lowest active ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ register. */ ~~~~~~~~~~~~~ if (lowest_active_reg == NO_LOWEST_ACTIVE_REG) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ lowest_active_reg = regno; ~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Move past the inner group count. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ p += 2; ~~~~~~~ just_past_start_mem = p; ~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ } ~ /* The stop_memory opcode represents the end of a group. Its ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ arguments are the same as start_memory's: the register ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ number, and the number of inner groups. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case stop_memory: ~~~~~~~~~~~~~~~~~ { ~ regnum_t regno, inner_groups; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ EXTRACT_NONNEGATIVE_AND_INCR (regno, p); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ EXTRACT_NONNEGATIVE_AND_INCR (inner_groups, p); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT3 ("EXECUTING stop_memory %d (%d):\n", ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regno, inner_groups); ~~~~~~~~~~~~~~~~~~~~~ /* We need to save the string position the last time we were at ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ this close-group operator in case the group is operated ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ upon by a repetition operator, e.g., with `((a*)*(b*)*)*' ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ against `aba'; then we want to ignore where we are now in ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the string in case this attempt to match fails. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ old_regend[regno] = REG_MATCH_NULL_STRING_P (reg_info[regno]) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ? REG_UNSET (regend[regno]) ? d : regend[regno] ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ : regend[regno]; ~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT2 (" old_regend: %zd\n", ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ POINTER_TO_OFFSET (old_regend[regno])); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regend[regno] = d; ~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT2 (" regend: %zd\n", ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ POINTER_TO_OFFSET (regend[regno])); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* This register isn't active anymore. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ IS_ACTIVE (reg_info[regno]) = 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Clear this whenever we change the register activity status. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ set_regs_matched_done = 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~ /* If this was the only register active, nothing is active ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ anymore. */ ~~~~~~~~~~~~ if (lowest_active_reg == highest_active_reg) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ lowest_active_reg = NO_LOWEST_ACTIVE_REG; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ highest_active_reg = NO_HIGHEST_ACTIVE_REG; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ else ~~~~ { ~ /* We must scan for the new highest active register, since it ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ isn't necessarily one less than now: consider ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (a(b)c(d(e)f)g). When group 3 ends, after the f), the new ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ highest active register is 1. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regnum_t r = regno - 1; ~~~~~~~~~~~~~~~~~~~~~~~ while (r > 0 && !IS_ACTIVE (reg_info[r])) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ r--; ~~~~ /* If we end up at register zero, that means that we saved ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the registers as the result of an `on_failure_jump', not ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ a `start_memory', and we jumped to past the innermost ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `stop_memory'. For example, in ((.)*) we save registers ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 1 and 2 as a result of the *, but when we pop back to the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ second ), we are at the stop_memory 1. Thus, nothing is ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ active. */ ~~~~~~~~~~~ if (r == 0) ~~~~~~~~~~~ { ~ lowest_active_reg = NO_LOWEST_ACTIVE_REG; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ highest_active_reg = NO_HIGHEST_ACTIVE_REG; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ else ~~~~ { ~ highest_active_reg = r; ~~~~~~~~~~~~~~~~~~~~~~~ /* 98/9/21 jhod: We've also gotta set lowest_active_reg, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ don't we? */ ~~~~~~~~~~~~ r = 1; ~~~~~~ while (r < highest_active_reg && !IS_ACTIVE(reg_info[r])) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ r++; ~~~~ lowest_active_reg = r; ~~~~~~~~~~~~~~~~~~~~~~ } ~ } ~ /* If just failed to match something this time around with a ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ group that's operated on by a repetition operator, try to ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ force exit from the ``loop'', and restore the register ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ information for this group that we had before trying this ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ last match. */ ~~~~~~~~~~~~~~~ if ((!MATCHED_SOMETHING (reg_info[regno]) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || just_past_start_mem == p - 4) && p < pend) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ re_bool is_a_jump_n = false; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ p1 = p; ~~~~~~~ mcnt = 0; ~~~~~~~~~ switch ((re_opcode_t) *p1++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ case jump_n: ~~~~~~~~~~~~ is_a_jump_n = true; ~~~~~~~~~~~~~~~~~~~ case pop_failure_jump: ~~~~~~~~~~~~~~~~~~~~~~ case maybe_pop_jump: ~~~~~~~~~~~~~~~~~~~~ case jump: ~~~~~~~~~~ case dummy_failure_jump: ~~~~~~~~~~~~~~~~~~~~~~~~ EXTRACT_NUMBER_AND_INCR (mcnt, p1); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (is_a_jump_n) ~~~~~~~~~~~~~~~~ p1 += 2; ~~~~~~~~ break; ~~~~~~ default: ~~~~~~~~ /* do nothing */ ; ~~~~~~~~~~~~~~~~~~ } ~ p1 += mcnt; ~~~~~~~~~~~ /* If the next operation is a jump backwards in the pattern ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ to an on_failure_jump right before the start_memory ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ corresponding to this stop_memory, exit from the loop ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ by forcing a failure after pushing on the stack the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ on_failure_jump's jump in the pattern, and d. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (mcnt < 0 && (re_opcode_t) *p1 == on_failure_jump ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ && (re_opcode_t) p1[3] == start_memory && ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regno == extract_nonnegative (p1 + 4)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ /* If this group ever matched anything, then restore ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ what its registers were before trying this last ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ failed match, e.g., with `(a*)*b' against `ab' for ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regstart[1], and, e.g., with `((a*)*(b*)*)*' against ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `aba' for regend[3]. ~~~~~~~~~~~~~~~~~~~~ Also restore the registers for inner groups for, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ e.g., `((a*)(b*))*' against `aba' (register 3 would ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ otherwise get trashed). */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (EVER_MATCHED_SOMETHING (reg_info[regno])) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ int r; ~~~~~~ EVER_MATCHED_SOMETHING (reg_info[regno]) = 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Restore this and inner groups' (if any) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ registers. */ ~~~~~~~~~~~~~~ for (r = regno; r < regno + inner_groups; r++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ regstart[r] = old_regstart[r]; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* xx why this test? */ ~~~~~~~~~~~~~~~~~~~~~~~~ if (old_regend[r] >= regstart[r]) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regend[r] = old_regend[r]; ~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ } ~ p1++; ~~~~~ EXTRACT_NUMBER_AND_INCR (mcnt, p1); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PUSH_FAILURE_POINT (p1 + mcnt, d, -2); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto fail; ~~~~~~~~~~ } ~ } ~ /* We used to move past the register number and inner group count ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ here, when registers were just one byte; that's no longer ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ necessary with EXTRACT_NUMBER_AND_INCR(), above. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ } ~ /* \ has been turned into a `duplicate' command which is ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ followed by the numeric value of as the register number. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (Already passed through external-to-internal-register mapping, so ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ it refers to the actual group number, not the non-shy-only ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ numbering used in the external world.) */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case duplicate: ~~~~~~~~~~~~~~~ { ~ REGISTER re_char *d2, *dend2; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Get which register to match against. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regnum_t regno; ~~~~~~~~~~~~~~~ EXTRACT_NONNEGATIVE_AND_INCR (regno, p); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT2 ("EXECUTING duplicate %d.\n", regno); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Can't back reference a group which we've never matched. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (REG_UNSET (regstart[regno]) || REG_UNSET (regend[regno])) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto fail; ~~~~~~~~~~ /* Where in input to try to start matching. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ d2 = regstart[regno]; ~~~~~~~~~~~~~~~~~~~~~ /* Where to stop matching; if both the place to start and ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the place to stop matching are in the same string, then ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ set to the place to stop, otherwise, for now have to use ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the end of the first string. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ dend2 = ((FIRST_STRING_P (regstart[regno]) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ == FIRST_STRING_P (regend[regno])) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ? regend[regno] : end_match_1); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (;;) ~~~~~~~~ { ~ /* If necessary, advance to next segment in register ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ contents. */ ~~~~~~~~~~~~~ while (d2 == dend2) ~~~~~~~~~~~~~~~~~~~ { ~ if (dend2 == end_match_2) break; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (dend2 == regend[regno]) break; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* End of string1 => advance to string2. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ d2 = string2; ~~~~~~~~~~~~~ dend2 = regend[regno]; ~~~~~~~~~~~~~~~~~~~~~~ } ~ /* At end of register contents => success */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (d2 == dend2) break; ~~~~~~~~~~~~~~~~~~~~~~~ /* If necessary, advance to next segment in data. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ REGEX_PREFETCH (); ~~~~~~~~~~~~~~~~~~ /* How many characters left in this segment to match. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ mcnt = dend - d; ~~~~~~~~~~~~~~~~ /* Want how many consecutive characters we can match in ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ one shot, so, if necessary, adjust the count. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (mcnt > dend2 - d2) ~~~~~~~~~~~~~~~~~~~~~~ mcnt = dend2 - d2; ~~~~~~~~~~~~~~~~~~ /* Compare that many; failure if mismatch, else move ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ past them. */ ~~~~~~~~~~~~~~ if (TRANSLATE_P (translate) ~~~~~~~~~~~~~~~~~~~~~~~~~~~ ? bcmp_translate (d, d2, mcnt, translate ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef emacs ~~~~~~~~~~~~ , fmt, lispobj ~~~~~~~~~~~~~~ #endif ~~~~~~ ) ~ : memcmp (d, d2, mcnt)) ~~~~~~~~~~~~~~~~~~~~~~~ goto fail; ~~~~~~~~~~ d += mcnt, d2 += mcnt; ~~~~~~~~~~~~~~~~~~~~~~ /* Do this because we've match some characters. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ SET_REGS_MATCHED (); ~~~~~~~~~~~~~~~~~~~~ } ~ } ~ break; ~~~~~~ /* begline matches the empty string at the beginning of the string ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (unless `not_bol' is set in `bufp'), and, if ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `newline_anchor' is set, after newlines. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case begline: ~~~~~~~~~~~~~ DEBUG_MATCH_PRINT1 ("EXECUTING begline.\n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (AT_STRINGS_BEG (d)) ~~~~~~~~~~~~~~~~~~~~~~~ { ~ if (!bufp->not_bol) break; ~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ else ~~~~ { ~ re_char *d2 = d; ~~~~~~~~~~~~~~~~ DEC_IBYTEPTR (d2); ~~~~~~~~~~~~~~~~~~ if (itext_ichar_ascii_fmt (d2, fmt, lispobj) == '\n' && ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->newline_anchor) ~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ } ~ /* In all other cases, we fail. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto fail; ~~~~~~~~~~ /* endline is the dual of begline. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case endline: ~~~~~~~~~~~~~ DEBUG_MATCH_PRINT1 ("EXECUTING endline.\n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (AT_STRINGS_END (d)) ~~~~~~~~~~~~~~~~~~~~~~~ { ~ if (!bufp->not_eol) break; ~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* We have to ``prefetch'' the next character. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ else if ((d == end1 ? ~~~~~~~~~~~~~~~~~~~~~ itext_ichar_ascii_fmt (string2, fmt, lispobj) : ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ itext_ichar_ascii_fmt (d, fmt, lispobj)) == '\n' ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ && bufp->newline_anchor) ~~~~~~~~~~~~~~~~~~~~~~~~ { ~ break; ~~~~~~ } ~ goto fail; ~~~~~~~~~~ /* Match at the very beginning of the data. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case begbuf: ~~~~~~~~~~~~ DEBUG_MATCH_PRINT1 ("EXECUTING begbuf.\n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (AT_STRINGS_BEG (d)) ~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ goto fail; ~~~~~~~~~~ /* Match at the very end of the data. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case endbuf: ~~~~~~~~~~~~ DEBUG_MATCH_PRINT1 ("EXECUTING endbuf.\n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (AT_STRINGS_END (d)) ~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ goto fail; ~~~~~~~~~~ /* on_failure_keep_string_jump is used to optimize `.*\n'. It ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ pushes NULL as the value for the string on the stack. Then ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `pop_failure_point' will keep the current value for the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ string, instead of restoring it. To see why, consider ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ matching `foo\nbar' against `.*\n'. The .* matches the foo; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ then the . fails against the \n. But the next thing we want ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ to do is match the \n against the \n; if we restored the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ string value, we would be back at the foo. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Because this is used only in specific cases, we don't need to ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ check all the things that `on_failure_jump' does, to make ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ sure the right things get saved on the stack. Hence we don't ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ share its code. The only reason to push anything on the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ stack at all is that otherwise we would have to change ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `anychar's code to do something besides goto fail in this ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case; that seems worse than this. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case on_failure_keep_string_jump: ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT1 ("EXECUTING on_failure_keep_string_jump"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ EXTRACT_NUMBER_AND_INCR (mcnt, p); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT3 (" %d (to 0x%zx):\n", mcnt, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (Bytecount) (p + mcnt)); ~~~~~~~~~~~~~~~~~~~~~~~~ PUSH_FAILURE_POINT (p + mcnt, (unsigned char *) 0, -2); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ /* Uses of on_failure_jump: ~~~~~~~~~~~~~~~~~~~~~~~~~~~ Each alternative starts with an on_failure_jump that points ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ to the beginning of the next alternative. Each alternative ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ except the last ends with a jump that in effect jumps past ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the rest of the alternatives. (They really jump to the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ending jump of the following alternative, because tensioning ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ these jumps is a hassle.) ~~~~~~~~~~~~~~~~~~~~~~~~~ Repeats start with an on_failure_jump that points past both ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the repetition text and either the following jump or ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ pop_failure_jump back to this on_failure_jump. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case on_failure_jump: ~~~~~~~~~~~~~~~~~~~~~ on_failure: ~~~~~~~~~~~ DEBUG_MATCH_PRINT1 ("EXECUTING on_failure_jump"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ EXTRACT_NUMBER_AND_INCR (mcnt, p); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT3 (" %d (to 0x%zx)", mcnt, (Bytecount) (p + mcnt)); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* If this on_failure_jump comes right before a group (i.e., ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the original * applied to a group), save the information ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for that group and all inner ones, so that if we fail back ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ to this point, the group's information will be correct. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ For example, in \(a*\)*\1, we need the preceding group, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ and in \(\(a*\)b*\)\2, we need the inner group. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* We can't use `p' to check ahead because we push ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ a failure point to `p + mcnt' after we do this. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ p1 = p; ~~~~~~~ /* We need to skip no_op's before we look for the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ start_memory in case this on_failure_jump is happening as ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the result of a completed succeed_n, as in \(a\)\{1,3\}b\1 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ against aba. */ ~~~~~~~~~~~~~~~~ while (p1 < pend && (re_opcode_t) *p1 == no_op) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ p1++; ~~~~~ if (p1 < pend && (re_opcode_t) *p1 == start_memory) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ /* We have a new highest active register now. This will ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ get reset at the start_memory we are about to get to, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ but we will have saved all the registers relevant to ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ this repetition op, as described above. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ highest_active_reg = *(p1 + 1) + *(p1 + 2); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (lowest_active_reg == NO_LOWEST_ACTIVE_REG) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ lowest_active_reg = *(p1 + 1); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ DEBUG_MATCH_PRINT1 (":\n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~ PUSH_FAILURE_POINT (p + mcnt, d, -2); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ /* A smart repeat ends with `maybe_pop_jump'. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ We change it to either `pop_failure_jump' or `jump'. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case maybe_pop_jump: ~~~~~~~~~~~~~~~~~~~~ EXTRACT_NUMBER_AND_INCR (mcnt, p); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT2 ("EXECUTING maybe_pop_jump %d.\n", mcnt); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ REGISTER const unsigned char *p2 = p; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Compare the beginning of the repeat with what in the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ pattern follows its end. If we can establish that there ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ is nothing that they would both match, i.e., that we ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ would have to backtrack because of (as in, e.g., `a*a') ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ then we can change to pop_failure_jump, because we'll ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ never have to backtrack. ~~~~~~~~~~~~~~~~~~~~~~~~ This is not true in the case of alternatives: in ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `(a|ab)*' we do need to backtrack to the `ab' alternative ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (e.g., if the string was `ab'). But instead of trying to ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ detect that here, the alternative has put on a dummy ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ failure point which is what we will end up popping. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Skip over open/close-group commands. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ If what follows this loop is a ...+ construct, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ look at what begins its body, since we will have to ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ match at least one of that. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ while (1) ~~~~~~~~~ { ~ if (p2 + 2 < pend ~~~~~~~~~~~~~~~~~ && ((re_opcode_t) *p2 == stop_memory ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || (re_opcode_t) *p2 == start_memory)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ p2 += 3; ~~~~~~~~ else if (p2 + 6 < pend ~~~~~~~~~~~~~~~~~~~~~~ && (re_opcode_t) *p2 == dummy_failure_jump) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ p2 += 6; ~~~~~~~~ else ~~~~ break; ~~~~~~ } ~ p1 = p + mcnt; ~~~~~~~~~~~~~~ /* p1[0] ... p1[2] are the `on_failure_jump' corresponding ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ to the `maybe_finalize_jump' of this case. Examine what ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ follows. */ ~~~~~~~~~~~~ /* If we're at the end of the pattern, we can change. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (p2 == pend) ~~~~~~~~~~~~~~~ { ~ /* Consider what happens when matching ":\(.*\)" ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ against ":/". I don't really understand this code ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ yet. */ ~~~~~~~~ ((unsigned char *)p)[-3] = (re_char) pop_failure_jump; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT1 ~~~~~~~~~~~~~~~~~~ (" End of pattern: change to `pop_failure_jump'.\n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ else if ((re_opcode_t) *p2 == exactn ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || (bufp->newline_anchor && (re_opcode_t) *p2 == endline)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ REGISTER unsigned char c ~~~~~~~~~~~~~~~~~~~~~~~~ = *p2 == (unsigned char) endline ? '\n' : p2[2]; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if ((re_opcode_t) p1[3] == exactn && p1[5] != c) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ ((unsigned char *)p)[-3] ~~~~~~~~~~~~~~~~~~~~~~~~ = (unsigned char) pop_failure_jump; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT3 (" %c != %c => pop_failure_jump.\n", ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ c, p1[5]); ~~~~~~~~~~ } ~ else if ((re_opcode_t) p1[3] == charset ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || (re_opcode_t) p1[3] == charset_not) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ int not_p = (re_opcode_t) p1[3] == charset_not; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (c < (unsigned char) (p1[4] * BYTEWIDTH) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ && p1[5 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH))) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ not_p = !not_p; ~~~~~~~~~~~~~~~ /* `not_p' is equal to 1 if c would match, which means ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ that we can't change to pop_failure_jump. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!not_p) ~~~~~~~~~~~ { ~ ((unsigned char *)p)[-3] ~~~~~~~~~~~~~~~~~~~~~~~~ = (unsigned char) pop_failure_jump; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT1 (" No match => pop_failure_jump.\n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ } ~ } ~ else if ((re_opcode_t) *p2 == charset) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ #ifdef DEBUG ~~~~~~~~~~~~ REGISTER unsigned char c ~~~~~~~~~~~~~~~~~~~~~~~~ = *p2 == (unsigned char) endline ? '\n' : p2[2]; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ if ((re_opcode_t) p1[3] == exactn ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ && ! ((int) p2[1] * BYTEWIDTH > (int) p1[5] ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ && (p2[2 + p1[5] / BYTEWIDTH] ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ & (1 << (p1[5] % BYTEWIDTH))))) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ unsigned char *p3 = (unsigned char *)p; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ p3[-3] = (unsigned char) pop_failure_jump; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT3 (" %c != %c => pop_failure_jump.\n", ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ c, p1[5]); ~~~~~~~~~~ } ~ else if ((re_opcode_t) p1[3] == charset_not) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ int idx; ~~~~~~~~ /* We win if the charset_not inside the loop ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ lists every character listed in the charset after. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (idx = 0; idx < (int) p2[1]; idx++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (! (p2[2 + idx] == 0 ~~~~~~~~~~~~~~~~~~~~~~~ || (idx < (int) p1[4] ~~~~~~~~~~~~~~~~~~~~~ && ((p2[2 + idx] & ~ p1[5 + idx]) == 0)))) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ if (idx == p2[1]) ~~~~~~~~~~~~~~~~~ { ~ unsigned char *p3 = (unsigned char *) p; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ p3[-3] = (unsigned char) pop_failure_jump; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT1 (" No match => pop_failure_jump.\n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ } ~ else if ((re_opcode_t) p1[3] == charset) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ int idx; ~~~~~~~~ /* We win if the charset inside the loop ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ has no overlap with the one after the loop. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (idx = 0; ~~~~~~~~~~~~~ idx < (int) p2[1] && idx < (int) p1[4]; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ idx++) ~~~~~~ if ((p2[2 + idx] & p1[5 + idx]) != 0) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ if (idx == p2[1] || idx == p1[4]) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ unsigned char *p3 = (unsigned char *)p; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ p3[-3] = (unsigned char) pop_failure_jump; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT1 (" No match => pop_failure_jump.\n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ } ~ } ~ } ~ p -= 2; /* Point at relative address again. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if ((re_opcode_t) p[-1] != pop_failure_jump) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ p[-1] = (unsigned char) jump; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT1 (" Match => jump.\n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto unconditional_jump; ~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* Note fall through. */ ~~~~~~~~~~~~~~~~~~~~~~~~~ /* The end of a simple repeat has a pop_failure_jump back to ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ its matching on_failure_jump, where the latter will push a ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ failure point. The pop_failure_jump takes off failure ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ points put on by this pop_failure_jump's matching ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ on_failure_jump; we got through the pattern to here from the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ matching on_failure_jump, so didn't fail. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case pop_failure_jump: ~~~~~~~~~~~~~~~~~~~~~~ { ~ /* We need to pass separate storage for the lowest and ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ highest registers, even though we don't care about the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ actual values. Otherwise, we will restore only one ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ register from the stack, since lowest will == highest in ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `pop_failure_point'. */ ~~~~~~~~~~~~~~~~~~~~~~~~ int dummy_low_reg, dummy_high_reg; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ unsigned char *pdummy; ~~~~~~~~~~~~~~~~~~~~~~ re_char *sdummy = NULL; ~~~~~~~~~~~~~~~~~~~~~~~ USED (sdummy); /* Silence warning. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT1 ("EXECUTING pop_failure_jump.\n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ POP_FAILURE_POINT (sdummy, pdummy, ~~~~~~~~~~~~~~~~~~~~ regex.c:774:55: note: in definition of macro 'DEBUG_FAIL_PRINT2' if (debug_regexps & RE_DEBUG_FAILURE_POINT) printf (x1, x2) ^~ regex.c:6767:13: note: in expansion of macro 'POP_FAILURE_POINT' POP_FAILURE_POINT (sdummy, pdummy, ^~~~~~~~~~~~~~~~~ regex.c:1905:26: warning: format '%zx' expects argument of type 'size_t', but argument 2 has type 'long int' [-Wformat=] DEBUG_FAIL_PRINT2 (" Popping pattern 0x%zx: ", (Bytecount) pat); \ ^ ~~~~~~~~~~~~~~~~~~~ DEBUG_FAIL_PRINT_COMPILED_PATTERN (bufp, pat, pend); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_FAIL_PRINT2 (" Popping high active reg: %d\n", high_reg); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_FAIL_PRINT2 (" Popping low active reg: %d\n", low_reg); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } \ ~~~~~~~~~~~ \ ~ for (this_reg = high_reg; this_reg >= low_reg; this_reg--) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { \ ~~~~~~~~~~~ reg_info[this_reg].word = POP_FAILURE_ELT (); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regend[this_reg] = POP_FAILURE_RELOCATABLE (); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regstart[this_reg] = POP_FAILURE_RELOCATABLE (); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ \ ~ if (DEBUG_RUNTIME_FLAGS & RE_DEBUG_FAILURE_POINT) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { \ ~~~~~~~~~~ DEBUG_FAIL_PRINT2 (" Popping reg: %d\n", this_reg); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_FAIL_PRINT2 (" info: 0x%zx\n", \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ * (Bytecount *) ®_info[this_reg]); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_FAIL_PRINT2 (" end: 0x%zx\n", \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (Bytecount) regend[this_reg]); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_FAIL_PRINT2 (" start: 0x%zx\n", \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (Bytecount) regstart[this_reg]); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } \ ~~~~~~~~~~ } \ ~~~~~~~~~~~ \ ~ set_regs_matched_done = 0; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_STATEMENT (nfailure_points_popped++); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } while (0) /* POP_FAILURE_POINT */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ~ /* Structure for per-register (a.k.a. per-group) information. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Other register information, such as the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ starting and ending positions (which are addresses), and the list of ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ inner groups (which is a bits list) are maintained in separate ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ variables. ~~~~~~~~~~ We are making a (strictly speaking) nonportable assumption here: that ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the compiler will pack our bit fields into something that fits into ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the type of `word', i.e., is something that fits into one item on the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ failure stack. */ ~~~~~~~~~~~~~~~~~~ typedef union ~~~~~~~~~~~~~ { ~ fail_stack_elt_t word; ~~~~~~~~~~~~~~~~~~~~~~ struct ~~~~~~ { ~ /* This field is one if this group can match the empty string, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ zero if not. If not yet determined, `MATCH_NULL_UNSET_VALUE'. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define MATCH_NULL_UNSET_VALUE 3 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ unsigned int match_null_string_p : 2; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ unsigned int is_active : 1; ~~~~~~~~~~~~~~~~~~~~~~~~~~~ unsigned int matched_something : 1; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ unsigned int ever_matched_something : 1; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } bits; ~~~~~~~ } register_info_type; ~~~~~~~~~~~~~~~~~~~~~ #define REG_MATCH_NULL_STRING_P(R) ((R).bits.match_null_string_p) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define IS_ACTIVE(R) ((R).bits.is_active) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define MATCHED_SOMETHING(R) ((R).bits.matched_something) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define EVER_MATCHED_SOMETHING(R) ((R).bits.ever_matched_something) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Call this when have matched a real character; it sets `matched' flags ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for the subexpressions which we are currently inside. Also records ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ that those subexprs have matched. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define SET_REGS_MATCHED() \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ do \ ~~~~~~~~~~~~ { \ ~~~~~~~~~~~ if (!set_regs_matched_done) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { \ ~~~~~~~~~~ int r; \ ~~~~~~~~~~~~~~ set_regs_matched_done = 1; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (r = lowest_active_reg; r <= highest_active_reg; r++) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { \ ~~~~~~~~~~ MATCHED_SOMETHING (reg_info[r]) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ = EVER_MATCHED_SOMETHING (reg_info[r]) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ = 1; \ ~~~~~~~~~~~~ } \ ~~~~~~~~~~ } \ ~~~~~~~~~~ } \ ~~~~~~~~~~~ while (0) ~~~~~~~~~ ~ /* Subroutine declarations and macros for regex_compile. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Fetch the next character in the uncompiled pattern---translating it ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if necessary. */ ~~~~~~~~~~~~~~~~~ #define PATFETCH(c) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~ do { \ ~~~~~~~~~~~~~~ PATFETCH_RAW (c); \ ~~~~~~~~~~~~~~~~~~~~~~~~~ c = RE_TRANSLATE (c); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } while (0) ~~~~~~~~~~~ /* Fetch the next character in the uncompiled pattern, with no ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ translation. */ ~~~~~~~~~~~~~~~~ #define PATFETCH_RAW(c) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ do {if (p == pend) return REG_EEND; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ assert (p < pend); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~ c = itext_ichar (p); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ INC_IBYTEPTR (p); \ ~~~~~~~~~~~~~~~~~~~~~~~~~ } while (0) ~~~~~~~~~~~ /* Go backwards one character in the pattern. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define PATUNFETCH DEC_IBYTEPTR (p) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* If `translate' is non-null, return translate[D], else just D. We ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ cast the subscript to translate because some data is declared as ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `char *', to avoid warnings when a string constant is passed. But ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ when we use a character as a subscript we must make it unsigned. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define RE_TRANSLATE(d) \ ~~~~~~~~~~~~~~~~~~~~~~~~~ (TRANSLATE_P (translate) ? RE_TRANSLATE_1 (d) : (d)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Macros for outputting the compiled pattern into `buffer'. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* If the buffer isn't allocated when it comes in, use this. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define INIT_BUF_SIZE 32 ~~~~~~~~~~~~~~~~~~~~~~~~~ /* Make sure we have at least N more bytes of space in buffer. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define GET_BUFFER_SPACE(n) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ while (buf_end - bufp->buffer + (n) > (ptrdiff_t) bufp->allocated) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ EXTEND_BUFFER () ~~~~~~~~~~~~~~~~ /* Make sure we have one more byte of buffer space and then add C to it. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define BUF_PUSH(c) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~ do { \ ~~~~~~~~~~~~~~ GET_BUFFER_SPACE (1); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ *buf_end++ = (unsigned char) (c); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } while (0) ~~~~~~~~~~~ /* Ensure we have two more bytes of buffer space and then append C1 and C2. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define BUF_PUSH_2(c1, c2) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ do { \ ~~~~~~~~~~~~~~ GET_BUFFER_SPACE (2); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ *buf_end++ = (unsigned char) (c1); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ *buf_end++ = (unsigned char) (c2); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } while (0) ~~~~~~~~~~~ /* As with BUF_PUSH_2, except for three bytes. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define BUF_PUSH_3(c1, c2, c3) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ do { \ ~~~~~~~~~~~~~~ GET_BUFFER_SPACE (3); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ *buf_end++ = (unsigned char) (c1); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ *buf_end++ = (unsigned char) (c2); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ *buf_end++ = (unsigned char) (c3); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } while (0) ~~~~~~~~~~~ /* Store a jump with opcode OP at LOC to location TO. We store a ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ relative address offset by the three bytes the jump itself occupies. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define STORE_JUMP(op, loc, to) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ store_op1 (op, loc, (to) - (loc) - 3) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Likewise, for a two-argument jump. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define STORE_JUMP2(op, loc, to, arg) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ store_op2 (op, loc, (to) - (loc) - 3, arg) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Like `STORE_JUMP', but for inserting. Assume `buf_end' is the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ buffer end. */ ~~~~~~~~~~~~~~~ #define INSERT_JUMP(op, loc, to) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ insert_op1 (op, loc, (to) - (loc) - 3, buf_end) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Like `STORE_JUMP2', but for inserting. Assume `buf_end' is the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ buffer end. */ ~~~~~~~~~~~~~~~ #define INSERT_JUMP2(op, loc, to, arg) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ insert_op2 (op, loc, (to) - (loc) - 3, arg, buf_end) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Extend the buffer by twice its current size via realloc and ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ reset the pointers that pointed into the old block to point to the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ correct places in the new one. If extending the buffer results in it ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ being larger than RE_MAX_BUF_SIZE, then flag memory exhausted. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define EXTEND_BUFFER() \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ do { \ ~~~~~~~~~~~~~~~ re_char *old_buffer = bufp->buffer; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (bufp->allocated == RE_MAX_BUF_SIZE) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return REG_ESIZE; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->allocated <<= 1; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (bufp->allocated > RE_MAX_BUF_SIZE) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->allocated = RE_MAX_BUF_SIZE; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->buffer = \ ~~~~~~~~~~~~~~~~~~~~~~~ (unsigned char *) xrealloc (bufp->buffer, bufp->allocated); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (bufp->buffer == NULL) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return REG_ESPACE; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~ /* If the buffer moved, move all the pointers into it. */ \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (old_buffer != bufp->buffer) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { \ ~~~~~~~~~~~~ buf_end = (buf_end - old_buffer) + bufp->buffer; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ begalt = (begalt - old_buffer) + bufp->buffer; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (fixup_alt_jump) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~ fixup_alt_jump = (fixup_alt_jump - old_buffer) + bufp->buffer; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (laststart) \ ~~~~~~~~~~~~~~~~~~~~~~~ laststart = (laststart - old_buffer) + bufp->buffer; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (pending_exact) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~ pending_exact = (pending_exact - old_buffer) + bufp->buffer; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } \ ~~~~~~~~~~~~ } while (0) ~~~~~~~~~~~ #define INIT_REG_TRANSLATE_SIZE 5 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Macros for the compile stack. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Since offsets can go either forwards or backwards, this type needs to be ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ able to hold values from -(RE_MAX_BUF_SIZE - 1) to RE_MAX_BUF_SIZE - 1. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ typedef int pattern_offset_t; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ typedef struct ~~~~~~~~~~~~~~ { ~ pattern_offset_t begalt_offset; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ pattern_offset_t fixup_alt_jump; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ pattern_offset_t inner_group_offset; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ pattern_offset_t laststart_offset; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regnum_t regnum; ~~~~~~~~~~~~~~~~ } compile_stack_elt_t; ~~~~~~~~~~~~~~~~~~~~~~ typedef struct ~~~~~~~~~~~~~~ { ~ compile_stack_elt_t *stack; ~~~~~~~~~~~~~~~~~~~~~~~~~~~ int size; ~~~~~~~~~ int avail; /* Offset of next open position. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } compile_stack_type; ~~~~~~~~~~~~~~~~~~~~~ #define INIT_COMPILE_STACK_SIZE 32 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define COMPILE_STACK_EMPTY (compile_stack.avail == 0) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define COMPILE_STACK_FULL (compile_stack.avail == compile_stack.size) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* The next available element. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define COMPILE_STACK_TOP (compile_stack.stack[compile_stack.avail]) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Set the bit for character C in a bit vector. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define SET_LIST_BIT(c) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (buf_end[((unsigned char) (c)) / BYTEWIDTH] \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ |= 1 << (((unsigned char) c) % BYTEWIDTH)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef MULE ~~~~~~~~~~~ /* Set the "bit" for character C in a range table. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define SET_RANGETAB_BIT(c) put_range_table (rtab, c, c, Qt) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ #ifdef emacs ~~~~~~~~~~~~ /* Parse the longest number we can, but don't produce a bignum, that can't ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ correspond to anything we're interested in and would needlessly complicate ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ code. Also avoid the silent overflow issues of the non-emacs code below. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ If the string at P is not exhausted, leave P pointing at the next ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (probable-)non-digit byte encountered. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define GET_UNSIGNED_NUMBER(num) do \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Ibyte *_gus_numend = NULL; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Lisp_Object _gus_numno; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* most-positive-fixnum on 32 bit XEmacs is 10 decimal digits, \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ nine will keep us in fixnum territory no matter our \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ architecture */ \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Bytecount limit = min (pend - p, 9); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ \ ~ /* Require that any digits are ASCII. We already require that \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the user type ASCII in order to type {,(,|, etc, and there is \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the potential for security holes in the future if we allow \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ non-ASCII digits to specify groups in regexps and other \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ code that parses regexps is not aware of this. */ \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ _gus_numno = parse_integer (p, &_gus_numend, limit, 10, 1, \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Vdigit_fixnum_ascii); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (FIXNUMP (_gus_numno) && XREALFIXNUM (_gus_numno) >= 0) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ num = XREALFIXNUM (_gus_numno); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ p = _gus_numend; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } while (0) ~~~~~~~~~~~ #else ~~~~~ /* Get the next unsigned number in the uncompiled pattern. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define GET_UNSIGNED_NUMBER(num) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { if (p != pend) \ ~~~~~~~~~~~~~~~~~~~~~~~~ { \ ~~~~~~~~~~~ int _gun_do_unfetch = 1; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PATFETCH (c); \ ~~~~~~~~~~~~~~~~~~~~~~ while (ISDIGIT (c)) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~ { \ ~~~~~~~~~~~ if (num < 0) \ ~~~~~~~~~~~~~~~~~~~~ num = 0; \ ~~~~~~~~~~~~~~~~ num = num * 10 + c - '0'; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (p == pend) \ ~~~~~~~~~~~~~~~~~~~~~~ { \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ _gun_do_unfetch = 0; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; \ ~~~~~~~~~~~~~~~ } \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PATFETCH (c); \ ~~~~~~~~~~~~~~~~~~~~ } \ ~~~~~~~~~~~ if (_gun_do_unfetch) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Make sure P points to the next non-digit character. */ \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PATUNFETCH; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ #endif ~~~~~~ /* Map a string to the char class it names (if any). BEG points to the string ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ to be parsed and LIMIT is the length, in bytes, of that string. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ XEmacs; this only handles the NAME part of the [:NAME:] specification of a ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ character class name. The GNU emacs version of this function attempts to ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ handle the string from [: onwards, and is called re_wctype_parse. Our ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ approach means the function doesn't need to be called with every character ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ class encountered. ~~~~~~~~~~~~~~~~~~ LENGTH would be a Bytecount if this function didn't need to be compiled ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ also for executables that don't include lisp.h ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Return RECC_ERROR if STRP doesn't match a known character class. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_wctype_t ~~~~~~~~~~~ re_wctype (const unsigned char *beg, int limit) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ /* Sort tests in the length=five case by frequency the classes to minimize ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ number of times we fail the comparison. The frequencies of character class ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ names used in Emacs sources as of 2016-07-27: ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ $ find \( -name \*.c -o -name \*.el \) -exec grep -h '\[:[a-z]*:]' {} + | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ sed 's/]/]\n/g' |grep -o '\[:[a-z]*:]' |sort |uniq -c |sort -nr ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 213 [:alnum:] ~~~~~~~~~~~~~ 104 [:alpha:] ~~~~~~~~~~~~~ 62 [:space:] ~~~~~~~~~~~~ 39 [:digit:] ~~~~~~~~~~~~ 36 [:blank:] ~~~~~~~~~~~~ 26 [:word:] ~~~~~~~~~~~ 26 [:upper:] ~~~~~~~~~~~~ 21 [:lower:] ~~~~~~~~~~~~ 10 [:xdigit:] ~~~~~~~~~~~~~ 10 [:punct:] ~~~~~~~~~~~~ 10 [:ascii:] ~~~~~~~~~~~~ 4 [:nonascii:] ~~~~~~~~~~~~~~ 4 [:graph:] ~~~~~~~~~~~ 2 [:print:] ~~~~~~~~~~~ 2 [:cntrl:] ~~~~~~~~~~~ 1 [:ff:] ~~~~~~~~ If you update this list, consider also updating chain of or'ed conditions ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ in execute_charset function. XEmacs; our equivalent is the condition ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ checking class_bits in the charset_mule and charset_mule_not opcodes. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ ~~ switch (limit) { ~~~~~~~~~~~~~~~~ case 4: ~~~~~~~ if (!memcmp (beg, "word", 4)) return RECC_WORD; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case 5: ~~~~~~~ if (!memcmp (beg, "alnum", 5)) return RECC_ALNUM; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!memcmp (beg, "alpha", 5)) return RECC_ALPHA; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!memcmp (beg, "space", 5)) return RECC_SPACE; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!memcmp (beg, "digit", 5)) return RECC_DIGIT; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!memcmp (beg, "blank", 5)) return RECC_BLANK; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!memcmp (beg, "upper", 5)) return RECC_UPPER; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!memcmp (beg, "lower", 5)) return RECC_LOWER; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!memcmp (beg, "punct", 5)) return RECC_PUNCT; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!memcmp (beg, "ascii", 5)) return RECC_ASCII; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!memcmp (beg, "graph", 5)) return RECC_GRAPH; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!memcmp (beg, "print", 5)) return RECC_PRINT; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!memcmp (beg, "cntrl", 5)) return RECC_CNTRL; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case 6: ~~~~~~~ if (!memcmp (beg, "xdigit", 6)) return RECC_XDIGIT; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case 7: ~~~~~~~ if (!memcmp (beg, "unibyte", 7)) return RECC_UNIBYTE; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case 8: ~~~~~~~ if (!memcmp (beg, "nonascii", 8)) return RECC_NONASCII; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case 9: ~~~~~~~ if (!memcmp (beg, "multibyte", 9)) return RECC_MULTIBYTE; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ } ~ return RECC_ERROR; ~~~~~~~~~~~~~~~~~~ } ~ /* True if CH is in the char class CC. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int ~~~ re_iswctype (int ch, re_wctype_t cc ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RE_ISWCTYPE_ARG_DECL) ~~~~~~~~~~~~~~~~~~~~~ { ~ switch (cc) ~~~~~~~~~~~ { ~ case RECC_ALNUM: return ISALNUM (ch) != 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_ALPHA: return ISALPHA (ch) != 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_BLANK: return ISBLANK (ch) != 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_CNTRL: return ISCNTRL (ch) != 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_DIGIT: return ISDIGIT (ch) != 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_GRAPH: return ISGRAPH (ch) != 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_PRINT: return ISPRINT (ch) != 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_PUNCT: return ISPUNCT (ch) != 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_SPACE: return ISSPACE (ch) != 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef emacs ~~~~~~~~~~~~ case RECC_UPPER: ~~~~~~~~~~~~~~~~ return NILP (lispbuf->case_fold_search) ? ISUPPER (ch) != 0 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ : !NOCASEP (lispbuf, ch); ~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_LOWER: ~~~~~~~~~~~~~~~~ return NILP (lispbuf->case_fold_search) ? ISLOWER (ch) != 0 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ : !NOCASEP (lispbuf, ch); ~~~~~~~~~~~~~~~~~~~~~~~~~ #else ~~~~~ case RECC_UPPER: return ISUPPER (ch) != 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_LOWER: return ISLOWER (ch) != 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ case RECC_XDIGIT: return ISXDIGIT (ch) != 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_ASCII: return ISASCII (ch) != 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_NONASCII: case RECC_MULTIBYTE: return !ISASCII (ch); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_UNIBYTE: return ISUNIBYTE (ch) != 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_WORD: return ISWORD (ch) != 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_ERROR: return false; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ assert (0); ~~~~~~~~~~~ return -1; ~~~~~~~~~~ } ~ #ifdef MULE ~~~~~~~~~~~ static re_bool ~~~~~~~~~~~~~~ re_wctype_can_match_non_ascii (re_wctype_t cc) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ switch (cc) ~~~~~~~~~~~ { ~ case RECC_ASCII: ~~~~~~~~~~~~~~~~ case RECC_UNIBYTE: ~~~~~~~~~~~~~~~~~~ case RECC_CNTRL: ~~~~~~~~~~~~~~~~ case RECC_DIGIT: ~~~~~~~~~~~~~~~~ case RECC_XDIGIT: ~~~~~~~~~~~~~~~~~ case RECC_BLANK: ~~~~~~~~~~~~~~~~ return false; ~~~~~~~~~~~~~ default: ~~~~~~~~ return true; ~~~~~~~~~~~~ } ~ } ~ #endif /* MULE */ ~~~~~~~~~~~~~~~~~ #ifdef emacs ~~~~~~~~~~~~ /* Return a bit-pattern to use in the range-table bits to match multibyte ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ chars of class CC. */ ~~~~~~~~~~~~~~~~~~~~~~ static unsigned char ~~~~~~~~~~~~~~~~~~~~ re_wctype_to_bit (re_wctype_t cc) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ switch (cc) ~~~~~~~~~~~ { ~ case RECC_PRINT: case RECC_GRAPH: ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_ALPHA: return BIT_ALPHA; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_ALNUM: case RECC_WORD: return BIT_WORD; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_LOWER: return BIT_LOWER; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_UPPER: return BIT_UPPER; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_PUNCT: return BIT_PUNCT; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_SPACE: return BIT_SPACE; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_MULTIBYTE: case RECC_NONASCII: ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_ASCII: case RECC_DIGIT: case RECC_XDIGIT: case RECC_CNTRL: ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_BLANK: case RECC_UNIBYTE: case RECC_ERROR: return 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ default: ~~~~~~~~ ABORT (); ~~~~~~~~~ return 0; ~~~~~~~~~ } ~ } ~ #endif /* emacs */ ~~~~~~~~~~~~~~~~~~ ~ static void store_op1 (re_opcode_t op, unsigned char *loc, int arg); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ static void store_op2 (re_opcode_t op, unsigned char *loc, int arg1, int arg2); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ static void insert_op1 (re_opcode_t op, unsigned char *loc, int arg, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ unsigned char *end); ~~~~~~~~~~~~~~~~~~~~ static void insert_op2 (re_opcode_t op, unsigned char *loc, int arg1, int arg2, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ unsigned char *end); ~~~~~~~~~~~~~~~~~~~~ static re_bool at_begline_loc_p (re_char *pattern, re_char *p, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ reg_syntax_t syntax); ~~~~~~~~~~~~~~~~~~~~~ static re_bool at_endline_loc_p (re_char *p, re_char *pend, int syntax); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ static re_bool group_in_compile_stack (compile_stack_type compile_stack, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regnum_t regnum); ~~~~~~~~~~~~~~~~~ static reg_errcode_t compile_range (re_char **p_ptr, re_char *pend, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RE_TRANSLATE_TYPE translate, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ reg_syntax_t syntax, ~~~~~~~~~~~~~~~~~~~~ unsigned char *b); ~~~~~~~~~~~~~~~~~~ #ifdef MULE ~~~~~~~~~~~ static reg_errcode_t compile_extended_range (re_char **p_ptr, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_char *pend, ~~~~~~~~~~~~~~ RE_TRANSLATE_TYPE translate, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ reg_syntax_t syntax, ~~~~~~~~~~~~~~~~~~~~ Lisp_Object rtab); ~~~~~~~~~~~~~~~~~~ #endif /* MULE */ ~~~~~~~~~~~~~~~~~ #ifdef emacs ~~~~~~~~~~~~ reg_errcode_t compile_char_class (re_wctype_t cc, Lisp_Object rtab, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Bitbyte *flags_out); ~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ static re_bool group_match_null_string_p (re_char **p, re_char *end, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ register_info_type *reg_info); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ static re_bool alt_match_null_string_p (re_char *p, re_char *end, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ register_info_type *reg_info); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ static re_bool common_op_match_null_string_p (re_char **p, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_char *end, ~~~~~~~~~~~~~ register_info_type *reg_info); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ static int bcmp_translate (re_char *s1, re_char *s2, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ REGISTER int len, RE_TRANSLATE_TYPE translate ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef emacs ~~~~~~~~~~~~ , Internal_Format fmt, Lisp_Object lispobj ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ ); ~~ static int re_match_2_internal (struct re_pattern_buffer *bufp, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_char *string1, int size1, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_char *string2, int size2, int pos, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ struct re_registers *regs, int stop ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RE_LISP_CONTEXT_ARGS_DECL); ~~~~~~~~~~~~~~~~~~~~~~~~~~~ ~ #ifndef MATCH_MAY_ALLOCATE ~~~~~~~~~~~~~~~~~~~~~~~~~~ /* If we cannot allocate large objects within re_match_2_internal, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ we make the fail stack and register vectors global. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ The fail stack, we grow to the maximum size when a regexp ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ is compiled. ~~~~~~~~~~~~ The register vectors, we adjust in size each time we ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ compile a regexp, according to the number of registers it needs. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ static fail_stack_type fail_stack; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Size with which the following vectors are currently allocated. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ That is so we can make them bigger as needed, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ but never make them smaller. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ static int regs_allocated_size; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ static re_char ** regstart, ** regend; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ static re_char ** old_regstart, ** old_regend; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ static re_char **best_regstart, **best_regend; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ static register_info_type *reg_info; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ static re_char **reg_dummy; ~~~~~~~~~~~~~~~~~~~~~~~~~~~ static register_info_type *reg_info_dummy; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Make the register vectors big enough for NUM_REGS registers, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ but don't make them smaller. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ static ~~~~~~ regex_grow_registers (int num_regs) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ if (num_regs > regs_allocated_size) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ RETALLOC (regstart, num_regs, re_char *); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RETALLOC (regend, num_regs, re_char *); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RETALLOC (old_regstart, num_regs, re_char *); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RETALLOC (old_regend, num_regs, re_char *); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RETALLOC (best_regstart, num_regs, re_char *); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RETALLOC (best_regend, num_regs, re_char *); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RETALLOC (reg_info, num_regs, register_info_type); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RETALLOC (reg_dummy, num_regs, re_char *); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RETALLOC (reg_info_dummy, num_regs, register_info_type); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regs_allocated_size = num_regs; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ } ~ #endif /* not MATCH_MAY_ALLOCATE */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ~ /* `regex_compile' compiles PATTERN (of length SIZE) according to SYNTAX. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Returns one of error codes defined in `regex.h', or zero for success. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Assumes the `allocated' (and perhaps `buffer') and `translate' ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fields are set in BUFP on entry. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ If it succeeds, results are put in BUFP (if it returns an error, the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ contents of BUFP are undefined): ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `buffer' is the compiled pattern; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `syntax' is set to SYNTAX; ~~~~~~~~~~~~~~~~~~~~~~~~~~ `used' is set to the length of the compiled pattern; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `fastmap_accurate' is zero; ~~~~~~~~~~~~~~~~~~~~~~~~~~~ `re_ngroups' is the number of groups/subexpressions (including shy ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ groups) in PATTERN; ~~~~~~~~~~~~~~~~~~~ `re_nsub' is the number of non-shy groups in PATTERN; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `not_bol' and `not_eol' are zero; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ The `fastmap' and `newline_anchor' fields are neither ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ examined nor set. */ ~~~~~~~~~~~~~~~~~~~~~ /* Return, freeing storage we allocated. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define FREE_STACK_RETURN(value) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ do \ ~~~~~~~~~~ { \ ~~~~~~~~~ xfree (compile_stack.stack); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return value; \ ~~~~~~~~~~~~~~~~~~~~ } while (0) ~~~~~~~~~~~ static reg_errcode_t ~~~~~~~~~~~~~~~~~~~~ regex_compile (re_char *pattern, int size, reg_syntax_t syntax, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ struct re_pattern_buffer *bufp) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ /* We fetch characters from PATTERN here. We declare these as int ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (or possibly long) so that chars above 127 can be used as ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ array indices. The macros that fetch a character from the pattern ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ make sure to coerce to unsigned char before assigning, so we won't ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ get bitten by negative numbers here. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* XEmacs change: used to be unsigned char. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ REGISTER EMACS_INT c, c1; ~~~~~~~~~~~~~~~~~~~~~~~~~ /* A random temporary spot in PATTERN. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_char *p1; ~~~~~~~~~~~~ /* Points to the end of the buffer, where we should append. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ REGISTER unsigned char *buf_end; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Keeps track of unclosed groups. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ compile_stack_type compile_stack; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Points to the current (ending) position in the pattern. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_char *p = pattern; ~~~~~~~~~~~~~~~~~~~~~ re_char *pend = pattern + size; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* How to translate the characters in the pattern. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RE_TRANSLATE_TYPE translate = bufp->translate; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Address of the count-byte of the most recently inserted `exactn' ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ command. This makes it possible to tell if a new exact-match ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ character can be added to that command or if the character requires ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ a new `exactn' command. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~ unsigned char *pending_exact = 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Address of start of the most recently finished expression. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ This tells, e.g., postfix * where to find the start of its ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ operand. Reset at the beginning of groups and alternatives. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ unsigned char *laststart = 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Address of beginning of regexp, or inside of last group. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ unsigned char *begalt; ~~~~~~~~~~~~~~~~~~~~~~ /* Place in the uncompiled pattern (i.e., the {) to ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ which to go back if the interval is invalid. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_char *beg_interval; ~~~~~~~~~~~~~~~~~~~~~~ /* Address of the place where a forward jump should go to the end of ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the containing expression. Each alternative of an `or' -- except the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ last -- ends with a forward jump of this sort. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ unsigned char *fixup_alt_jump = 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Counts open-groups as they are encountered. Remembered for the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ matching close-group on the compile stack, so the same register ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ number is put in the stop_memory as the start_memory. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regnum_t regnum = 0; ~~~~~~~~~~~~~~~~~~~~ #ifdef DEBUG ~~~~~~~~~~~~ if (debug_regexps & RE_DEBUG_COMPILATION) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ int debug_count; ~~~~~~~~~~~~~~~~ DEBUG_PRINT1 ("\nCompiling pattern: "); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (debug_count = 0; debug_count < size; debug_count++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ putchar (pattern[debug_count]); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ putchar ('\n'); ~~~~~~~~~~~~~~~ } ~ #endif /* DEBUG */ ~~~~~~~~~~~~~~~~~~ /* Initialize the compile stack. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ compile_stack.stack = TALLOC (INIT_COMPILE_STACK_SIZE, compile_stack_elt_t); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (compile_stack.stack == NULL) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return REG_ESPACE; ~~~~~~~~~~~~~~~~~~ compile_stack.size = INIT_COMPILE_STACK_SIZE; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ compile_stack.avail = 0; ~~~~~~~~~~~~~~~~~~~~~~~~ /* Initialize the pattern buffer. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->syntax = syntax; ~~~~~~~~~~~~~~~~~~~~~~ bufp->fastmap_accurate = 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->not_bol = bufp->not_eol = 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Set `used' to zero, so that if we return an error, the pattern ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ printer (for debugging) will think there's no pattern. We reset it ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ at the end. */ ~~~~~~~~~~~~~~~ bufp->used = 0; ~~~~~~~~~~~~~~~ /* Always count groups, whether or not bufp->no_sub is set. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->re_nsub = 0; ~~~~~~~~~~~~~~~~~~ bufp->re_ngroups = 0; ~~~~~~~~~~~~~~~~~~~~~ bufp->warned_about_incompatible_back_references = 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (bufp->external_to_internal_register == 0) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ bufp->external_to_internal_register_size = INIT_REG_TRANSLATE_SIZE; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RETALLOC (bufp->external_to_internal_register, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->external_to_internal_register_size, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int); ~~~~~ } ~ { ~ int i; ~~~~~~ bufp->external_to_internal_register[0] = 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (i = 1; i < bufp->external_to_internal_register_size; i++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->external_to_internal_register[i] = (int) 0xDEADBEEF; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ #if !defined (emacs) && !defined (SYNTAX_TABLE) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Initialize the syntax table. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ init_syntax_once (); ~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ if (bufp->allocated == 0) ~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ if (bufp->buffer) ~~~~~~~~~~~~~~~~~ { /* If zero allocated, but buffer is non-null, try to realloc ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ enough space. This loses if buffer's address is bogus, but ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ that is the user's responsibility. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RETALLOC (bufp->buffer, INIT_BUF_SIZE, unsigned char); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ else ~~~~ { /* Caller did not allocate a buffer. Do it for them. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->buffer = TALLOC (INIT_BUF_SIZE, unsigned char); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ if (!bufp->buffer) FREE_STACK_RETURN (REG_ESPACE); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->allocated = INIT_BUF_SIZE; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ begalt = buf_end = bufp->buffer; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Loop through the uncompiled pattern until we're at the end. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ while (p != pend) ~~~~~~~~~~~~~~~~~ { ~ PATFETCH (c); ~~~~~~~~~~~~~ switch (c) ~~~~~~~~~~ { ~ case '^': ~~~~~~~~~ { ~ if ( /* If at start of pattern, it's an operator. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ p == pattern + 1 ~~~~~~~~~~~~~~~~ /* If context independent, it's an operator. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || syntax & RE_CONTEXT_INDEP_ANCHORS ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Otherwise, depends on what's come before. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || at_begline_loc_p (pattern, p, syntax)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ BUF_PUSH (begline); ~~~~~~~~~~~~~~~~~~~ else ~~~~ goto normal_char; ~~~~~~~~~~~~~~~~~ } ~ break; ~~~~~~ case '$': ~~~~~~~~~ { ~ if ( /* If at end of pattern, it's an operator. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ p == pend ~~~~~~~~~ /* If context independent, it's an operator. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || syntax & RE_CONTEXT_INDEP_ANCHORS ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Otherwise, depends on what's next. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || at_endline_loc_p (p, pend, syntax)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ BUF_PUSH (endline); ~~~~~~~~~~~~~~~~~~~ else ~~~~ goto normal_char; ~~~~~~~~~~~~~~~~~ } ~ break; ~~~~~~ case '+': ~~~~~~~~~ case '?': ~~~~~~~~~ if ((syntax & RE_BK_PLUS_QM) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || (syntax & RE_LIMITED_OPS)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto normal_char; ~~~~~~~~~~~~~~~~~ handle_plus: ~~~~~~~~~~~~ case '*': ~~~~~~~~~ /* If there is no previous pattern... */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!laststart) ~~~~~~~~~~~~~~~ { ~ if (syntax & RE_CONTEXT_INVALID_OPS) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ FREE_STACK_RETURN (REG_BADRPT); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ else if (!(syntax & RE_CONTEXT_INDEP_OPS)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto normal_char; ~~~~~~~~~~~~~~~~~ } ~ { ~ /* true means zero/many matches are allowed. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_bool zero_times_ok = c != '+'; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_bool many_times_ok = c != '?'; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* true means match shortest string possible. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_bool minimal = false; ~~~~~~~~~~~~~~~~~~~~~~~~ /* If there is a sequence of repetition chars, collapse it ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ down to just one (the right one). We can't combine ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ interval operators with these because of, e.g., `a{2}*', ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ which should only match an even number of `a's. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ while (p != pend) ~~~~~~~~~~~~~~~~~ { ~ PATFETCH (c); ~~~~~~~~~~~~~ if (c == '*' || (!(syntax & RE_BK_PLUS_QM) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ && (c == '+' || c == '?'))) ~~~~~~~~~~~~~~~~~~~~~~~~~~~ ; ~ else if (syntax & RE_BK_PLUS_QM && c == '\\') ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ if (p == pend) FREE_STACK_RETURN (REG_EESCAPE); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PATFETCH (c1); ~~~~~~~~~~~~~~ if (!(c1 == '+' || c1 == '?')) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ PATUNFETCH; ~~~~~~~~~~~ PATUNFETCH; ~~~~~~~~~~~ break; ~~~~~~ } ~ c = c1; ~~~~~~~ } ~ else ~~~~ { ~ PATUNFETCH; ~~~~~~~~~~~ break; ~~~~~~ } ~ /* If we get here, we found another repeat character. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!(syntax & RE_NO_MINIMAL_MATCHING)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ /* "*?" and "+?" and "??" are okay (and mean match ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ minimally), but other sequences (such as "*??" and ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ "+++") are rejected (reserved for future use). */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (minimal || c != '?') ~~~~~~~~~~~~~~~~~~~~~~~~ FREE_STACK_RETURN (REG_BADRPT); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ minimal = true; ~~~~~~~~~~~~~~~ } ~ else ~~~~ { ~ zero_times_ok |= c != '+'; ~~~~~~~~~~~~~~~~~~~~~~~~~~ many_times_ok |= c != '?'; ~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ } ~ /* Star, etc. applied to an empty pattern is equivalent ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ to an empty pattern. */ ~~~~~~~~~~~~~~~~~~~~~~~~ if (!laststart) ~~~~~~~~~~~~~~~ break; ~~~~~~ /* Now we know whether zero matches is allowed ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ and whether two or more matches is allowed ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ and whether we want minimal or maximal matching. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (minimal) ~~~~~~~~~~~~ { ~ if (!many_times_ok) ~~~~~~~~~~~~~~~~~~~ { ~ /* "a??" becomes: ~~~~~~~~~~~~~~~~~ 0: /on_failure_jump to 6 ~~~~~~~~~~~~~~~~~~~~~~~~ 3: /jump to 9 ~~~~~~~~~~~~~ 6: /exactn/1/A ~~~~~~~~~~~~~~ 9: end of pattern. ~~~~~~~~~~~~~~~~~~ */ ~~ GET_BUFFER_SPACE (6); ~~~~~~~~~~~~~~~~~~~~~ INSERT_JUMP (jump, laststart, buf_end + 3); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ buf_end += 3; ~~~~~~~~~~~~~ INSERT_JUMP (on_failure_jump, laststart, laststart + 6); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ buf_end += 3; ~~~~~~~~~~~~~ } ~ else if (zero_times_ok) ~~~~~~~~~~~~~~~~~~~~~~~ { ~ /* "a*?" becomes: ~~~~~~~~~~~~~~~~~ 0: /jump to 6 ~~~~~~~~~~~~~ 3: /exactn/1/A ~~~~~~~~~~~~~~ 6: /on_failure_jump to 3 ~~~~~~~~~~~~~~~~~~~~~~~~ 9: end of pattern. ~~~~~~~~~~~~~~~~~~ */ ~~ GET_BUFFER_SPACE (6); ~~~~~~~~~~~~~~~~~~~~~ INSERT_JUMP (jump, laststart, buf_end + 3); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ buf_end += 3; ~~~~~~~~~~~~~ STORE_JUMP (on_failure_jump, buf_end, laststart + 3); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ buf_end += 3; ~~~~~~~~~~~~~ } ~ else ~~~~ { ~ /* "a+?" becomes: ~~~~~~~~~~~~~~~~~ 0: /exactn/1/A ~~~~~~~~~~~~~~ 3: /on_failure_jump to 0 ~~~~~~~~~~~~~~~~~~~~~~~~ 6: end of pattern. ~~~~~~~~~~~~~~~~~~ */ ~~ GET_BUFFER_SPACE (3); ~~~~~~~~~~~~~~~~~~~~~ STORE_JUMP (on_failure_jump, buf_end, laststart); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ buf_end += 3; ~~~~~~~~~~~~~ } ~ } ~ else ~~~~ { ~ /* Are we optimizing this jump? */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_bool keep_string_p = false; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (many_times_ok) ~~~~~~~~~~~~~~~~~~ { /* More than one repetition is allowed, so put in ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ at the end a backward relative jump from ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `buf_end' to before the next jump we're going ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ to put in below (which jumps from laststart to ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ after this jump). ~~~~~~~~~~~~~~~~~ But if we are at the `*' in the exact sequence `.*\n', ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ insert an unconditional jump backwards to the ., ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ instead of the beginning of the loop. This way we only ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ push a failure point once, instead of every time ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ through the loop. */ ~~~~~~~~~~~~~~~~~~~~~ assert (p - 1 > pattern); ~~~~~~~~~~~~~~~~~~~~~~~~~ /* Allocate the space for the jump. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ GET_BUFFER_SPACE (3); ~~~~~~~~~~~~~~~~~~~~~ /* We know we are not at the first character of the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ pattern, because laststart was nonzero. And we've ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ already incremented `p', by the way, to be the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ character after the `*'. Do we have to do something ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ analogous here for null bytes, because of ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RE_DOT_NOT_NULL? */ ~~~~~~~~~~~~~~~~~~~ if (*(p - 2) == '.' ~~~~~~~~~~~~~~~~~~~ && zero_times_ok ~~~~~~~~~~~~~~~~ && p < pend && *p == '\n' ~~~~~~~~~~~~~~~~~~~~~~~~~ && !(syntax & RE_DOT_NEWLINE)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { /* We have .*\n. */ ~~~~~~~~~~~~~~~~~~~~~~ STORE_JUMP (jump, buf_end, laststart); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ keep_string_p = true; ~~~~~~~~~~~~~~~~~~~~~ } ~ else ~~~~ /* Anything else. */ ~~~~~~~~~~~~~~~~~~~~~ STORE_JUMP (maybe_pop_jump, buf_end, laststart - 3); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* We've added more stuff to the buffer. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ buf_end += 3; ~~~~~~~~~~~~~ } ~ /* On failure, jump from laststart to buf_end + 3, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ which will be the end of the buffer after this jump ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ is inserted. */ ~~~~~~~~~~~~~~~~ GET_BUFFER_SPACE (3); ~~~~~~~~~~~~~~~~~~~~~ INSERT_JUMP (keep_string_p ? on_failure_keep_string_jump ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ : on_failure_jump, ~~~~~~~~~~~~~~~~~~ laststart, buf_end + 3); ~~~~~~~~~~~~~~~~~~~~~~~~ buf_end += 3; ~~~~~~~~~~~~~ if (!zero_times_ok) ~~~~~~~~~~~~~~~~~~~ { ~ /* At least one repetition is required, so insert a ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `dummy_failure_jump' before the initial ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `on_failure_jump' instruction of the loop. This ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ effects a skip over that instruction the first time ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ we hit that loop. */ ~~~~~~~~~~~~~~~~~~~~~ GET_BUFFER_SPACE (3); ~~~~~~~~~~~~~~~~~~~~~ INSERT_JUMP (dummy_failure_jump, laststart, laststart + 6); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ buf_end += 3; ~~~~~~~~~~~~~ } ~ } ~ pending_exact = 0; ~~~~~~~~~~~~~~~~~~ } ~ break; ~~~~~~ case '.': ~~~~~~~~~ laststart = buf_end; ~~~~~~~~~~~~~~~~~~~~ BUF_PUSH (anychar); ~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ #ifdef MULE ~~~~~~~~~~~ #define MAYBE_START_OVER_WITH_EXTENDED(ch) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (ch >= 0x80) do \ ~~~~~~~~~~~~~~~~~~~~~~ { \ ~~~~~~~ goto start_over_with_extended; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } while (0) ~~~~~~~~~~~ #else ~~~~~ #define MAYBE_START_OVER_WITH_EXTENDED(ch) (void)(ch) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ case '[': ~~~~~~~~~ { ~ /* XEmacs change: this whole section */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_bool had_char_class = false; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (p == pend) FREE_STACK_RETURN (REG_EBRACK); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Ensure that we have enough space to push a charset: the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ opcode, the length count, and the bitset; 34 bytes in all. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ GET_BUFFER_SPACE (34); ~~~~~~~~~~~~~~~~~~~~~~ laststart = buf_end; ~~~~~~~~~~~~~~~~~~~~ /* We test `*p == '^' twice, instead of using an if ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ statement, so we only need one BUF_PUSH. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ BUF_PUSH (*p == '^' ? charset_not : charset); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (*p == '^') ~~~~~~~~~~~~~~ p++; ~~~~ /* Remember the first position in the bracket expression. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ p1 = p; ~~~~~~~ /* Push the number of bytes in the bitmap. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ BUF_PUSH ((1 << BYTEWIDTH) / BYTEWIDTH); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Clear the whole map. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~ memset (buf_end, 0, (1 << BYTEWIDTH) / BYTEWIDTH); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* charset_not matches newline according to a syntax bit. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if ((re_opcode_t) buf_end[-2] == charset_not ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ && (syntax & RE_HAT_LISTS_NOT_NEWLINE)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ SET_LIST_BIT ('\n'); ~~~~~~~~~~~~~~~~~~~~ /* Read in characters and ranges, setting map bits. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (;;) ~~~~~~~~ { ~ if (p == pend) FREE_STACK_RETURN (REG_EBRACK); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PATFETCH (c); ~~~~~~~~~~~~~ /* Frumble-bumble, we may have found some extended chars. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Need to start over, process everything using the general ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ extended-char mechanism, and need to use charset_mule and ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ charset_mule_not instead of charset and charset_not. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ MAYBE_START_OVER_WITH_EXTENDED (c); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* \ might escape characters inside [...] and [^...]. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if ((syntax & RE_BACKSLASH_ESCAPE_IN_LISTS) && c == '\\') ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ if (p == pend) FREE_STACK_RETURN (REG_EESCAPE); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PATFETCH (c1); ~~~~~~~~~~~~~~ MAYBE_START_OVER_WITH_EXTENDED (c1); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ SET_LIST_BIT (c1); ~~~~~~~~~~~~~~~~~~ continue; ~~~~~~~~~ } ~ /* Could be the end of the bracket expression. If it's ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ not (i.e., when the bracket expression is `[]' so ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ far), the ']' character bit gets set way below. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (c == ']' && p != p1 + 1) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ /* Look ahead to see if it's a range when the last thing ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ was a character class. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~ if (had_char_class && c == '-' && *p != ']') ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ FREE_STACK_RETURN (REG_ERANGE); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Look ahead to see if it's a range when the last thing ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ was a character: if this is a hyphen not at the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ beginning or the end of a list, then it's the range ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ operator. */ ~~~~~~~~~~~~~ if (c == '-' ~~~~~~~~~~~~ && !(p - 2 >= pattern && p[-2] == '[') ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ && !(p - 3 >= pattern && p[-3] == '[' && p[-2] == '^') ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ && *p != ']') ~~~~~~~~~~~~~ { ~ reg_errcode_t ret; ~~~~~~~~~~~~~~~~~~ MAYBE_START_OVER_WITH_EXTENDED (*(unsigned char *)p); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ret = compile_range (&p, pend, translate, syntax, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ buf_end); ~~~~~~~~~ if (ret != REG_NOERROR) FREE_STACK_RETURN (ret); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ else if (p[0] == '-' && p[1] != ']') ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { /* This handles ranges made up of characters only. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ reg_errcode_t ret; ~~~~~~~~~~~~~~~~~~ /* Move past the `-'. */ ~~~~~~~~~~~~~~~~~~~~~~~~~ PATFETCH (c1); ~~~~~~~~~~~~~~ MAYBE_START_OVER_WITH_EXTENDED (*(unsigned char *)p); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ret = compile_range (&p, pend, translate, syntax, buf_end); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (ret != REG_NOERROR) FREE_STACK_RETURN (ret); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* See if we're at the beginning of a possible character ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ class. */ ~~~~~~~~~~ else if (syntax & RE_CHAR_CLASSES && c == '[' && *p == ':') ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ re_char *str = p + 1; ~~~~~~~~~~~~~~~~~~~~~ PATFETCH (c); ~~~~~~~~~~~~~ c1 = 0; ~~~~~~~ /* If pattern is `[[:'. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (p == pend) FREE_STACK_RETURN (REG_EBRACK); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (;;) ~~~~~~~~ { ~ PATFETCH (c); ~~~~~~~~~~~~~ if ((c == ':' && *p == ']') || p == pend) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ break; ~~~~~~ } ~ c1++; ~~~~~ } ~ /* If isn't a word bracketed by `[:' and `:]': ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ undo the ending character, the letters, and leave ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the leading `:' and `[' (but set bits for them). */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (c == ':' && *p == ']') ~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ re_wctype_t cc = re_wctype (str, c1); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int ch; ~~~~~~~ if (cc == RECC_ERROR) ~~~~~~~~~~~~~~~~~~~~~ FREE_STACK_RETURN (REG_ECTYPE); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Throw away the ] at the end of the character ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ class. */ ~~~~~~~~~~ PATFETCH (c); ~~~~~~~~~~~~~ if (p == pend) FREE_STACK_RETURN (REG_EBRACK); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef MULE ~~~~~~~~~~~ if (re_wctype_can_match_non_ascii (cc)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ goto start_over_with_extended; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ #endif /* MULE */ ~~~~~~~~~~~~~~~~~ for (ch = 0; ch < (1 << BYTEWIDTH); ++ch) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ if (re_iswctype (ch, cc ~~~~~~~~~~~~~~~~~~~~~~~ RE_ISWCTYPE_ARG (current_buffer))) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ SET_LIST_BIT (ch); ~~~~~~~~~~~~~~~~~~ } ~ } ~ had_char_class = true; ~~~~~~~~~~~~~~~~~~~~~~ } ~ else ~~~~ { ~ c1++; ~~~~~ while (c1--) ~~~~~~~~~~~~ PATUNFETCH; ~~~~~~~~~~~ SET_LIST_BIT ('['); ~~~~~~~~~~~~~~~~~~~ SET_LIST_BIT (':'); ~~~~~~~~~~~~~~~~~~~ had_char_class = false; ~~~~~~~~~~~~~~~~~~~~~~~ } ~ } ~ else ~~~~ { ~ had_char_class = false; ~~~~~~~~~~~~~~~~~~~~~~~ SET_LIST_BIT (c); ~~~~~~~~~~~~~~~~~ } ~ } ~ /* Discard any (non)matching list bytes that are all 0 at the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ end of the map. Decrease the map-length byte too. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ while ((int) buf_end[-1] > 0 && buf_end[buf_end[-1] - 1] == 0) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ buf_end[-1]--; ~~~~~~~~~~~~~~ buf_end += buf_end[-1]; ~~~~~~~~~~~~~~~~~~~~~~~ } ~ break; ~~~~~~ #ifdef MULE ~~~~~~~~~~~ start_over_with_extended: ~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ REGISTER Lisp_Object rtab = Qnil; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Bitbyte flags = 0; ~~~~~~~~~~~~~~~~~~ int bytes_needed = sizeof (flags); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_bool had_char_class = false; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* There are extended chars here, which means we need to use the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ unified range-table format. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (buf_end[-2] == charset) ~~~~~~~~~~~~~~~~~~~~~~~~~~~ buf_end[-2] = charset_mule; ~~~~~~~~~~~~~~~~~~~~~~~~~~~ else ~~~~ buf_end[-2] = charset_mule_not; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ buf_end--; ~~~~~~~~~~ p = p1; /* go back to the beginning of the charset, after ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ a possible ^. */ ~~~~~~~~~~~~~~~~ rtab = Vthe_lisp_rangetab; ~~~~~~~~~~~~~~~~~~~~~~~~~~ Fclear_range_table (rtab); ~~~~~~~~~~~~~~~~~~~~~~~~~~ /* charset_not matches newline according to a syntax bit. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if ((re_opcode_t) buf_end[-1] == charset_mule_not ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ && (syntax & RE_HAT_LISTS_NOT_NEWLINE)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ SET_RANGETAB_BIT ('\n'); ~~~~~~~~~~~~~~~~~~~~~~~~ /* Read in characters and ranges, setting map bits. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (;;) ~~~~~~~~ { ~ if (p == pend) FREE_STACK_RETURN (REG_EBRACK); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PATFETCH (c); ~~~~~~~~~~~~~ /* \ might escape characters inside [...] and [^...]. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if ((syntax & RE_BACKSLASH_ESCAPE_IN_LISTS) && c == '\\') ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ if (p == pend) FREE_STACK_RETURN (REG_EESCAPE); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PATFETCH (c1); ~~~~~~~~~~~~~~ SET_RANGETAB_BIT (c1); ~~~~~~~~~~~~~~~~~~~~~~ continue; ~~~~~~~~~ } ~ /* Could be the end of the bracket expression. If it's ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ not (i.e., when the bracket expression is `[]' so ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ far), the ']' character bit gets set way below. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (c == ']' && p != p1 + 1) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ /* Look ahead to see if it's a range when the last thing ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ was a character class. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~ if (had_char_class && c == '-' && *p != ']') ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ FREE_STACK_RETURN (REG_ERANGE); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Look ahead to see if it's a range when the last thing ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ was a character: if this is a hyphen not at the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ beginning or the end of a list, then it's the range ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ operator. */ ~~~~~~~~~~~~~ if (c == '-' ~~~~~~~~~~~~ && !(p - 2 >= pattern && p[-2] == '[') ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ && !(p - 3 >= pattern && p[-3] == '[' && p[-2] == '^') ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ && *p != ']') ~~~~~~~~~~~~~ { ~ reg_errcode_t ret; ~~~~~~~~~~~~~~~~~~ ret = compile_extended_range (&p, pend, translate, syntax, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ rtab); ~~~~~~ if (ret != REG_NOERROR) FREE_STACK_RETURN (ret); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ else if (p[0] == '-' && p[1] != ']') ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { /* This handles ranges made up of characters only. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ reg_errcode_t ret; ~~~~~~~~~~~~~~~~~~ /* Move past the `-'. */ ~~~~~~~~~~~~~~~~~~~~~~~~~ PATFETCH (c1); ~~~~~~~~~~~~~~ ret = compile_extended_range (&p, pend, translate, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ syntax, rtab); ~~~~~~~~~~~~~~ if (ret != REG_NOERROR) FREE_STACK_RETURN (ret); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* See if we're at the beginning of a possible character ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ class. */ ~~~~~~~~~~ else if (syntax & RE_CHAR_CLASSES && c == '[' && *p == ':') ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ re_char *str = p + 1; ~~~~~~~~~~~~~~~~~~~~~ PATFETCH (c); ~~~~~~~~~~~~~ c1 = 0; ~~~~~~~ /* If pattern is `[[:'. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (p == pend) FREE_STACK_RETURN (REG_EBRACK); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (;;) ~~~~~~~~ { ~ PATFETCH (c); ~~~~~~~~~~~~~ if ((c == ':' && *p == ']') || p == pend) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ break; ~~~~~~ } ~ c1++; ~~~~~ } ~ /* If isn't a word bracketed by `[:' and `:]': ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ undo the ending character, the letters, and leave ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the leading `:' and `[' (but set bits for them). */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (c == ':' && *p == ']') ~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ re_wctype_t cc = re_wctype (str, c1); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ reg_errcode_t ret = REG_NOERROR; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (cc == RECC_ERROR) ~~~~~~~~~~~~~~~~~~~~~ FREE_STACK_RETURN (REG_ECTYPE); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Throw away the ] at the end of the character ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ class. */ ~~~~~~~~~~ PATFETCH (c); ~~~~~~~~~~~~~ if (p == pend) FREE_STACK_RETURN (REG_EBRACK); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ret = compile_char_class (cc, rtab, &flags); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (ret != REG_NOERROR) FREE_STACK_RETURN (ret); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ had_char_class = true; ~~~~~~~~~~~~~~~~~~~~~~ } ~ else ~~~~ { ~ c1++; ~~~~~ while (c1--) ~~~~~~~~~~~~ PATUNFETCH; ~~~~~~~~~~~ SET_RANGETAB_BIT ('['); ~~~~~~~~~~~~~~~~~~~~~~~ SET_RANGETAB_BIT (':'); ~~~~~~~~~~~~~~~~~~~~~~~ had_char_class = false; ~~~~~~~~~~~~~~~~~~~~~~~ } ~ } ~ else ~~~~ { ~ had_char_class = false; ~~~~~~~~~~~~~~~~~~~~~~~ SET_RANGETAB_BIT (c); ~~~~~~~~~~~~~~~~~~~~~ } ~ } ~ bytes_needed += unified_range_table_bytes_needed (rtab); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ GET_BUFFER_SPACE (bytes_needed); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ *buf_end++ = flags; ~~~~~~~~~~~~~~~~~~~ unified_range_table_copy_data (rtab, buf_end); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ buf_end += unified_range_table_bytes_used (buf_end); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ } ~ #endif /* MULE */ ~~~~~~~~~~~~~~~~~ case '(': ~~~~~~~~~ if (syntax & RE_NO_BK_PARENS) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto handle_open; ~~~~~~~~~~~~~~~~~ else ~~~~ goto normal_char; ~~~~~~~~~~~~~~~~~ case ')': ~~~~~~~~~ if (syntax & RE_NO_BK_PARENS) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto handle_close; ~~~~~~~~~~~~~~~~~~ else ~~~~ goto normal_char; ~~~~~~~~~~~~~~~~~ case '\n': ~~~~~~~~~~ if (syntax & RE_NEWLINE_ALT) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto handle_alt; ~~~~~~~~~~~~~~~~ else ~~~~ goto normal_char; ~~~~~~~~~~~~~~~~~ case '|': ~~~~~~~~~ if (syntax & RE_NO_BK_VBAR) ~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto handle_alt; ~~~~~~~~~~~~~~~~ else ~~~~ goto normal_char; ~~~~~~~~~~~~~~~~~ case '{': ~~~~~~~~~ if (syntax & RE_INTERVALS && syntax & RE_NO_BK_BRACES) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto handle_interval; ~~~~~~~~~~~~~~~~~~~~~ else ~~~~ goto normal_char; ~~~~~~~~~~~~~~~~~ case '\\': ~~~~~~~~~~ if (p == pend) FREE_STACK_RETURN (REG_EESCAPE); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Do not translate the character after the \, so that we can ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ distinguish, e.g., \B from \b, even if we normally would ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ translate, e.g., B to b. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PATFETCH_RAW (c); ~~~~~~~~~~~~~~~~~ switch (c) ~~~~~~~~~~ { ~ case '(': ~~~~~~~~~ if (syntax & RE_NO_BK_PARENS) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto normal_backslash; ~~~~~~~~~~~~~~~~~~~~~~ handle_open: ~~~~~~~~~~~~ { ~ regnum_t r = 0; ~~~~~~~~~~~~~~~ re_bool shy = 0, named_nonshy = 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!(syntax & RE_NO_SHY_GROUPS) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ && p != pend && itext_ichar_eql (p, '?')) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ INC_IBYTEPTR (p); /* Gobble up the '?'. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PATFETCH (c); /* Fetch the next character, which may be a ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ digit. */ ~~~~~~~~~ switch (c) ~~~~~~~~~~ { ~ case ':': /* shy groups */ ~~~~~~~~~~~~~~~~~~~~~~~~~~ shy = 1; ~~~~~~~~ break; ~~~~~~ case '1': case '2': case '3': case '4': ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case '5': case '6': case '7': case '8': case '9': ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PATUNFETCH; ~~~~~~~~~~~ GET_UNSIGNED_NUMBER (r); ~~~~~~~~~~~~~~~~~~~~~~~~ if (itext_ichar_eql (p, ':')) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ named_nonshy = 1; ~~~~~~~~~~~~~~~~~ INC_IBYTEPTR (p); /* Gobble up the ':'. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ /* Otherwise, fall through and error. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* An explicitly specified regnum must start with ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ non-0. */ ~~~~~~~~~ case '0': ~~~~~~~~~ default: ~~~~~~~~ FREE_STACK_RETURN (REG_BADPAT); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ } ~ ++regnum; ~~~~~~~~~ bufp->re_ngroups++; ~~~~~~~~~~~~~~~~~~~ if (bufp->re_ngroups > MAX_REGNUM) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ FREE_STACK_RETURN (REG_ESUBREG); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ if (!shy) ~~~~~~~~~ { ~ if (named_nonshy) ~~~~~~~~~~~~~~~~~ { ~ if (r < bufp->external_to_internal_register_size) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ if (group_in_compile_stack ~~~~~~~~~~~~~~~~~~~~~~~~~~ (compile_stack, ~~~~~~~~~~~~~~~ bufp->external_to_internal_register[r])) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ /* GNU errors in this context, which is ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ inconsistent; it otherwise has no problem ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ with named non-shy groups overriding ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ previously-assigned group numbers. I choose ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ to error here for consistency with GNU for ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ those writing code that should target ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ both. */ ~~~~~~~~ FREE_STACK_RETURN (REG_ESUBREG); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ } ~ if (r > bufp->re_nsub) ~~~~~~~~~~~~~~~~~~~~~~ { ~ bufp->re_nsub = r; ~~~~~~~~~~~~~~~~~~ } ~ } ~ else ~~~~ { ~ r = ++(bufp->re_nsub); ~~~~~~~~~~~~~~~~~~~~~~ } ~ while (bufp->external_to_internal_register_size <= ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->re_nsub) ~~~~~~~~~~~~~~ { ~ int i; ~~~~~~ int old_size = ~~~~~~~~~~~~~~ bufp->external_to_internal_register_size; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->external_to_internal_register_size ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ += max (old_size + 5, bufp->re_nsub + 5); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RETALLOC (bufp->external_to_internal_register, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->external_to_internal_register_size, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int); ~~~~~ for (i = old_size; ~~~~~~~~~~~~~~~~~~ i < bufp->external_to_internal_register_size; i++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->external_to_internal_register[i] = ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (int) 0xDEADBEEF; ~~~~~~~~~~~~~~~~~ } ~ /* This is explicitly [r] rather than [bufp->re_nsub] for ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the case that the named nonshy group references an ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ unused register number less than bufp->re_nsub. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->external_to_internal_register[r] = ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->re_ngroups; ~~~~~~~~~~~~~~~~~ } ~ if (COMPILE_STACK_FULL) ~~~~~~~~~~~~~~~~~~~~~~~ { ~ RETALLOC (compile_stack.stack, compile_stack.size << 1, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ compile_stack_elt_t); ~~~~~~~~~~~~~~~~~~~~~ if (compile_stack.stack == NULL) return REG_ESPACE; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ compile_stack.size <<= 1; ~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* These are the values to restore when we hit end of this ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ group. They are all relative offsets, so that if the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ whole pattern moves because of realloc, they will still ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ be valid. */ ~~~~~~~~~~~~~ COMPILE_STACK_TOP.begalt_offset = begalt - bufp->buffer; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ COMPILE_STACK_TOP.fixup_alt_jump ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ = fixup_alt_jump ? fixup_alt_jump - bufp->buffer + 1 : 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ COMPILE_STACK_TOP.laststart_offset = buf_end - bufp->buffer; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ COMPILE_STACK_TOP.regnum = bufp->re_ngroups; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ COMPILE_STACK_TOP.inner_group_offset ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ = buf_end - bufp->buffer + 3; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* We will eventually replace the 0 with the number of ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ groups inner to this one, using inner_group_offset, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ above. */ ~~~~~~~~~ GET_BUFFER_SPACE (5); ~~~~~~~~~~~~~~~~~~~~~ store_op2 (start_memory, buf_end, bufp->re_ngroups, 0); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ buf_end += 5; ~~~~~~~~~~~~~ compile_stack.avail++; ~~~~~~~~~~~~~~~~~~~~~~ fixup_alt_jump = 0; ~~~~~~~~~~~~~~~~~~~ laststart = 0; ~~~~~~~~~~~~~~ begalt = buf_end; ~~~~~~~~~~~~~~~~~ pending_exact = 0; ~~~~~~~~~~~~~~~~~~ } ~ break; ~~~~~~ case ')': ~~~~~~~~~ if (syntax & RE_NO_BK_PARENS) goto normal_backslash; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (COMPILE_STACK_EMPTY) ~~~~~~~~~~~~~~~~~~~~~~~~ { ~ if (syntax & RE_UNMATCHED_RIGHT_PAREN_ORD) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto normal_backslash; ~~~~~~~~~~~~~~~~~~~~~~ else ~~~~ FREE_STACK_RETURN (REG_ERPAREN); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ handle_close: ~~~~~~~~~~~~~ if (fixup_alt_jump) ~~~~~~~~~~~~~~~~~~~ { /* Push a dummy failure point at the end of the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ alternative for a possible future ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `pop_failure_jump' to pop. See comments at ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `push_dummy_failure' in `re_match_2'. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ BUF_PUSH (push_dummy_failure); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* We allocated space for this jump when we assigned ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ to `fixup_alt_jump', in the `handle_alt' case below. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ STORE_JUMP (jump_past_alt, fixup_alt_jump, buf_end - 1); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* See similar code for backslashed left paren above. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (COMPILE_STACK_EMPTY) ~~~~~~~~~~~~~~~~~~~~~~~~ { ~ if (syntax & RE_UNMATCHED_RIGHT_PAREN_ORD) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto normal_char; ~~~~~~~~~~~~~~~~~ else ~~~~ FREE_STACK_RETURN (REG_ERPAREN); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* Since we just checked for an empty stack above, this ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ``can't happen''. */ ~~~~~~~~~~~~~~~~~~~~~ assert (compile_stack.avail != 0); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ /* We don't just want to restore into `regnum', because ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ later groups should continue to be numbered higher, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ as in `(ab)c(de)' -- the second group is #2. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regnum_t this_group_regnum; ~~~~~~~~~~~~~~~~~~~~~~~~~~~ unsigned char *inner_group_loc; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ compile_stack.avail--; ~~~~~~~~~~~~~~~~~~~~~~ begalt = bufp->buffer + COMPILE_STACK_TOP.begalt_offset; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fixup_alt_jump ~~~~~~~~~~~~~~ = COMPILE_STACK_TOP.fixup_alt_jump ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ? bufp->buffer + COMPILE_STACK_TOP.fixup_alt_jump - 1 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ : 0; ~~~~ laststart = bufp->buffer + COMPILE_STACK_TOP.laststart_offset; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ this_group_regnum = COMPILE_STACK_TOP.regnum; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ pending_exact = 0; ~~~~~~~~~~~~~~~~~~ /* We're at the end of the group, so now we know how many ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ groups were inside this one. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ inner_group_loc ~~~~~~~~~~~~~~~ = bufp->buffer + COMPILE_STACK_TOP.inner_group_offset; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ STORE_NUMBER (inner_group_loc, regnum - this_group_regnum); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ GET_BUFFER_SPACE (5); ~~~~~~~~~~~~~~~~~~~~~ store_op2 (stop_memory, buf_end, this_group_regnum, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regnum - this_group_regnum); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ buf_end += 5; ~~~~~~~~~~~~~ } ~ break; ~~~~~~ case '|': /* `\|'. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~ if (syntax & RE_LIMITED_OPS || syntax & RE_NO_BK_VBAR) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto normal_backslash; ~~~~~~~~~~~~~~~~~~~~~~ handle_alt: ~~~~~~~~~~~ if (syntax & RE_LIMITED_OPS) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto normal_char; ~~~~~~~~~~~~~~~~~ /* Insert before the previous alternative a jump which ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ jumps to this alternative if the former fails. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ GET_BUFFER_SPACE (3); ~~~~~~~~~~~~~~~~~~~~~ INSERT_JUMP (on_failure_jump, begalt, buf_end + 6); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ pending_exact = 0; ~~~~~~~~~~~~~~~~~~ buf_end += 3; ~~~~~~~~~~~~~ /* The alternative before this one has a jump after it ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ which gets executed if it gets matched. Adjust that ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ jump so it will jump to this alternative's analogous ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ jump (put in below, which in turn will jump to the next ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (if any) alternative's such jump, etc.). The last such ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ jump jumps to the correct final destination. A picture: ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ _____ _____ ~~~~~~~~~~~ | | | | ~~~~~~~~~~~ | v | v ~~~~~~~~~~~ a | b | c ~~~~~~~~~~~ If we are at `b', then fixup_alt_jump right now points to a ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ three-byte space after `a'. We'll put in the jump, set ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fixup_alt_jump to right after `b', and leave behind three ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bytes which we'll fill in when we get to after `c'. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (fixup_alt_jump) ~~~~~~~~~~~~~~~~~~~ STORE_JUMP (jump_past_alt, fixup_alt_jump, buf_end); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Mark and leave space for a jump after this alternative, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ to be filled in later either by next alternative or ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ when know we're at the end of a series of alternatives. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fixup_alt_jump = buf_end; ~~~~~~~~~~~~~~~~~~~~~~~~~ GET_BUFFER_SPACE (3); ~~~~~~~~~~~~~~~~~~~~~ buf_end += 3; ~~~~~~~~~~~~~ laststart = 0; ~~~~~~~~~~~~~~ begalt = buf_end; ~~~~~~~~~~~~~~~~~ break; ~~~~~~ case '{': ~~~~~~~~~ /* If \{ is a literal. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!(syntax & RE_INTERVALS) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* If we're at `\{' and it's not the open-interval ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ operator. */ ~~~~~~~~~~~~~ || ((syntax & RE_INTERVALS) && (syntax & RE_NO_BK_BRACES)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || (p - 2 == pattern && p == pend)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto normal_backslash; ~~~~~~~~~~~~~~~~~~~~~~ #define BAD_INTERVAL(errnum) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ do \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (syntax & RE_NO_BK_BRACES) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto unfetch_interval; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ else \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ FREE_STACK_RETURN (errnum); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } while (0) ~~~~~~~~~~~ handle_interval: ~~~~~~~~~~~~~~~~ { ~ /* If got here, then the syntax allows intervals. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* At least (most) this many matches must be made. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int lower_bound = 0, upper_bound = -1; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ beg_interval = p - 1; ~~~~~~~~~~~~~~~~~~~~~ if (p == pend || itext_ichar_eql (p, '+')) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ BAD_INTERVAL (REG_EBRACE); ~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ GET_UNSIGNED_NUMBER (lower_bound); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (p == pend) BAD_INTERVAL (REG_EBRACE); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PATFETCH (c); ~~~~~~~~~~~~~ if (c == ',') ~~~~~~~~~~~~~ { ~ if (p == pend || itext_ichar_eql (p, '+')) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ BAD_INTERVAL (REG_EBRACE); ~~~~~~~~~~~~~~~~~~~~~~~~~~ GET_UNSIGNED_NUMBER (upper_bound); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (upper_bound < 0) upper_bound = RE_DUP_MAX; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (p == pend) BAD_INTERVAL (REG_EBRACE); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PATFETCH (c); ~~~~~~~~~~~~~ } ~ else ~~~~ { ~ /* Interval such as `{1}' => match exactly once. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ upper_bound = lower_bound; ~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ if (lower_bound > upper_bound) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ BAD_INTERVAL (REG_EBRACE); ~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ if (upper_bound > RE_DUP_MAX) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ BAD_INTERVAL (REG_ESIZEBR); ~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ if (!(syntax & RE_NO_BK_BRACES)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ if (c != '\\') ~~~~~~~~~~~~~~ FREE_STACK_RETURN (REG_BADBR); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (p == pend) ~~~~~~~~~~~~~~ FREE_STACK_RETURN (REG_EESCAPE); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PATFETCH (c); ~~~~~~~~~~~~~ } ~ if (c != '}') ~~~~~~~~~~~~~ { ~ BAD_INTERVAL (REG_EBRACE); ~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* We just parsed a valid interval. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* It's invalid to have no preceding RE. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!laststart) ~~~~~~~~~~~~~~~ { ~ if (syntax & RE_CONTEXT_INVALID_OPS) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ FREE_STACK_RETURN (REG_BADRPT); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ else if (syntax & RE_CONTEXT_INDEP_OPS) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ laststart = buf_end; ~~~~~~~~~~~~~~~~~~~~ else ~~~~ goto unfetch_interval; ~~~~~~~~~~~~~~~~~~~~~~ } ~ /* If the upper bound is zero, don't want to succeed at ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ all; jump from `laststart' to `b + 3', which will be ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the end of the buffer after we insert the jump. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (upper_bound == 0) ~~~~~~~~~~~~~~~~~~~~~ { ~ GET_BUFFER_SPACE (3); ~~~~~~~~~~~~~~~~~~~~~ INSERT_JUMP (jump, laststart, buf_end + 3); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ buf_end += 3; ~~~~~~~~~~~~~ } ~ /* Otherwise, we have a nontrivial interval. When ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ we're all done, the pattern will look like: ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ set_number_at ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ set_number_at ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ succeed_n ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ~~~~~~~~~~~~~~ jump_n ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (The upper bound and `jump_n' are omitted if ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `upper_bound' is 1, though.) */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ else ~~~~ { /* If the upper bound is > 1, we need to insert ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ more at the end of the loop. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int nbytes = 10 + (upper_bound > 1) * 10; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ GET_BUFFER_SPACE (nbytes); ~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Initialize lower bound of the `succeed_n', even ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ though it will be set during matching by its ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ attendant `set_number_at' (inserted next), ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ because `re_compile_fastmap' needs to know. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Jump to the `jump_n' we might insert below. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ INSERT_JUMP2 (succeed_n, laststart, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ buf_end + 5 + (upper_bound > 1) * 5, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ lower_bound); ~~~~~~~~~~~~~ buf_end += 5; ~~~~~~~~~~~~~ /* Code to initialize the lower bound. Insert ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ before the `succeed_n'. The `5' is the last two ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bytes of this `set_number_at', plus 3 bytes of ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the following `succeed_n'. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ insert_op2 (set_number_at, laststart, 5, lower_bound, buf_end); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ buf_end += 5; ~~~~~~~~~~~~~ if (upper_bound > 1) ~~~~~~~~~~~~~~~~~~~~ { /* More than one repetition is allowed, so ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ append a backward jump to the `succeed_n' ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ that starts this interval. ~~~~~~~~~~~~~~~~~~~~~~~~~~ When we've reached this during matching, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ we'll have matched the interval once, so ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ jump back only `upper_bound - 1' times. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ STORE_JUMP2 (jump_n, buf_end, laststart + 5, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ upper_bound - 1); ~~~~~~~~~~~~~~~~~ buf_end += 5; ~~~~~~~~~~~~~ /* The location we want to set is the second ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ parameter of the `jump_n'; that is `b-2' as ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ an absolute address. `laststart' will be ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the `set_number_at' we're about to insert; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `laststart+3' the number to set, the source ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for the relative address. But we are ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ inserting into the middle of the pattern -- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ so everything is getting moved up by 5. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Conclusion: (b - 2) - (laststart + 3) + 5, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ i.e., b - laststart. ~~~~~~~~~~~~~~~~~~~~ We insert this at the beginning of the loop ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ so that if we fail during matching, we'll ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ reinitialize the bounds. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ insert_op2 (set_number_at, laststart, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ buf_end - laststart, ~~~~~~~~~~~~~~~~~~~~ upper_bound - 1, buf_end); ~~~~~~~~~~~~~~~~~~~~~~~~~~ buf_end += 5; ~~~~~~~~~~~~~ } ~ } ~ pending_exact = 0; ~~~~~~~~~~~~~~~~~~ beg_interval = NULL; ~~~~~~~~~~~~~~~~~~~~ } ~ break; ~~~~~~ #undef BAD_INTERVAL ~~~~~~~~~~~~~~~~~~~ unfetch_interval: ~~~~~~~~~~~~~~~~~ /* If an invalid interval, match the characters as literals. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ assert (beg_interval); ~~~~~~~~~~~~~~~~~~~~~~ p = beg_interval; ~~~~~~~~~~~~~~~~~ beg_interval = NULL; ~~~~~~~~~~~~~~~~~~~~ /* normal_char and normal_backslash need `c'. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PATFETCH (c); ~~~~~~~~~~~~~ if (!(syntax & RE_NO_BK_BRACES)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ if (p > pattern && p[-1] == '\\') ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto normal_backslash; ~~~~~~~~~~~~~~~~~~~~~~ } ~ goto normal_char; ~~~~~~~~~~~~~~~~~ #ifdef emacs ~~~~~~~~~~~~ /* There is no way to specify the before_dot and after_dot ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ operators. rms says this is ok. --karl */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case '=': ~~~~~~~~~ BUF_PUSH (at_dot); ~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case 's': ~~~~~~~~~ laststart = buf_end; ~~~~~~~~~~~~~~~~~~~~ PATFETCH (c); ~~~~~~~~~~~~~ /* XEmacs addition */ ~~~~~~~~~~~~~~~~~~~~~ if (c >= 0x80 || syntax_spec_code[c] == 0377) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ FREE_STACK_RETURN (REG_ESYNTAX); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ BUF_PUSH_2 (syntaxspec, syntax_spec_code[c]); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case 'S': ~~~~~~~~~ laststart = buf_end; ~~~~~~~~~~~~~~~~~~~~ PATFETCH (c); ~~~~~~~~~~~~~ /* XEmacs addition */ ~~~~~~~~~~~~~~~~~~~~~ if (c >= 0x80 || syntax_spec_code[c] == 0377) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ FREE_STACK_RETURN (REG_ESYNTAX); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ BUF_PUSH_2 (notsyntaxspec, syntax_spec_code[c]); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ #ifdef MULE ~~~~~~~~~~~ /* 97.2.17 jhod merged in to XEmacs from mule-2.3 */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case 'c': ~~~~~~~~~ laststart = buf_end; ~~~~~~~~~~~~~~~~~~~~ PATFETCH_RAW (c); ~~~~~~~~~~~~~~~~~ if (c < 32 || c > 127) ~~~~~~~~~~~~~~~~~~~~~~ FREE_STACK_RETURN (REG_ECATEGORY); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ BUF_PUSH_2 (categoryspec, c); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case 'C': ~~~~~~~~~ laststart = buf_end; ~~~~~~~~~~~~~~~~~~~~ PATFETCH_RAW (c); ~~~~~~~~~~~~~~~~~ if (c < 32 || c > 127) ~~~~~~~~~~~~~~~~~~~~~~ FREE_STACK_RETURN (REG_ECATEGORY); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ BUF_PUSH_2 (notcategoryspec, c); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ /* end of category patch */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif /* MULE */ ~~~~~~~~~~~~~~~~~ #endif /* emacs */ ~~~~~~~~~~~~~~~~~~ case 'w': ~~~~~~~~~ laststart = buf_end; ~~~~~~~~~~~~~~~~~~~~ BUF_PUSH (wordchar); ~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case 'W': ~~~~~~~~~ laststart = buf_end; ~~~~~~~~~~~~~~~~~~~~ BUF_PUSH (notwordchar); ~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case '<': ~~~~~~~~~ BUF_PUSH (wordbeg); ~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case '>': ~~~~~~~~~ BUF_PUSH (wordend); ~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case 'b': ~~~~~~~~~ BUF_PUSH (wordbound); ~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case 'B': ~~~~~~~~~ BUF_PUSH (notwordbound); ~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case '`': ~~~~~~~~~ BUF_PUSH (begbuf); ~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case '\'': ~~~~~~~~~~ BUF_PUSH (endbuf); ~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case '1': case '2': case '3': case '4': case '5': ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case '6': case '7': case '8': case '9': ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ regnum_t reg = -1, regint; ~~~~~~~~~~~~~~~~~~~~~~~~~~ if (syntax & RE_NO_BK_REFS) ~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto normal_char; ~~~~~~~~~~~~~~~~~ PATUNFETCH; ~~~~~~~~~~~ GET_UNSIGNED_NUMBER (reg); ~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Progressively divide down the backreference until we find ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ one that corresponds to an existing register. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ while (reg > 10 && ~~~~~~~~~~~~~~~~~~ (syntax & RE_NO_MULTI_DIGIT_BK_REFS ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || reg > bufp->re_nsub ~~~~~~~~~~~~~~~~~~~~~~ || (bufp->external_to_internal_register[reg] ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ == (int) 0xDEADBEEF))) ~~~~~~~~~~~~~~~~~~~~~~ { ~ PATUNFETCH; ~~~~~~~~~~~ reg /= 10; ~~~~~~~~~~ } ~ if (reg > bufp->re_nsub ~~~~~~~~~~~~~~~~~~~~~~~ || (bufp->external_to_internal_register[reg] ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ == (int) 0xDEADBEEF)) ~~~~~~~~~~~~~~~~~~~~~ { ~ /* \N with one digit with a non-existing group has always ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ been a syntax error. ~~~~~~~~~~~~~~~~~~~~ GNU as of Fr 27 Mär 2020 16:24:07 GMT do not accept ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ multidigit backreferences; if they did there would be ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ an argument for this not being an error for those ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ backreferences that are less than some known named ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ backreference. As it is currently we should error, this ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ will give those writing code for XEmacs better ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ feedback. */ ~~~~~~~~~~~~ FREE_STACK_RETURN (REG_ESUBREG); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ regint = bufp->external_to_internal_register[reg]; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Can't back reference to a subexpression if inside of it. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (group_in_compile_stack (compile_stack, regint)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ /* Check REG, not REGINT. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ while (reg > 10) ~~~~~~~~~~~~~~~~ { ~ PATUNFETCH; ~~~~~~~~~~~ reg = reg / 10; ~~~~~~~~~~~~~~~ } ~ goto normal_char; ~~~~~~~~~~~~~~~~~ } ~ #ifdef emacs ~~~~~~~~~~~~ if (reg > 9 && ~~~~~~~~~~~~~~ bufp->warned_about_incompatible_back_references == 0) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ bufp->warned_about_incompatible_back_references = 1; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ warn_when_safe (intern ("regex"), Qinfo, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ "Back reference \\%d now has new " ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ "semantics in %s", reg, pattern); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ #endif ~~~~~~ laststart = buf_end; ~~~~~~~~~~~~~~~~~~~~ GET_BUFFER_SPACE (3); ~~~~~~~~~~~~~~~~~~~~~ store_op1 (duplicate, buf_end, regint); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ buf_end += 3; ~~~~~~~~~~~~~ } ~ break; ~~~~~~ case '+': ~~~~~~~~~ case '?': ~~~~~~~~~ if (syntax & RE_BK_PLUS_QM) ~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto handle_plus; ~~~~~~~~~~~~~~~~~ else ~~~~ goto normal_backslash; ~~~~~~~~~~~~~~~~~~~~~~ default: ~~~~~~~~ normal_backslash: ~~~~~~~~~~~~~~~~~ /* You might think it would be useful for \ to mean ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ not to translate; but if we don't translate it, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ it will never match anything. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ c = RE_TRANSLATE (c); ~~~~~~~~~~~~~~~~~~~~~ goto normal_char; ~~~~~~~~~~~~~~~~~ } ~ break; ~~~~~~ default: ~~~~~~~~ /* Expects the character in `c'. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* `p' points to the location after where `c' came from. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ normal_char: ~~~~~~~~~~~~ { ~ /* The following conditional synced to GNU Emacs 22.1. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* If no exactn currently being built. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!pending_exact ~~~~~~~~~~~~~~~~~~ /* If last exactn not at current position. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || pending_exact + *pending_exact + 1 != buf_end ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* We have only one byte following the exactn for the count. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || *pending_exact >= (1 << BYTEWIDTH) - MAX_ICHAR_LEN ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* If followed by a repetition operator. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ If the lookahead fails because of end of pattern, any ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ trailing backslash will get caught later. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || (p != pend && (*p == '*' || *p == '^')) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || ((syntax & RE_BK_PLUS_QM) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ? p + 1 < pend && *p == '\\' && (p[1] == '+' || p[1] == '?') ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ : p != pend && (*p == '+' || *p == '?')) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || ((syntax & RE_INTERVALS) ~~~~~~~~~~~~~~~~~~~~~~~~~~~ && ((syntax & RE_NO_BK_BRACES) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ? p != pend && *p == '{' ~~~~~~~~~~~~~~~~~~~~~~~~ : p + 1 < pend && (p[0] == '\\' && p[1] == '{')))) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ /* Start building a new exactn. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ laststart = buf_end; ~~~~~~~~~~~~~~~~~~~~ BUF_PUSH_2 (exactn, 0); ~~~~~~~~~~~~~~~~~~~~~~~ pending_exact = buf_end - 1; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ #ifndef MULE ~~~~~~~~~~~~ BUF_PUSH (c); ~~~~~~~~~~~~~ (*pending_exact)++; ~~~~~~~~~~~~~~~~~~~ #else ~~~~~ { ~ Bytecount bt_count; ~~~~~~~~~~~~~~~~~~~ Ibyte tmp_buf[MAX_ICHAR_LEN]; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int i; ~~~~~~ bt_count = set_itext_ichar (tmp_buf, c); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (i = 0; i < bt_count; i++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ BUF_PUSH (tmp_buf[i]); ~~~~~~~~~~~~~~~~~~~~~~ (*pending_exact)++; ~~~~~~~~~~~~~~~~~~~ } ~ } ~ #endif ~~~~~~ break; ~~~~~~ } ~ } /* switch (c) */ ~~~~~~~~~~~~~~~~~~ } /* while p != pend */ ~~~~~~~~~~~~~~~~~~~~~~~ /* Through the pattern now. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (fixup_alt_jump) ~~~~~~~~~~~~~~~~~~~ STORE_JUMP (jump_past_alt, fixup_alt_jump, buf_end); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!COMPILE_STACK_EMPTY) ~~~~~~~~~~~~~~~~~~~~~~~~~ FREE_STACK_RETURN (REG_EPAREN); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* If we don't want backtracking, force success ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the first time we reach the end of the compiled pattern. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (syntax & RE_NO_POSIX_BACKTRACKING) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ BUF_PUSH (succeed); ~~~~~~~~~~~~~~~~~~~ xfree (compile_stack.stack); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* We have succeeded; set the length of the buffer. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->used = buf_end - bufp->buffer; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef DEBUG ~~~~~~~~~~~~ if (debug_regexps & RE_DEBUG_COMPILATION) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ DEBUG_PRINT1 ("\nCompiled pattern: \n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ print_compiled_pattern (bufp); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ #endif /* DEBUG */ ~~~~~~~~~~~~~~~~~~ #ifndef MATCH_MAY_ALLOCATE ~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Initialize the failure stack to the largest possible stack. This ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ isn't necessary unless we're trying to avoid calling alloca in ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the search and match routines. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ int num_regs = bufp->re_ngroups + 1; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Since DOUBLE_FAIL_STACK refuses to double only if the current size ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ is strictly greater than re_max_failures, the largest possible stack ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ is 2 * re_max_failures failure points. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (fail_stack.size < (2 * re_max_failures * MAX_FAILURE_ITEMS)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ fail_stack.size = (2 * re_max_failures * MAX_FAILURE_ITEMS); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (! fail_stack.stack) ~~~~~~~~~~~~~~~~~~~~~~~ fail_stack.stack ~~~~~~~~~~~~~~~~ = (fail_stack_elt_t *) xmalloc (fail_stack.size ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ * sizeof (fail_stack_elt_t)); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ else ~~~~ fail_stack.stack ~~~~~~~~~~~~~~~~ = (fail_stack_elt_t *) xrealloc (fail_stack.stack, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (fail_stack.size ~~~~~~~~~~~~~~~~ * sizeof (fail_stack_elt_t))); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ regex_grow_registers (num_regs); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ #endif /* not MATCH_MAY_ALLOCATE */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return REG_NOERROR; ~~~~~~~~~~~~~~~~~~~ } /* regex_compile */ ~~~~~~~~~~~~~~~~~~~~~ ~ /* Subroutines for `regex_compile'. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Store OP at LOC followed by two-byte integer parameter ARG. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ static void ~~~~~~~~~~~ store_op1 (re_opcode_t op, unsigned char *loc, int arg) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ *loc = (unsigned char) op; ~~~~~~~~~~~~~~~~~~~~~~~~~~ STORE_NUMBER (loc + 1, arg); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* Like `store_op1', but for two two-byte parameters ARG1 and ARG2. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ static void ~~~~~~~~~~~ store_op2 (re_opcode_t op, unsigned char *loc, int arg1, int arg2) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ *loc = (unsigned char) op; ~~~~~~~~~~~~~~~~~~~~~~~~~~ STORE_NUMBER (loc + 1, arg1); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ STORE_NUMBER (loc + 3, arg2); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* Copy the bytes from LOC to END to open up three bytes of space at LOC ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for OP followed by two-byte integer parameter ARG. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ static void ~~~~~~~~~~~ insert_op1 (re_opcode_t op, unsigned char *loc, int arg, unsigned char *end) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ REGISTER unsigned char *pfrom = end; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ REGISTER unsigned char *pto = end + 3; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ while (pfrom != loc) ~~~~~~~~~~~~~~~~~~~~ *--pto = *--pfrom; ~~~~~~~~~~~~~~~~~~ store_op1 (op, loc, arg); ~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* Like `insert_op1', but for two two-byte parameters ARG1 and ARG2. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ static void ~~~~~~~~~~~ insert_op2 (re_opcode_t op, unsigned char *loc, int arg1, int arg2, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ unsigned char *end) ~~~~~~~~~~~~~~~~~~~ { ~ REGISTER unsigned char *pfrom = end; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ REGISTER unsigned char *pto = end + 5; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ while (pfrom != loc) ~~~~~~~~~~~~~~~~~~~~ *--pto = *--pfrom; ~~~~~~~~~~~~~~~~~~ store_op2 (op, loc, arg1, arg2); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* P points to just after a ^ in PATTERN. Return true if that ^ comes ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ after an alternative or a begin-subexpression. We assume there is at ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ least one character before the ^. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ static re_bool ~~~~~~~~~~~~~~ at_begline_loc_p (re_char *pattern, re_char *p, reg_syntax_t syntax) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ re_char *prev = p - 2; ~~~~~~~~~~~~~~~~~~~~~~ re_bool prev_prev_backslash = prev > pattern && prev[-1] == '\\'; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return ~~~~~~ /* After a subexpression? */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (*prev == '(' && (syntax & RE_NO_BK_PARENS || prev_prev_backslash)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* After an alternative? */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || (*prev == '|' && (syntax & RE_NO_BK_VBAR || prev_prev_backslash)); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* The dual of at_begline_loc_p. This one is for $. We assume there is ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ at least one character after the $, i.e., `P < PEND'. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ static re_bool ~~~~~~~~~~~~~~ at_endline_loc_p (re_char *p, re_char *pend, int syntax) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ re_char *next = p; ~~~~~~~~~~~~~~~~~~ re_bool next_backslash = *next == '\\'; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_char *next_next = p + 1 < pend ? p + 1 : 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return ~~~~~~ /* Before a subexpression? */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (syntax & RE_NO_BK_PARENS ? *next == ')' ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ : next_backslash && next_next && *next_next == ')') ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Before an alternative? */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || (syntax & RE_NO_BK_VBAR ? *next == '|' ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ : next_backslash && next_next && *next_next == '|'); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* Returns true if REGNUM is in one of COMPILE_STACK's elements and ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ false if it's not. */ ~~~~~~~~~~~~~~~~~~~~~~ static re_bool ~~~~~~~~~~~~~~ group_in_compile_stack (compile_stack_type compile_stack, regnum_t regnum) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ int this_element; ~~~~~~~~~~~~~~~~~ for (this_element = compile_stack.avail - 1; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ this_element >= 0; ~~~~~~~~~~~~~~~~~~ this_element--) ~~~~~~~~~~~~~~~ if (compile_stack.stack[this_element].regnum == regnum) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return true; ~~~~~~~~~~~~ return false; ~~~~~~~~~~~~~ } ~ /* Read the ending character of a range (in a bracket expression) from the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ uncompiled pattern *P_PTR (which ends at PEND). We assume the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ starting character is in `P[-2]'. (`P[-1]' is the character `-'.) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Then we set the translation of all bits between the starting and ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ending characters (inclusive) in the compiled pattern B. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Return an error code. ~~~~~~~~~~~~~~~~~~~~~ We use these short variable names so we can use the same macros as ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `regex_compile' itself. ~~~~~~~~~~~~~~~~~~~~~~~ Under Mule, this is only called when both chars of the range are ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ASCII. */ ~~~~~~~~~ static reg_errcode_t ~~~~~~~~~~~~~~~~~~~~ compile_range (re_char **p_ptr, re_char *pend, RE_TRANSLATE_TYPE translate, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ reg_syntax_t syntax, unsigned char *buf_end) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ Ichar this_char; ~~~~~~~~~~~~~~~~ re_char *p = *p_ptr; ~~~~~~~~~~~~~~~~~~~~ int range_start, range_end; ~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (p == pend) ~~~~~~~~~~~~~~ return REG_ERANGE; ~~~~~~~~~~~~~~~~~~ /* Even though the pattern is a signed `char *', we need to fetch ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ with unsigned char *'s; if the high bit of the pattern character ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ is set, the range endpoints will be negative if we fetch using a ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ signed char *. ~~~~~~~~~~~~~~ We also want to fetch the endpoints without translating them; the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ appropriate translation is done in the bit-setting loop below. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* The SVR4 compiler on the 3B2 had trouble with unsigned const char *. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ range_start = ((const unsigned char *) p)[-2]; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ range_end = ((const unsigned char *) p)[0]; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Have to increment the pointer into the pattern string, so the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ caller isn't still at the ending character. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (*p_ptr)++; ~~~~~~~~~~~ /* If the start is after the end, the range is empty. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (range_start > range_end) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return syntax & RE_NO_EMPTY_RANGES ? REG_ERANGE : REG_NOERROR; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Here we see why `this_char' has to be larger than an `unsigned ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ char' -- the range is inclusive, so if `range_end' == 0xff ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (assuming 8-bit characters), we would otherwise go into an infinite ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ loop, since all characters <= 0xff. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (this_char = range_start; this_char <= range_end; this_char++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ SET_LIST_BIT (RE_TRANSLATE (this_char)); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ return REG_NOERROR; ~~~~~~~~~~~~~~~~~~~ } ~ #ifdef MULE ~~~~~~~~~~~ static reg_errcode_t ~~~~~~~~~~~~~~~~~~~~ compile_extended_range (re_char **p_ptr, re_char *pend, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RE_TRANSLATE_TYPE translate, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ reg_syntax_t syntax, Lisp_Object rtab) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ Ichar this_char, range_start, range_end; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ const Ibyte *p; ~~~~~~~~~~~~~~~ if (*p_ptr == pend) ~~~~~~~~~~~~~~~~~~~ return REG_ERANGE; ~~~~~~~~~~~~~~~~~~ p = (const Ibyte *) *p_ptr; ~~~~~~~~~~~~~~~~~~~~~~~~~~~ range_end = itext_ichar (p); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ p--; /* back to '-' */ ~~~~~~~~~~~~~~~~~~~~~~ DEC_IBYTEPTR (p); /* back to start of range */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* We also want to fetch the endpoints without translating them; the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ appropriate translation is done in the bit-setting loop below. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ range_start = itext_ichar (p); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ INC_IBYTEPTR (*p_ptr); ~~~~~~~~~~~~~~~~~~~~~~ /* If the start is after the end, the range is empty. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (range_start > range_end) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return syntax & RE_NO_EMPTY_RANGES ? REG_ERANGE : REG_NOERROR; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifndef UNICODE_INTERNAL ~~~~~~~~~~~~~~~~~~~~~~~~ /* Can't have ranges spanning different charsets, except maybe for ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ranges entirely within the first 256 chars. (The intent of this is that ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the effect of such a range would be unpredictable, since there is no ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ well-defined ordering over charsets and the particular assignment of ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ charset ID's is arbitrary.) This does not apply to Unicode, with ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ well-defined character values. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if ((range_start >= 0x100 || range_end >= 0x100) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ && !EQ (old_mule_ichar_charset (range_start), ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ old_mule_ichar_charset (range_end))) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return REG_ERANGESPAN; ~~~~~~~~~~~~~~~~~~~~~~ #endif /* not UNICODE_INTERNAL */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (TRANSLATE_P (translate)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ /* #### This might be way inefficient if the range encompasses 10,000 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ chars or something. To be efficient, you'd have to do something like ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ this: ~~~~~ range_table a ~~~~~~~~~~~~~ range_table b; ~~~~~~~~~~~~~~ map_char_table (translation table, [range_start, range_end]) of ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ lambda (ch, translation): ~~~~~~~~~~~~~~~~~~~~~~~~~ put (ch, Qt) in a ~~~~~~~~~~~~~~~~~ put (translation, Qt) in b ~~~~~~~~~~~~~~~~~~~~~~~~~~ invert the range in a and truncate to [range_start, range_end] ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ put the union of a, b in rtab ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ This is to say, we want to map every character that has a translation ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ to its translation, and other characters to themselves. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ This assumes, as is reasonable in practice, that a translation ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ table maps individual characters to their translation, and does ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ not generally map multiple characters to the same translation. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ ~~ for (this_char = range_start; this_char <= range_end; this_char++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ SET_RANGETAB_BIT (RE_TRANSLATE (this_char)); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ } ~ else ~~~~ put_range_table (rtab, range_start, range_end, Qt); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return REG_NOERROR; ~~~~~~~~~~~~~~~~~~~ } ~ #endif /* MULE */ ~~~~~~~~~~~~~~~~~ #ifdef emacs ~~~~~~~~~~~~ reg_errcode_t ~~~~~~~~~~~~~ compile_char_class (re_wctype_t cc, Lisp_Object rtab, Bitbyte *flags_out) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ *flags_out |= re_wctype_to_bit (cc); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ switch (cc) ~~~~~~~~~~~ { ~ case RECC_ASCII: ~~~~~~~~~~~~~~~~ put_range_table (rtab, 0, 0x7f, Qt); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case RECC_XDIGIT: ~~~~~~~~~~~~~~~~~ put_range_table (rtab, 'a', 'f', Qt); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ put_range_table (rtab, 'A', 'f', Qt); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* fallthrough */ ~~~~~~~~~~~~~~~~~ case RECC_DIGIT: ~~~~~~~~~~~~~~~~ put_range_table (rtab, '0', '9', Qt); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case RECC_BLANK: ~~~~~~~~~~~~~~~~ put_range_table (rtab, ' ', ' ', Qt); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ put_range_table (rtab, '\t', '\t', Qt); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case RECC_PRINT: ~~~~~~~~~~~~~~~~ put_range_table (rtab, ' ', 0x7e, Qt); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ put_range_table (rtab, 0x80, CHAR_CODE_LIMIT, Qt); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case RECC_GRAPH: ~~~~~~~~~~~~~~~~ put_range_table (rtab, '!', 0x7e, Qt); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ put_range_table (rtab, 0x80, CHAR_CODE_LIMIT, Qt); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case RECC_NONASCII: ~~~~~~~~~~~~~~~~~~~ case RECC_MULTIBYTE: ~~~~~~~~~~~~~~~~~~~~ put_range_table (rtab, 0x80, CHAR_CODE_LIMIT, Qt); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case RECC_CNTRL: ~~~~~~~~~~~~~~~~ put_range_table (rtab, 0x00, 0x1f, Qt); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case RECC_UNIBYTE: ~~~~~~~~~~~~~~~~~~ /* Never true in XEmacs. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ /* The following all have their own bits in the class_bits argument to ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ charset_mule and charset_mule_not, they don't use the range table ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ information. */ ~~~~~~~~~~~~~~~ case RECC_ALPHA: ~~~~~~~~~~~~~~~~ case RECC_WORD: ~~~~~~~~~~~~~~~ case RECC_ALNUM: /* Equivalent to RECC_WORD */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_LOWER: ~~~~~~~~~~~~~~~~ case RECC_PUNCT: ~~~~~~~~~~~~~~~~ case RECC_SPACE: ~~~~~~~~~~~~~~~~ case RECC_UPPER: ~~~~~~~~~~~~~~~~ break; ~~~~~~ } ~ return REG_NOERROR; ~~~~~~~~~~~~~~~~~~~ } ~ #endif /* MULE */ ~~~~~~~~~~~~~~~~~ ~ /* re_compile_fastmap computes a ``fastmap'' for the compiled pattern in ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ BUFP. A fastmap records which of the (1 << BYTEWIDTH) possible ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ characters can start a string that matches the pattern. This fastmap ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ is used by re_search to skip quickly over impossible starting points. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ The caller must supply the address of a (1 << BYTEWIDTH)-byte data ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ area as BUFP->fastmap. ~~~~~~~~~~~~~~~~~~~~~~ We set the `fastmap', `fastmap_accurate', and `can_be_null' fields in ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the pattern buffer. ~~~~~~~~~~~~~~~~~~~ Returns 0 if we succeed, -2 if an internal error. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int ~~~ re_compile_fastmap (struct re_pattern_buffer *bufp ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RE_LISP_SHORT_CONTEXT_ARGS_DECL) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ int j, k; ~~~~~~~~~ #ifdef MATCH_MAY_ALLOCATE ~~~~~~~~~~~~~~~~~~~~~~~~~ fail_stack_type fail_stack; ~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ /* We don't push any register information onto the failure stack. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* &&#### this should be changed for 8-bit-fixed, for efficiency. see ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ comment marked with &&#### in re_search_2. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ REGISTER char *fastmap = bufp->fastmap; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ unsigned char *pattern = bufp->buffer; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ long size = bufp->used; ~~~~~~~~~~~~~~~~~~~~~~~ re_char *p = pattern; ~~~~~~~~~~~~~~~~~~~~~ REGISTER re_char *pend = pattern + size; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef REGEX_REL_ALLOC ~~~~~~~~~~~~~~~~~~~~~~ /* This holds the pointer to the failure stack, when ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ it is allocated relocatably. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fail_stack_elt_t *failure_stack_ptr; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ /* Assume that each path through the pattern can be null until ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ proven otherwise. We set this false at the bottom of switch ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ statement, to which we get only if a particular path doesn't ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ match the empty string. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_bool path_can_be_null = true; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* We aren't doing a `succeed_n' to begin with. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_bool succeed_n_p = false; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef ERROR_CHECK_MALLOC ~~~~~~~~~~~~~~~~~~~~~~~~~ /* The pattern comes from string data, not buffer data. We don't access ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ any buffer data, so we don't have to worry about malloc() (but the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ disallowed flag may have been set by a caller). */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int depth = bind_regex_malloc_disallowed (0); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ assert (fastmap != NULL && p != NULL); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ INIT_FAIL_STACK (); ~~~~~~~~~~~~~~~~~~~ memset (fastmap, 0, 1 << BYTEWIDTH); /* Assume nothing's valid. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->fastmap_accurate = 1; /* It will be when we're done. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->can_be_null = 0; ~~~~~~~~~~~~~~~~~~~~~~ while (1) ~~~~~~~~~ { ~ if (p == pend || *p == succeed) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ /* We have reached the (effective) end of pattern. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!FAIL_STACK_EMPTY ()) ~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ bufp->can_be_null |= path_can_be_null; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Reset for next path. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~ path_can_be_null = true; ~~~~~~~~~~~~~~~~~~~~~~~~ p = (unsigned char *) fail_stack.stack[--fail_stack.avail].pointer; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ continue; ~~~~~~~~~ } ~ else ~~~~ break; ~~~~~~ } ~ /* We should never be about to go beyond the end of the pattern. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ assert (p < pend); ~~~~~~~~~~~~~~~~~~ switch ((re_opcode_t) *p++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ /* I guess the idea here is to simply not bother with a fastmap ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if a backreference is used, since it's too hard to figure out ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the fastmap for the corresponding group. Setting ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `can_be_null' stops `re_search_2' from using the fastmap, so ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ that is all we do. */ ~~~~~~~~~~~~~~~~~~~~~~ case duplicate: ~~~~~~~~~~~~~~~ bufp->can_be_null = 1; ~~~~~~~~~~~~~~~~~~~~~~ goto done; ~~~~~~~~~~ /* Following are the cases which match a character. These end ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ with `break'. */ ~~~~~~~~~~~~~~~~~ case exactn: ~~~~~~~~~~~~ fastmap[p[1]] = 1; ~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case charset: ~~~~~~~~~~~~~ /* XEmacs: Under Mule, these bit vectors will ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ only contain values for characters below 0x80. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH))) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[j] = 1; ~~~~~~~~~~~~~~~ break; ~~~~~~ case charset_not: ~~~~~~~~~~~~~~~~~ /* Chars beyond end of map must be allowed. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef MULE ~~~~~~~~~~~ for (j = *p * BYTEWIDTH; j < 0x80; j++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[j] = 1; ~~~~~~~~~~~~~~~ /* And all extended characters must be allowed, too. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (j = 0x80; j < 0x100; j++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (ibyte_first_byte_p (j)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[j] = 1; ~~~~~~~~~~~~~~~ #else /* not MULE */ ~~~~~~~~~~~~~~~~~~~~ for (j = *p * BYTEWIDTH; j < (1 << BYTEWIDTH); j++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[j] = 1; ~~~~~~~~~~~~~~~ #endif /* MULE */ ~~~~~~~~~~~~~~~~~ for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!(p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH)))) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[j] = 1; ~~~~~~~~~~~~~~~ break; ~~~~~~ #ifdef MULE ~~~~~~~~~~~ case charset_mule: ~~~~~~~~~~~~~~~~~~ { ~ int nentries; ~~~~~~~~~~~~~ Bitbyte flags = *p++; ~~~~~~~~~~~~~~~~~~~~~ if (flags) ~~~~~~~~~~ { ~ /* We need to consult the syntax table, fastmap won't ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ work. */ ~~~~~~~~ bufp->can_be_null = 1; ~~~~~~~~~~~~~~~~~~~~~~ goto done; ~~~~~~~~~~ } ~ nentries = unified_range_table_nentries ((void *) p); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (j = 0; j < nentries; j++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ EMACS_INT first, last; ~~~~~~~~~~~~~~~~~~~~~~ Lisp_Object dummy_val; ~~~~~~~~~~~~~~~~~~~~~~ int jj; ~~~~~~~ Ibyte strr[MAX_ICHAR_LEN]; ~~~~~~~~~~~~~~~~~~~~~~~~~~ unified_range_table_get_range ((void *) p, j, &first, &last, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ &dummy_val); ~~~~~~~~~~~~ #ifndef UNICODE_INTERNAL ~~~~~~~~~~~~~~~~~~~~~~~~ for (jj = first; jj <= last && jj < 0x80; jj++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[jj] = 1; ~~~~~~~~~~~~~~~~ /* Ranges below 0x100 can span charsets, but there ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ are only two (Control-1 and Latin-1), and ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ either first or last has to be in them. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ set_itext_ichar (strr, first); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[*strr] = 1; ~~~~~~~~~~~~~~~~~~~ if (last < 0x100) ~~~~~~~~~~~~~~~~~ { ~ set_itext_ichar (strr, last); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[*strr] = 1; ~~~~~~~~~~~~~~~~~~~ } ~ else if (CHAR_CODE_LIMIT == last) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ /* This is RECC_MULTIBYTE or RECC_NONASCII; true for all ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ non-ASCII characters. */ ~~~~~~~~~~~~~~~~~~~~~~~~ jj = 0x80; ~~~~~~~~~~ while (jj < 0xA0) ~~~~~~~~~~~~~~~~~ { ~ fastmap[jj++] = 1; ~~~~~~~~~~~~~~~~~~ } ~ } ~ #else ~~~~~ /* Ranges can span charsets. We depend on the fact that ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ lead bytes are monotonically non-decreasing as ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ character values increase. @@#### This is a fairly ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ reasonable assumption in general (but DOES NOT WORK in ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ old Mule due to the ordering of private dimension-1 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ chars before official dimension-2 chars), and introduces ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ a dependency on the particular representation. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ Ibyte strrlast[MAX_ICHAR_LEN]; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ set_itext_ichar (strr, first); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ set_itext_ichar (strrlast, min (last, CHAR_CODE_LIMIT - 1)); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (jj = *strr; jj <= *strrlast; jj++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[jj] = 1; ~~~~~~~~~~~~~~~~ } ~ #endif /* not UNICODE_INTERNAL */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* If it's not a possible first byte, it can't be in the fastmap. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ In UTF-8, lead bytes are not contiguous with ASCII, so a ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ range spanning the ASCII/non-ASCII boundary will put ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ extraneous bytes in the range [0x80 - 0xBF] in the fastmap. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (j = 0x80; j < 0x100; j++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!ibyte_first_byte_p (j)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[j] = 0; ~~~~~~~~~~~~~~~ } ~ break; ~~~~~~ case charset_mule_not: ~~~~~~~~~~~~~~~~~~~~~~ { ~ int nentries; ~~~~~~~~~~~~~ int smallest_prev = 0; ~~~~~~~~~~~~~~~~~~~~~~ Bitbyte flags = *p++; ~~~~~~~~~~~~~~~~~~~~~ if (flags) ~~~~~~~~~~ { ~ /* We need to consult the syntax table, fastmap won't ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ work. */ ~~~~~~~~ bufp->can_be_null = 1; ~~~~~~~~~~~~~~~~~~~~~~ goto done; ~~~~~~~~~~ } ~ nentries = unified_range_table_nentries ((void *) p); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifndef UNICODE_INTERNAL ~~~~~~~~~~~~~~~~~~~~~~~~ for (j = 0; j < nentries; j++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ EMACS_INT first, last; ~~~~~~~~~~~~~~~~~~~~~~ Lisp_Object dummy_val; ~~~~~~~~~~~~~~~~~~~~~~ int jj; ~~~~~~~ unified_range_table_get_range ((void *) p, j, &first, &last, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ &dummy_val); ~~~~~~~~~~~~ for (jj = smallest_prev; jj < first && jj < 0x80; jj++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[jj] = 1; ~~~~~~~~~~~~~~~~ smallest_prev = last + 1; ~~~~~~~~~~~~~~~~~~~~~~~~~ if (smallest_prev >= 0x80) ~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ } ~ /* Also set lead bytes after the end */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (j = smallest_prev; j < 0x80; j++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[j] = 1; ~~~~~~~~~~~~~~~ /* Calculating which lead bytes are actually allowed ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ here is rather difficult, so we just punt and allow ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ all of them. ~~~~~~~~~~~~ */ ~~ for (j = 0x80; j < 0x100; j++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (ibyte_first_byte_p (j)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[j] = 1; ~~~~~~~~~~~~~~~ #else ~~~~~ for (j = 0; j < nentries; j++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ EMACS_INT first, last; ~~~~~~~~~~~~~~~~~~~~~~ /* This denotes a range of lead bytes that are not ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ in the fastmap. */ ~~~~~~~~~~~~~~~~~~ int firstlead, lastlead; ~~~~~~~~~~~~~~~~~~~~~~~~ Lisp_Object dummy_val; ~~~~~~~~~~~~~~~~~~~~~~ int jj; ~~~~~~~ unified_range_table_get_range ((void *) p, j, &first, &last, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ &dummy_val); ~~~~~~~~~~~~ /* With Unicode-internal, lead bytes that are entirely ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ within the range and not including the beginning or end ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ are definitely not in the fastmap. Leading bytes that ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ include the beginning or ending characters will be in ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the fastmap unless the beginning or ending characters ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ are the first or last character, respectively, that uses ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ this lead byte. ~~~~~~~~~~~~~~~ @@#### WARNING! In order to determine whether we are the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ first or last character using a lead byte we use and ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ embed in the code some knowledge of how UTF-8 works -- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ at least, the fact that the the first character using a ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ particular lead byte has the minimum-numbered trailing ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ byte in all its trailing bytes, and the last character ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ using a particular lead byte has the maximum-numbered ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ trailing byte in all its trailing bytes. We abstract ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ away the actual minimum/maximum trailing byte numbers, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ at least. We could perhaps do this more portably by ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ just looking at the representation of the character one ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ higher or lower and seeing if the lead byte changes, but ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ you'd run into the problem of invalid characters, e.g. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if you're at the edge of the range of surrogates or are ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the top-most allowed character. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ ~~ if (first < 0x80) ~~~~~~~~~~~~~~~~~ firstlead = first; ~~~~~~~~~~~~~~~~~~ else ~~~~ { ~ Ibyte strr[MAX_ICHAR_LEN]; ~~~~~~~~~~~~~~~~~~~~~~~~~~ Bytecount slen = set_itext_ichar (strr, first); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int kk; ~~~~~~~ /* Determine if we're the first character using our ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ leading byte. */ ~~~~~~~~~~~~~~~~ for (kk = 1; kk < slen; kk++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (strr[kk] != FIRST_TRAILING_BYTE) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ /* If not, this leading byte might occur, so ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ make sure it gets added to the fastmap. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ firstlead = *strr + 1; ~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ } ~ /* Otherwise, we're the first character using our ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ leading byte, and we don't need to add the leading ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ byte to the fastmap. (If our range doesn't ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ completely cover the leading byte, it will get added ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ anyway by the code handling the other end of the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ range.) */ ~~~~~~~~~~ firstlead = *strr; ~~~~~~~~~~~~~~~~~~ } ~ if (last < 0x80) ~~~~~~~~~~~~~~~~ lastlead = last; ~~~~~~~~~~~~~~~~ else ~~~~ { ~ Ibyte strr[MAX_ICHAR_LEN]; ~~~~~~~~~~~~~~~~~~~~~~~~~~ Bytecount slen ~~~~~~~~~~~~~~ = set_itext_ichar (strr, ~~~~~~~~~~~~~~~~~~~~~~~~ min (last, ~~~~~~~~~~ CHAR_CODE_LIMIT - 1)); ~~~~~~~~~~~~~~~~~~~~~~ int kk; ~~~~~~~ /* Same as above but for the last character using ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ our leading byte. */ ~~~~~~~~~~~~~~~~~~~~ for (kk = 1; kk < slen; kk++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (strr[kk] != LAST_TRAILING_BYTE) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ lastlead = *strr - 1; ~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ } ~ lastlead = *strr; ~~~~~~~~~~~~~~~~~ } ~ /* Now, FIRSTLEAD and LASTLEAD are set to the beginning and ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ end, inclusive, of a range of lead bytes that cannot be ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ in the fastmap. Essentially, we want to set all the other ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bytes to be in the fastmap. Here we handle those after ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the previous range and before this one. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (jj = smallest_prev; jj < firstlead; jj++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[jj] = 1; ~~~~~~~~~~~~~~~~ smallest_prev = lastlead + 1; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* Also set lead bytes after the end of the final range. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (j = smallest_prev; j < 0x100; j++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[j] = 1; ~~~~~~~~~~~~~~~ /* If it's not a possible first byte, it can't be in the fastmap. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ In UTF-8, lead bytes are not contiguous with ASCII, so a ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ range spanning the ASCII/non-ASCII boundary will put ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ extraneous bytes in the range [0x80 - 0xBF] in the fastmap. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (j = 0x80; j < 0x100; j++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!ibyte_first_byte_p (j)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[j] = 0; ~~~~~~~~~~~~~~~ #endif /* UNICODE_INTERNAL */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ break; ~~~~~~ #endif /* MULE */ ~~~~~~~~~~~~~~~~~ case anychar: ~~~~~~~~~~~~~ { ~ int fastmap_newline = fastmap['\n']; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* `.' matches anything ... */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef MULE ~~~~~~~~~~~ /* "anything" only includes bytes that can be the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ first byte of a character. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (j = 0; j < 0x100; j++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (ibyte_first_byte_p (j)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[j] = 1; ~~~~~~~~~~~~~~~ #else ~~~~~ for (j = 0; j < (1 << BYTEWIDTH); j++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[j] = 1; ~~~~~~~~~~~~~~~ #endif ~~~~~~ /* ... except perhaps newline. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!(bufp->syntax & RE_DOT_NEWLINE)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap['\n'] = fastmap_newline; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Return if we have already set `can_be_null'; if we have, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ then the fastmap is irrelevant. Something's wrong here. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ else if (bufp->can_be_null) ~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto done; ~~~~~~~~~~ /* Otherwise, have to check alternative paths. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ } ~ #ifndef emacs ~~~~~~~~~~~~~ case wordchar: ~~~~~~~~~~~~~~ for (j = 0; j < (1 << BYTEWIDTH); j++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (SYNTAX (ignored, j) == Sword) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[j] = 1; ~~~~~~~~~~~~~~~ break; ~~~~~~ case notwordchar: ~~~~~~~~~~~~~~~~~ for (j = 0; j < (1 << BYTEWIDTH); j++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (SYNTAX (ignored, j) != Sword) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[j] = 1; ~~~~~~~~~~~~~~~ break; ~~~~~~ #else /* emacs */ ~~~~~~~~~~~~~~~~~ case wordchar: ~~~~~~~~~~~~~~ case notwordchar: ~~~~~~~~~~~~~~~~~ case wordbound: ~~~~~~~~~~~~~~~ case notwordbound: ~~~~~~~~~~~~~~~~~~ case wordbeg: ~~~~~~~~~~~~~ case wordend: ~~~~~~~~~~~~~ case notsyntaxspec: ~~~~~~~~~~~~~~~~~~~ case syntaxspec: ~~~~~~~~~~~~~~~~ /* This match depends on text properties. These end with ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ aborting optimizations. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->can_be_null = 1; ~~~~~~~~~~~~~~~~~~~~~~ goto done; ~~~~~~~~~~ #if 0 /* all of the following code is unused now that the `syntax-table' ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ property exists -- it's trickier to do this than just look in ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the buffer. &&#### but we could just use the syntax-cache stuff ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ instead; why don't we? --ben */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case wordchar: ~~~~~~~~~~~~~~ k = (int) Sword; ~~~~~~~~~~~~~~~~ goto matchsyntax; ~~~~~~~~~~~~~~~~~ case notwordchar: ~~~~~~~~~~~~~~~~~ k = (int) Sword; ~~~~~~~~~~~~~~~~ goto matchnotsyntax; ~~~~~~~~~~~~~~~~~~~~ case syntaxspec: ~~~~~~~~~~~~~~~~ k = *p++; ~~~~~~~~~ matchsyntax: ~~~~~~~~~~~~ #ifdef MULE ~~~~~~~~~~~ for (j = 0; j < 0x80; j++) ~~~~~~~~~~~~~~~~~~~~~~~~~~ if (SYNTAX ~~~~~~~~~~ (XCHAR_TABLE (BUFFER_MIRROR_SYNTAX_TABLE (lispbuf)), j) == ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (enum syntaxcode) k) ~~~~~~~~~~~~~~~~~~~~ fastmap[j] = 1; ~~~~~~~~~~~~~~~ /* @@#### To be correct, we need to set the fastmap for any ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ lead byte any of whose characters can have this syntax code. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ This is hard to calculate so we just punt for now. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (j = 0x80; j < 0x100; j++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (ibyte_first_byte_p (j)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[j] = 1; ~~~~~~~~~~~~~~~ #else /* not MULE */ ~~~~~~~~~~~~~~~~~~~~ for (j = 0; j < (1 << BYTEWIDTH); j++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (SYNTAX ~~~~~~~~~~ (XCHAR_TABLE (BUFFER_MIRROR_SYNTAX_TABLE (lispbuf)), j) == ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (enum syntaxcode) k) ~~~~~~~~~~~~~~~~~~~~ fastmap[j] = 1; ~~~~~~~~~~~~~~~ #endif /* MULE */ ~~~~~~~~~~~~~~~~~ break; ~~~~~~ case notsyntaxspec: ~~~~~~~~~~~~~~~~~~~ k = *p++; ~~~~~~~~~ matchnotsyntax: ~~~~~~~~~~~~~~~ #ifdef MULE ~~~~~~~~~~~ for (j = 0; j < 0x80; j++) ~~~~~~~~~~~~~~~~~~~~~~~~~~ if (SYNTAX ~~~~~~~~~~ (XCHAR_TABLE ~~~~~~~~~~~~ (BUFFER_MIRROR_SYNTAX_TABLE (lispbuf)), j) != ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (enum syntaxcode) k) ~~~~~~~~~~~~~~~~~~~~ fastmap[j] = 1; ~~~~~~~~~~~~~~~ /* @@#### To be correct, we need to set the fastmap for any ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ lead byte all of whose characters do not have this syntax code. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ This is hard to calculate so we just punt for now. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (j = 0x80; j < 0x100; j++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (ibyte_first_byte_p (j)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[j] = 1; ~~~~~~~~~~~~~~~ #else /* not MULE */ ~~~~~~~~~~~~~~~~~~~~ for (j = 0; j < (1 << BYTEWIDTH); j++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (SYNTAX ~~~~~~~~~~ (XCHAR_TABLE ~~~~~~~~~~~~ (BUFFER_MIRROR_SYNTAX_TABLE (lispbuf)), j) != ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (enum syntaxcode) k) ~~~~~~~~~~~~~~~~~~~~ fastmap[j] = 1; ~~~~~~~~~~~~~~~ #endif /* MULE */ ~~~~~~~~~~~~~~~~~ break; ~~~~~~ #endif /* 0 */ ~~~~~~~~~~~~~~ #ifdef MULE ~~~~~~~~~~~ /* 97/2/17 jhod category patch */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case categoryspec: ~~~~~~~~~~~~~~~~~~ case notcategoryspec: ~~~~~~~~~~~~~~~~~~~~~ bufp->can_be_null = 1; ~~~~~~~~~~~~~~~~~~~~~~ UNBIND_REGEX_MALLOC_CHECK (); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return 0; ~~~~~~~~~ /* end if category patch */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif /* MULE */ ~~~~~~~~~~~~~~~~~ /* All cases after this match the empty string. These end with ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `continue'. */ ~~~~~~~~~~~~~~~ case before_dot: ~~~~~~~~~~~~~~~~ case at_dot: ~~~~~~~~~~~~ case after_dot: ~~~~~~~~~~~~~~~ continue; ~~~~~~~~~ #endif /* emacs */ ~~~~~~~~~~~~~~~~~~ case no_op: ~~~~~~~~~~~ case begline: ~~~~~~~~~~~~~ case endline: ~~~~~~~~~~~~~ case begbuf: ~~~~~~~~~~~~ case endbuf: ~~~~~~~~~~~~ #ifndef emacs ~~~~~~~~~~~~~ case wordbound: ~~~~~~~~~~~~~~~ case notwordbound: ~~~~~~~~~~~~~~~~~~ case wordbeg: ~~~~~~~~~~~~~ case wordend: ~~~~~~~~~~~~~ #endif ~~~~~~ case push_dummy_failure: ~~~~~~~~~~~~~~~~~~~~~~~~ continue; ~~~~~~~~~ case jump_n: ~~~~~~~~~~~~ case pop_failure_jump: ~~~~~~~~~~~~~~~~~~~~~~ case maybe_pop_jump: ~~~~~~~~~~~~~~~~~~~~ case jump: ~~~~~~~~~~ case jump_past_alt: ~~~~~~~~~~~~~~~~~~~ case dummy_failure_jump: ~~~~~~~~~~~~~~~~~~~~~~~~ EXTRACT_NUMBER_AND_INCR (j, p); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ p += j; ~~~~~~~ if (j > 0) ~~~~~~~~~~ continue; ~~~~~~~~~ /* Jump backward implies we just went through the body of a ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ loop and matched nothing. Opcode jumped to should be ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `on_failure_jump' or `succeed_n'. Just treat it like an ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ordinary jump. For a * loop, it has pushed its failure ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ point already; if so, discard that as redundant. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if ((re_opcode_t) *p != on_failure_jump ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ && (re_opcode_t) *p != succeed_n) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ continue; ~~~~~~~~~ p++; ~~~~ EXTRACT_NUMBER_AND_INCR (j, p); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ p += j; ~~~~~~~ /* If what's on the stack is where we are now, pop it. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!FAIL_STACK_EMPTY () ~~~~~~~~~~~~~~~~~~~~~~~~ && fail_stack.stack[fail_stack.avail - 1].pointer == p) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fail_stack.avail--; ~~~~~~~~~~~~~~~~~~~ continue; ~~~~~~~~~ case on_failure_jump: ~~~~~~~~~~~~~~~~~~~~~ case on_failure_keep_string_jump: ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ handle_on_failure_jump: ~~~~~~~~~~~~~~~~~~~~~~~ EXTRACT_NUMBER_AND_INCR (j, p); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* For some patterns, e.g., `(a?)?', `p+j' here points to the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ end of the pattern. We don't want to push such a point, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ since when we restore it above, entering the switch will ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ increment `p' past the end of the pattern. We don't need ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ to push such a point since we obviously won't find any more ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap entries beyond `pend'. Such a pattern can match ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the null string, though. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (p + j < pend) ~~~~~~~~~~~~~~~~~ { ~ if (!PUSH_PATTERN_OP (p + j, fail_stack)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ RESET_FAIL_STACK (); ~~~~~~~~~~~~~~~~~~~~ UNBIND_REGEX_MALLOC_CHECK (); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return -2; ~~~~~~~~~~ } ~ } ~ else ~~~~ bufp->can_be_null = 1; ~~~~~~~~~~~~~~~~~~~~~~ if (succeed_n_p) ~~~~~~~~~~~~~~~~ { ~ EXTRACT_NUMBER_AND_INCR (k, p); /* Skip the n. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ succeed_n_p = false; ~~~~~~~~~~~~~~~~~~~~ } ~ continue; ~~~~~~~~~ case succeed_n: ~~~~~~~~~~~~~~~ /* Get to the number of times to succeed. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ p += 2; ~~~~~~~ /* Increment p past the n for when k != 0. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ EXTRACT_NONNEGATIVE_AND_INCR (k, p); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (k == 0) ~~~~~~~~~~~ { ~ p -= 4; ~~~~~~~ succeed_n_p = true; /* Spaghetti code alert. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto handle_on_failure_jump; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ continue; ~~~~~~~~~ case set_number_at: ~~~~~~~~~~~~~~~~~~~ p += 4; ~~~~~~~ continue; ~~~~~~~~~ case start_memory: ~~~~~~~~~~~~~~~~~~ case stop_memory: ~~~~~~~~~~~~~~~~~ p += 4; ~~~~~~~ continue; ~~~~~~~~~ default: ~~~~~~~~ ABORT (); /* We have listed all the cases. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } /* switch *p++ */ ~~~~~~~~~~~~~~~~~~~ /* Getting here means we have found the possible starting ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ characters for one path of the pattern -- and that the empty ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ string does not match. We need not follow this path further. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Instead, look at the next alternative (remembered on the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ stack), or quit if no more. The test at the top of the loop ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ does these things. */ ~~~~~~~~~~~~~~~~~~~~~~ path_can_be_null = false; ~~~~~~~~~~~~~~~~~~~~~~~~~ p = pend; ~~~~~~~~~ } /* while p */ ~~~~~~~~~~~~~~~ /* Set `can_be_null' for the last path (also the first path, if the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ pattern is empty). */ ~~~~~~~~~~~~~~~~~~~~~~ bufp->can_be_null |= path_can_be_null; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ done: ~~~~~ RESET_FAIL_STACK (); ~~~~~~~~~~~~~~~~~~~~ UNBIND_REGEX_MALLOC_CHECK (); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return 0; ~~~~~~~~~ } /* re_compile_fastmap */ ~~~~~~~~~~~~~~~~~~~~~~~~~~ ~ /* Set REGS to hold NUM_REGS registers, storing them in STARTS and ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ENDS. Subsequent matches using PATTERN_BUFFER and REGS will use ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ this memory for recording register information. STARTS and ENDS ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ must be allocated using the malloc library routine, and must each ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ be at least NUM_REGS * sizeof (regoff_t) bytes long. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ If NUM_REGS == 0, then subsequent matches should allocate their own ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ register data. ~~~~~~~~~~~~~~ Unless this function is called, the first search or match using ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PATTERN_BUFFER will allocate its own register data, without ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ freeing the old data. */ ~~~~~~~~~~~~~~~~~~~~~~~~~ void ~~~~ re_set_registers (struct re_pattern_buffer *bufp, struct re_registers *regs, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int num_regs, regoff_t *starts, regoff_t *ends) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ if (num_regs) ~~~~~~~~~~~~~ { ~ bufp->regs_allocated = REGS_REALLOCATE; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regs->num_regs = num_regs; ~~~~~~~~~~~~~~~~~~~~~~~~~~ regs->start = starts; ~~~~~~~~~~~~~~~~~~~~~ regs->end = ends; ~~~~~~~~~~~~~~~~~ } ~ else ~~~~ { ~ bufp->regs_allocated = REGS_UNALLOCATED; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regs->num_regs = 0; ~~~~~~~~~~~~~~~~~~~ regs->start = regs->end = (regoff_t *) 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ } ~ ~ /* Searching routines. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Like re_search_2, below, but only one string is specified, and ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ doesn't let you say where to stop matching. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int ~~~ re_search (struct re_pattern_buffer *bufp, const char *string, int size, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int startpos, int range, struct re_registers *regs ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RE_LISP_CONTEXT_ARGS_DECL) ~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ return re_search_2 (bufp, NULL, 0, string, size, startpos, range, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regs, size RE_LISP_CONTEXT_ARGS); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* Using the compiled pattern in BUFP->buffer, first tries to match the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ virtual concatenation of STRING1 and STRING2, starting first at index ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ STARTPOS, then at STARTPOS + 1, and so on. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ STRING1 and STRING2 have length SIZE1 and SIZE2, respectively. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RANGE is how far to scan while trying to match. RANGE = 0 means try ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ only at STARTPOS; in general, the last start tried is STARTPOS + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RANGE. ~~~~~~ All sizes and positions refer to bytes (not chars); under Mule, the code ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ knows about the format of the text and will only check at positions ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ where a character starts. ~~~~~~~~~~~~~~~~~~~~~~~~~ With MULE, RANGE is a byte position, not a char position. The last ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ start tried is the character starting <= STARTPOS + RANGE. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ In REGS, return the indices of the virtual concatenation of STRING1 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ and STRING2 that matched the entire BUFP->buffer and its contained ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ subexpressions. ~~~~~~~~~~~~~~~ Do not consider matching one past the index STOP in the virtual ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ concatenation of STRING1 and STRING2. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ We return either the position in the strings at which the match was ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ found, -1 if no match, or -2 if error (such as failure ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ stack overflow). */ ~~~~~~~~~~~~~~~~~~~~ int ~~~ re_search_2 (struct re_pattern_buffer *bufp, const char *str1, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int size1, const char *str2, int size2, int startpos, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int range, struct re_registers *regs, int stop ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RE_LISP_CONTEXT_ARGS_DECL) ~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ int val; ~~~~~~~~ re_char *string1 = (re_char *) str1; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_char *string2 = (re_char *) str2; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ REGISTER char *fastmap = bufp->fastmap; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ REGISTER RE_TRANSLATE_TYPE translate = bufp->translate; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int total_size = size1 + size2; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int endpos = startpos + range; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef REGEX_BEGLINE_CHECK ~~~~~~~~~~~~~~~~~~~~~~~~~~ int anchored_at_begline = 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ re_char *d; ~~~~~~~~~~~ #ifdef emacs ~~~~~~~~~~~~ Internal_Format fmt = buffer_or_other_internal_format (lispobj); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef REL_ALLOC ~~~~~~~~~~~~~~~~ const Ibyte *orig_buftext = ~~~~~~~~~~~~~~~~~~~~~~~~~~~ BUFFERP (lispobj) ? ~~~~~~~~~~~~~~~~~~~ BYTE_BUF_BYTE_ADDRESS (XBUFFER (lispobj), ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ BYTE_BUF_BEG (XBUFFER (lispobj))) : ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 0; ~~ #endif ~~~~~~ #ifdef ERROR_CHECK_MALLOC ~~~~~~~~~~~~~~~~~~~~~~~~~ int depth; ~~~~~~~~~~ #endif ~~~~~~ #endif /* emacs */ ~~~~~~~~~~~~~~~~~~ int forward_search_p; ~~~~~~~~~~~~~~~~~~~~~ /* Check for out-of-range STARTPOS. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (startpos < 0 || startpos > total_size) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return -1; ~~~~~~~~~~ /* Fix up RANGE if it might eventually take us outside ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the virtual concatenation of STRING1 and STRING2. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (endpos < 0) ~~~~~~~~~~~~~~~ range = 0 - startpos; ~~~~~~~~~~~~~~~~~~~~~ else if (endpos > total_size) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ range = total_size - startpos; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ forward_search_p = range > 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (void) (forward_search_p); /* This is only used with assertions, silence the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ compiler warning when they're turned off. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* If the search isn't to be a backwards one, don't waste time in a ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ search for a pattern that must be anchored. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (bufp->used > 0 && (re_opcode_t) bufp->buffer[0] == begbuf && range > 0) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ if (startpos > 0) ~~~~~~~~~~~~~~~~~ return -1; ~~~~~~~~~~ else ~~~~ { ~ d = ((const unsigned char *) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (startpos >= size1 ? string2 - size1 : string1) + startpos); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ range = itext_ichar_len_fmt (d, fmt); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ } ~ #ifdef emacs ~~~~~~~~~~~~ /* In a forward search for something that starts with \=. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ don't keep searching past point. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (bufp->used > 0 && (re_opcode_t) bufp->buffer[0] == at_dot && range > 0) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ if (!BUFFERP (lispobj)) ~~~~~~~~~~~~~~~~~~~~~~~ return -1; ~~~~~~~~~~ range = (BYTE_BUF_PT (XBUFFER (lispobj)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - BYTE_BUF_BEGV (XBUFFER (lispobj)) - startpos); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (range < 0) ~~~~~~~~~~~~~~ return -1; ~~~~~~~~~~ } ~ #endif /* emacs */ ~~~~~~~~~~~~~~~~~~ #ifdef ERROR_CHECK_MALLOC ~~~~~~~~~~~~~~~~~~~~~~~~~ /* Do this after the above return()s. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ depth = bind_regex_malloc_disallowed (1); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ /* Update the fastmap now if not correct already. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ BEGIN_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~~~ if (fastmap && !bufp->fastmap_accurate) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (re_compile_fastmap (bufp RE_LISP_SHORT_CONTEXT_ARGS) == -2) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ END_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~ UNBIND_REGEX_MALLOC_CHECK (); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return -2; ~~~~~~~~~~ } ~ END_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~ RE_SEARCH_RELOCATE_MOVEABLE_DATA_POINTERS (); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef REGEX_BEGLINE_CHECK ~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ long i = 0; ~~~~~~~~~~~ while (i < bufp->used) ~~~~~~~~~~~~~~~~~~~~~~ { ~ if (bufp->buffer[i] == start_memory || ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->buffer[i] == stop_memory) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ i += 4; ~~~~~~~ else ~~~~ break; ~~~~~~ } ~ anchored_at_begline = i < bufp->used && bufp->buffer[i] == begline; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ #endif ~~~~~~ #ifdef emacs ~~~~~~~~~~~~ BEGIN_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~~~ /* Update the mirror syntax table if it's used and dirty. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ SYNTAX_CODE (BUFFER_MIRROR_SYNTAX_TABLE (lispbuf), 'a'); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ scache = setup_syntax_cache (scache, lispobj, lispbuf, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ offset_to_bytexpos (lispobj, startpos), ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 1); ~~~ END_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~ RE_SEARCH_RELOCATE_MOVEABLE_DATA_POINTERS (); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ /* Loop through the string, looking for a place to start matching. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (;;) ~~~~~~~~ { ~ #ifdef REGEX_BEGLINE_CHECK ~~~~~~~~~~~~~~~~~~~~~~~~~~ /* If the regex is anchored at the beginning of a line (i.e. with a ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ^), then we can speed things up by skipping to the next ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ beginning-of-line. However, to determine "beginning of line" we ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ need to look at the previous char, so can't do this check if at ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ beginning of either string. (Well, we could if at the beginning of ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the second string, but it would require additional code, and this ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ is just an optimization.) */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (anchored_at_begline && startpos > 0 && startpos != size1) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ if (range > 0) ~~~~~~~~~~~~~~ { ~ /* whose stupid idea was it anyway to make this ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ function take two strings to match?? */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int lim = 0; ~~~~~~~~~~~~ re_char *orig_d; ~~~~~~~~~~~~~~~~ re_char *stop_d; ~~~~~~~~~~~~~~~~ /* Compute limit as below in fastmap code, so we are guaranteed ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ to remain within a single string. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (startpos < size1 && startpos + range >= size1) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ lim = range - (size1 - startpos); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ d = ((const unsigned char *) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (startpos >= size1 ? string2 - size1 : string1) + startpos); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ orig_d = d; ~~~~~~~~~~~ stop_d = d + range - lim; ~~~~~~~~~~~~~~~~~~~~~~~~~ /* We want to find the next location (including the current ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ one) where the previous char is a newline, so back up one ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ and search forward for a newline. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEC_IBYTEPTR_FMT (d, fmt); /* Ok, since startpos != size1. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Written out as an if-else to avoid testing `translate' ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ inside the loop. */ ~~~~~~~~~~~~~~~~~~~~ if (TRANSLATE_P (translate)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ while (d < stop_d && ~~~~~~~~~~~~~~~~~~~~ RE_TRANSLATE_1 (itext_ichar_fmt (d, fmt, lispobj)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ != '\n') ~~~~~~~~ INC_IBYTEPTR_FMT (d, fmt); ~~~~~~~~~~~~~~~~~~~~~~~~~~ else ~~~~ while (d < stop_d && ~~~~~~~~~~~~~~~~~~~~ itext_ichar_ascii_fmt (d, fmt, lispobj) != '\n') ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ INC_IBYTEPTR_FMT (d, fmt); ~~~~~~~~~~~~~~~~~~~~~~~~~~ /* If we were stopped by a newline, skip forward over it. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Otherwise we will get in an infloop when our start position ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ was at begline. */ ~~~~~~~~~~~~~~~~~~ if (d < stop_d) ~~~~~~~~~~~~~~~ INC_IBYTEPTR_FMT (d, fmt); ~~~~~~~~~~~~~~~~~~~~~~~~~~ range -= d - orig_d; ~~~~~~~~~~~~~~~~~~~~ startpos += d - orig_d; ~~~~~~~~~~~~~~~~~~~~~~~ assert (!forward_search_p || range >= 0); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ else if (range < 0) ~~~~~~~~~~~~~~~~~~~ { ~ /* We're lazy, like in the fastmap code below */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Ichar c; ~~~~~~~~ d = ((const unsigned char *) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (startpos >= size1 ? string2 - size1 : string1) + startpos); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEC_IBYTEPTR_FMT (d, fmt); ~~~~~~~~~~~~~~~~~~~~~~~~~~ c = itext_ichar_fmt (d, fmt, lispobj); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ c = RE_TRANSLATE (c); ~~~~~~~~~~~~~~~~~~~~~ if (c != '\n') ~~~~~~~~~~~~~~ goto advance; ~~~~~~~~~~~~~ } ~ } ~ #endif /* REGEX_BEGLINE_CHECK */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* If a fastmap is supplied, skip quickly over characters that ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ cannot be the start of a match. If the pattern can match the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ null string, however, we don't need to skip characters; we want ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the first null string. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~ if (fastmap && startpos < total_size && !bufp->can_be_null) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ /* For the moment, fastmap always works as if buffer ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ is in default format, so convert chars in the search strings ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ into default format as we go along, if necessary. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ &&#### fastmap needs rethinking for 8-bit-fixed so ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ it's faster. We need it to reflect the raw ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 8-bit-fixed values. That isn't so hard if we assume ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ that the top 96 bytes represent a single 1-byte ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ charset. For 16-bit/32-bit stuff it's probably not ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ worth it to make the fastmap represent the raw, due to ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ its nature -- we'd have to use the LSB for the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap, and that causes lots of problems with Mule ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ chars, where it essentially wipes out the usefulness ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ of the fastmap entirely. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (range > 0) /* Searching forwards. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ int lim = 0; ~~~~~~~~~~~~ int irange = range; ~~~~~~~~~~~~~~~~~~~ if (startpos < size1 && startpos + range >= size1) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ lim = range - (size1 - startpos); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ d = ((const unsigned char *) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (startpos >= size1 ? string2 - size1 : string1) + startpos); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Written out as an if-else to avoid testing `translate' ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ inside the loop. */ ~~~~~~~~~~~~~~~~~~~~ if (TRANSLATE_P (translate)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ while (range > lim) ~~~~~~~~~~~~~~~~~~~ { ~ re_char *old_d = d; ~~~~~~~~~~~~~~~~~~~ #ifdef MULE ~~~~~~~~~~~ Ibyte tempch[MAX_ICHAR_LEN]; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Ichar buf_ch = ~~~~~~~~~~~~~~ RE_TRANSLATE_1 (itext_ichar_fmt (d, fmt, lispobj)); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ set_itext_ichar (tempch, buf_ch); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (fastmap[*tempch]) ~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ #else ~~~~~ if (fastmap[(unsigned char) RE_TRANSLATE_1 (*d)]) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ #endif /* MULE */ ~~~~~~~~~~~~~~~~~ INC_IBYTEPTR_FMT (d, fmt); ~~~~~~~~~~~~~~~~~~~~~~~~~~ range -= (d - old_d); ~~~~~~~~~~~~~~~~~~~~~ assert (!forward_search_p || range >= 0); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ } ~ #ifdef MULE ~~~~~~~~~~~ else if (fmt != FORMAT_DEFAULT) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ while (range > lim) ~~~~~~~~~~~~~~~~~~~ { ~ re_char *old_d = d; ~~~~~~~~~~~~~~~~~~~ Ibyte tempch[MAX_ICHAR_LEN]; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Ichar buf_ch = itext_ichar_fmt (d, fmt, lispobj); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ set_itext_ichar (tempch, buf_ch); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (fastmap[*tempch]) ~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ INC_IBYTEPTR_FMT (d, fmt); ~~~~~~~~~~~~~~~~~~~~~~~~~~ range -= (d - old_d); ~~~~~~~~~~~~~~~~~~~~~ assert (!forward_search_p || range >= 0); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ } ~ #endif /* MULE */ ~~~~~~~~~~~~~~~~~ else ~~~~ { ~ while (range > lim && !fastmap[*d]) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ re_char *old_d = d; ~~~~~~~~~~~~~~~~~~~ INC_IBYTEPTR (d); ~~~~~~~~~~~~~~~~~ range -= (d - old_d); ~~~~~~~~~~~~~~~~~~~~~ assert (!forward_search_p || range >= 0); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ } ~ startpos += irange - range; ~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ else /* Searching backwards. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ /* #### It's not clear why we don't just write a loop, like ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for the moving-forward case. Perhaps the writer got lazy, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ since backward searches aren't so common. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ d = ((const unsigned char *) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (startpos >= size1 ? string2 - size1 : string1) + startpos); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef MULE ~~~~~~~~~~~ { ~ Ibyte tempch[MAX_ICHAR_LEN]; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Ichar buf_ch = ~~~~~~~~~~~~~~ RE_TRANSLATE (itext_ichar_fmt (d, fmt, lispobj)); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ set_itext_ichar (tempch, buf_ch); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!fastmap[*tempch]) ~~~~~~~~~~~~~~~~~~~~~~ goto advance; ~~~~~~~~~~~~~ } ~ #else ~~~~~ if (!fastmap[(unsigned char) RE_TRANSLATE (*d)]) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto advance; ~~~~~~~~~~~~~ #endif /* MULE */ ~~~~~~~~~~~~~~~~~ } ~ } ~ /* If can't match the null string, and that's all we have left, fail. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (range >= 0 && startpos == total_size && fastmap ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ && !bufp->can_be_null) ~~~~~~~~~~~~~~~~~~~~~~ { ~ UNBIND_REGEX_MALLOC_CHECK (); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return -1; ~~~~~~~~~~ } ~ #ifdef emacs /* XEmacs added, w/removal of immediate_quit */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!no_quit_in_re_search) ~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ BEGIN_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~~~ QUIT; ~~~~~ END_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~ RE_SEARCH_RELOCATE_MOVEABLE_DATA_POINTERS (); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ #endif ~~~~~~ BEGIN_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~~~ val = re_match_2_internal (bufp, string1, size1, string2, size2, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ startpos, regs, stop ~~~~~~~~~~~~~~~~~~~~ RE_LISP_CONTEXT_ARGS); ~~~~~~~~~~~~~~~~~~~~~~ #ifndef REGEX_MALLOC ~~~~~~~~~~~~~~~~~~~~ ALLOCA_GARBAGE_COLLECT (); ~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ END_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~ RE_SEARCH_RELOCATE_MOVEABLE_DATA_POINTERS (); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (val >= 0) ~~~~~~~~~~~~~ { ~ UNBIND_REGEX_MALLOC_CHECK (); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return startpos; ~~~~~~~~~~~~~~~~ } ~ if (val == -2) ~~~~~~~~~~~~~~ { ~ UNBIND_REGEX_MALLOC_CHECK (); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return -2; ~~~~~~~~~~ } ~ RE_SEARCH_RELOCATE_MOVEABLE_DATA_POINTERS (); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ advance: ~~~~~~~~ if (!range) ~~~~~~~~~~~ break; ~~~~~~ else if (range > 0) ~~~~~~~~~~~~~~~~~~~ { ~ Bytecount d_size; ~~~~~~~~~~~~~~~~~ d = ((const unsigned char *) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (startpos >= size1 ? string2 - size1 : string1) + startpos); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ d_size = itext_ichar_len_fmt (d, fmt); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ range -= d_size; ~~~~~~~~~~~~~~~~ assert (!forward_search_p || range >= 0); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ startpos += d_size; ~~~~~~~~~~~~~~~~~~~ } ~ else ~~~~ { ~ Bytecount d_size; ~~~~~~~~~~~~~~~~~ /* Note startpos > size1 not >=. If we are on the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ string1/string2 boundary, we want to backup into string1. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ d = ((const unsigned char *) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (startpos > size1 ? string2 - size1 : string1) + startpos); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEC_IBYTEPTR_FMT (d, fmt); ~~~~~~~~~~~~~~~~~~~~~~~~~~ d_size = itext_ichar_len_fmt (d, fmt); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ range += d_size; ~~~~~~~~~~~~~~~~ assert (!forward_search_p || range >= 0); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ startpos -= d_size; ~~~~~~~~~~~~~~~~~~~ } ~ } ~ UNBIND_REGEX_MALLOC_CHECK (); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return -1; ~~~~~~~~~~ } /* re_search_2 */ ~~~~~~~~~~~~~~~~~~~ ~ /* Declarations and macros for re_match_2. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* This converts PTR, a pointer into one of the search strings `string1' ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ and `string2' into an offset from the beginning of that string. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define POINTER_TO_OFFSET(ptr) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (FIRST_STRING_P (ptr) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~ ? ((regoff_t) ((ptr) - string1)) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ : ((regoff_t) ((ptr) - string2 + size1))) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Macros for dealing with the split strings in re_match_2. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define MATCHING_IN_FIRST_STRING (dend == end_match_1) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Call before fetching a character with *d. This switches over to ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ string2 if necessary. */ ~~~~~~~~~~~~~~~~~~~~~~~~~ #define REGEX_PREFETCH() \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ while (d == dend) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { \ ~~~~~~~~~~~ /* End of string2 => fail. */ \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (dend == end_match_2) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto fail; \ ~~~~~~~~~~~~~~~~~~ /* End of string1 => advance to string2. */ \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ d = string2; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~ dend = end_match_2; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* Test if at very beginning or at very end of the virtual concatenation ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ of `string1' and `string2'. If only one string, it's `string2'. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define AT_STRINGS_BEG(d) ((d) == (size1 ? string1 : string2) || !size2) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define AT_STRINGS_END(d) ((d) == end2) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* XEmacs change: ~~~~~~~~~~~~~~~~~ If the given position straddles the string gap, return the equivalent ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ position that is before or after the gap, respectively; otherwise, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return the same position. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define POS_BEFORE_GAP_UNSAFE(d) ((d) == string2 ? end1 : (d)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define POS_AFTER_GAP_UNSAFE(d) ((d) == end1 ? string2 : (d)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Test if CH is a word-constituent character. (XEmacs change) */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define WORDCHAR_P(ch) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (SYNTAX (BUFFER_MIRROR_SYNTAX_TABLE (lispbuf), ch) == Sword) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Free everything we malloc. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef MATCH_MAY_ALLOCATE ~~~~~~~~~~~~~~~~~~~~~~~~~ #define FREE_VAR(var,type) if (var) REGEX_FREE (var, type); var = NULL ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define FREE_VARIABLES() \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ do { \ ~~~~~~~~~~~~~~ UNBIND_REGEX_MALLOC_CHECK (); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ REGEX_FREE_STACK (fail_stack.stack); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ FREE_VAR (regstart, re_char **); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ FREE_VAR (regend, re_char **); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ FREE_VAR (old_regstart, re_char **); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ FREE_VAR (old_regend, re_char **); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ FREE_VAR (best_regstart, re_char **); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ FREE_VAR (best_regend, re_char **); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ FREE_VAR (reg_info, register_info_type *); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ FREE_VAR (reg_dummy, re_char **); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ FREE_VAR (reg_info_dummy, register_info_type *); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } while (0) ~~~~~~~~~~~ #else /* not MATCH_MAY_ALLOCATE */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define FREE_VARIABLES() \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ do { \ ~~~~~~~~~~~ UNBIND_REGEX_MALLOC_CHECK (); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } while (0) ~~~~~~~~~~~ #endif /* MATCH_MAY_ALLOCATE */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* These values must meet several constraints. They must not be valid ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ register values, which means we can use numbers larger than MAX_REGNUM. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ They must differ by 1, because of NUM_FAILURE_ITEMS above. And the value ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for the lowest register must be larger than the value for the highest ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ register, so we do not try to actually save any registers when none are ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ active. */ ~~~~~~~~~~~ #define NO_HIGHEST_ACTIVE_REG (MAX_REGNUM + 1) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define NO_LOWEST_ACTIVE_REG (NO_HIGHEST_ACTIVE_REG + 1) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ~ /* Matching routines. */ ~~~~~~~~~~~~~~~~~~~~~~~~~ #ifndef emacs /* XEmacs never uses this. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* re_match is like re_match_2 except it takes only a single string. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int ~~~ re_match (struct re_pattern_buffer *bufp, const char *string, int size, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int pos, struct re_registers *regs ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RE_LISP_CONTEXT_ARGS_DECL) ~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ int result = re_match_2_internal (bufp, NULL, 0, (re_char *) string, size, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ pos, regs, size ~~~~~~~~~~~~~~~ RE_LISP_CONTEXT_ARGS); ~~~~~~~~~~~~~~~~~~~~~~ ALLOCA_GARBAGE_COLLECT (); ~~~~~~~~~~~~~~~~~~~~~~~~~~ return result; ~~~~~~~~~~~~~~ } ~ #endif /* not emacs */ ~~~~~~~~~~~~~~~~~~~~~~ /* re_match_2 matches the compiled pattern in BUFP against the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (virtual) concatenation of STRING1 and STRING2 (of length SIZE1 and ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ SIZE2, respectively). We start matching at POS, and stop matching ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ at STOP. ~~~~~~~~ If REGS is non-null and the `no_sub' field of BUFP is nonzero, we ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ store offsets for the substring each group matched in REGS. See the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ documentation for exactly how many groups we fill. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ We return -1 if no match, -2 if an internal error (such as the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ failure stack overflowing). Otherwise, we return the length of the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ matched substring. */ ~~~~~~~~~~~~~~~~~~~~~~ int ~~~ re_match_2 (struct re_pattern_buffer *bufp, const char *string1, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int size1, const char *string2, int size2, int pos, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ struct re_registers *regs, int stop ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RE_LISP_CONTEXT_ARGS_DECL) ~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ int result; ~~~~~~~~~~~ #ifdef emacs ~~~~~~~~~~~~ /* Update the mirror syntax table if it's dirty now, this would otherwise ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ cause a malloc() in charset_mule in re_match_2_internal() when checking ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ characters' syntax. */ ~~~~~~~~~~~~~~~~~~~~~~ SYNTAX_CODE (BUFFER_MIRROR_SYNTAX_TABLE (lispbuf), 'a'); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ scache = setup_syntax_cache (scache, lispobj, lispbuf, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ offset_to_bytexpos (lispobj, pos), ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 1); ~~~ #endif ~~~~~~ result = re_match_2_internal (bufp, (re_char *) string1, size1, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (re_char *) string2, size2, ~~~~~~~~~~~~~~~~~~~~~~~~~~~ pos, regs, stop ~~~~~~~~~~~~~~~ RE_LISP_CONTEXT_ARGS); ~~~~~~~~~~~~~~~~~~~~~~ ALLOCA_GARBAGE_COLLECT (); ~~~~~~~~~~~~~~~~~~~~~~~~~~ return result; ~~~~~~~~~~~~~~ } ~ /* This is a separate function so that we can force an alloca cleanup ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ afterwards. */ ~~~~~~~~~~~~~~~ static int ~~~~~~~~~~ re_match_2_internal (struct re_pattern_buffer *bufp, re_char *string1, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int size1, re_char *string2, int size2, int pos, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ struct re_registers *regs, int stop ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RE_LISP_CONTEXT_ARGS_MULE_DECL) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ /* General temporaries. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~ int mcnt; ~~~~~~~~~ re_char *p1; ~~~~~~~~~~~~ int should_succeed; /* XEmacs change */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Just past the end of the corresponding string. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_char *end1, *end2; ~~~~~~~~~~~~~~~~~~~~~ /* Pointers into string1 and string2, just past the last characters in ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ each to consider matching. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_char *end_match_1, *end_match_2; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Where we are in the data, and the end of the current string. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_char *d, *dend; ~~~~~~~~~~~~~~~~~~ /* Where we are in the pattern, and the end of the pattern. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ unsigned char *p; ~~~~~~~~~~~~~~~~~ re_char *pstart; ~~~~~~~~~~~~~~~~ REGISTER re_char *pend; ~~~~~~~~~~~~~~~~~~~~~~~ /* Mark the opcode just after a start_memory, so we can test for an ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ empty subpattern when we get to the stop_memory. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_char *just_past_start_mem = 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* We use this to map every character in the string. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RE_TRANSLATE_TYPE translate = bufp->translate; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Failure point stack. Each place that can handle a failure further ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ down the line pushes a failure point on this stack. It consists of ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ restart, regend, and reg_info for all registers corresponding to ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the subexpressions we're currently inside, plus the number of such ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ registers, and, finally, two char *'s. The first char * is where ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ to resume scanning the pattern; the second one is where to resume ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ scanning the strings. If the latter is zero, the failure point is ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ a ``dummy''; if a failure happens and the failure point is a dummy, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ it gets discarded and the next one is tried. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef MATCH_MAY_ALLOCATE /* otherwise, this is global. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fail_stack_type fail_stack; ~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ #ifdef DEBUG ~~~~~~~~~~~~ static int failure_id; ~~~~~~~~~~~~~~~~~~~~~~ int nfailure_points_pushed = 0, nfailure_points_popped = 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ #ifdef REGEX_REL_ALLOC ~~~~~~~~~~~~~~~~~~~~~~ /* This holds the pointer to the failure stack, when ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ it is allocated relocatably. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fail_stack_elt_t *failure_stack_ptr; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ /* We fill all the registers internally, independent of what we ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return, for use in backreferences. The number here includes ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ an element for register zero. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regnum_t num_regs = bufp->re_ngroups + 1; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* The currently active registers. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regnum_t lowest_active_reg = NO_LOWEST_ACTIVE_REG; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regnum_t highest_active_reg = NO_HIGHEST_ACTIVE_REG; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Information on the contents of registers. These are pointers into ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the input strings; they record just what was matched (on this ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ attempt) by a subexpression part of the pattern, that is, the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regnum-th regstart pointer points to where in the pattern we began ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ matching and the regnum-th regend points to right after where we ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ stopped matching the regnum-th subexpression. (The zeroth register ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ keeps track of what the whole pattern matches.) */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_char **regstart, **regend; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ /* If a group that's operated upon by a repetition operator fails to ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ match anything, then the register for its start will need to be ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ restored because it will have been set to wherever in the string we ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ are when we last see its open-group operator. Similarly for a ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ register's end. */ ~~~~~~~~~~~~~~~~~~~ #ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_char **old_regstart, **old_regend; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ /* The is_active field of reg_info helps us keep track of which (possibly ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ nested) subexpressions we are currently in. The matched_something ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ field of reg_info[reg_num] helps us tell whether or not we have ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ matched any of the pattern so far this time through the reg_num-th ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ subexpression. These two fields get reset each time through any ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ loop their register is in. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef MATCH_MAY_ALLOCATE /* otherwise, this is global. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ register_info_type *reg_info; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ /* The following record the register info as found in the above ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ variables when we find a match better than any we've seen before. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ This happens as we backtrack through the failure points, which in ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ turn happens only if we have not yet matched the entire string. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int best_regs_set = false; ~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_char **best_regstart, **best_regend; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ /* Logically, this is `best_regend[0]'. But we don't want to have to ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ allocate space for that if we're not allocating space for anything ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ else (see below). Also, we never need info about register 0 for ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ any of the other register vectors, and it seems rather a kludge to ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ treat `best_regend' differently than the rest. So we keep track of ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the end of the best match so far in a separate variable. We ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ initialize this to NULL so that when we backtrack the first time ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ and need to test it, it's not garbage. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_char *match_end = NULL; ~~~~~~~~~~~~~~~~~~~~~~~~~~ /* This helps SET_REGS_MATCHED avoid doing redundant work. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int set_regs_matched_done = 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Used when we pop values we don't care about. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_char **reg_dummy; ~~~~~~~~~~~~~~~~~~~~ register_info_type *reg_info_dummy; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ #ifdef DEBUG ~~~~~~~~~~~~ /* Counts the total number of registers pushed. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int num_regs_pushed = 0; ~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ /* 1 if this match ends in the same string (string1 or string2) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ as the best previous match. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_bool same_str_p; ~~~~~~~~~~~~~~~~~~~ /* 1 if this match is the best seen so far. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_bool best_match_p; ~~~~~~~~~~~~~~~~~~~~~ #ifdef emacs ~~~~~~~~~~~~ Internal_Format fmt = buffer_or_other_internal_format (lispobj); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef REL_ALLOC ~~~~~~~~~~~~~~~~ const Ibyte *orig_buftext = ~~~~~~~~~~~~~~~~~~~~~~~~~~~ BUFFERP (lispobj) ? ~~~~~~~~~~~~~~~~~~~ BYTE_BUF_BYTE_ADDRESS (XBUFFER (lispobj), ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ BYTE_BUF_BEG (XBUFFER (lispobj))) : ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 0; ~~ #endif ~~~~~~ #ifdef ERROR_CHECK_MALLOC ~~~~~~~~~~~~~~~~~~~~~~~~~ int depth = bind_regex_malloc_disallowed (1); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ #endif /* emacs */ ~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT1 ("\n\nEntering re_match_2.\n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ BEGIN_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~~~ INIT_FAIL_STACK (); ~~~~~~~~~~~~~~~~~~~ p = (unsigned char *) ALLOCA (bufp->used); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ END_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~ /* re_match_2_internal() modifies the compiled pattern (see the succeed_n, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ jump_n, set_number_at opcodes), make it re-entrant by working on a ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ copy. This should also give better locality of reference. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ memcpy (p, bufp->buffer, bufp->used); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ pstart = (re_char *) p; ~~~~~~~~~~~~~~~~~~~~~~~ pend = pstart + bufp->used; ~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef MATCH_MAY_ALLOCATE ~~~~~~~~~~~~~~~~~~~~~~~~~ /* Do not bother to initialize all the register variables if there are ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ no groups in the pattern, as it takes a fair amount of time. If ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ there are groups, we include space for register 0 (the whole ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ pattern), even though we never use it, since it simplifies the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ array indexing. We should fix this. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (bufp->re_ngroups) ~~~~~~~~~~~~~~~~~~~~~ { ~ BEGIN_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~~~ regstart = REGEX_TALLOC (num_regs, re_char *); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regend = REGEX_TALLOC (num_regs, re_char *); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ old_regstart = REGEX_TALLOC (num_regs, re_char *); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ old_regend = REGEX_TALLOC (num_regs, re_char *); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ best_regstart = REGEX_TALLOC (num_regs, re_char *); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ best_regend = REGEX_TALLOC (num_regs, re_char *); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ reg_info = REGEX_TALLOC (num_regs, register_info_type); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ reg_dummy = REGEX_TALLOC (num_regs, re_char *); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ reg_info_dummy = REGEX_TALLOC (num_regs, register_info_type); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ END_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~ if (!(regstart && regend && old_regstart && old_regend && reg_info ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ && best_regstart && best_regend && reg_dummy && reg_info_dummy)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ FREE_VARIABLES (); ~~~~~~~~~~~~~~~~~~ return -2; ~~~~~~~~~~ } ~ } ~ else ~~~~ { ~ /* We must initialize all our variables to NULL, so that ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `FREE_VARIABLES' doesn't try to free them. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regstart = regend = old_regstart = old_regend = best_regstart ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ = best_regend = reg_dummy = NULL; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ reg_info = reg_info_dummy = (register_info_type *) NULL; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ #endif /* MATCH_MAY_ALLOCATE */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #if defined (emacs) && defined (REL_ALLOC) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ /* If the allocations above (or the call to setup_syntax_cache() in ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_match_2) caused a rel-alloc relocation, then fix up the data ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ pointers */ ~~~~~~~~~~~ Bytecount offset = offset_post_relocation (lispobj, orig_buftext); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (offset) ~~~~~~~~~~~ { ~ string1 += offset; ~~~~~~~~~~~~~~~~~~ string2 += offset; ~~~~~~~~~~~~~~~~~~ } ~ } ~ #endif /* defined (emacs) && defined (REL_ALLOC) */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* The starting position is bogus. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (pos < 0 || pos > size1 + size2) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ FREE_VARIABLES (); ~~~~~~~~~~~~~~~~~~ return -1; ~~~~~~~~~~ } ~ /* Initialize subexpression text positions to our sentinel to mark ones that ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ no start_memory/stop_memory has been seen for. Also initialize the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ register information struct. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (mcnt = 1; mcnt < num_regs; mcnt++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ regstart[mcnt] = regend[mcnt] = old_regstart[mcnt] = old_regend[mcnt] ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ = best_regstart[mcnt] = best_regend[mcnt] = REG_UNSET_VALUE; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ REG_MATCH_NULL_STRING_P (reg_info[mcnt]) = MATCH_NULL_UNSET_VALUE; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ IS_ACTIVE (reg_info[mcnt]) = 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ MATCHED_SOMETHING (reg_info[mcnt]) = 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ EVER_MATCHED_SOMETHING (reg_info[mcnt]) = 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* We move `string1' into `string2' if the latter's empty -- but not if ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `string1' is null. */ ~~~~~~~~~~~~~~~~~~~~~~ if (size2 == 0 && string1 != NULL) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ string2 = string1; ~~~~~~~~~~~~~~~~~~ size2 = size1; ~~~~~~~~~~~~~~ string1 = 0; ~~~~~~~~~~~~ size1 = 0; ~~~~~~~~~~ } ~ end1 = string1 + size1; ~~~~~~~~~~~~~~~~~~~~~~~ end2 = string2 + size2; ~~~~~~~~~~~~~~~~~~~~~~~ /* Compute where to stop matching, within the two strings. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (stop <= size1) ~~~~~~~~~~~~~~~~~~ { ~ end_match_1 = string1 + stop; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ end_match_2 = string2; ~~~~~~~~~~~~~~~~~~~~~~ } ~ else ~~~~ { ~ end_match_1 = end1; ~~~~~~~~~~~~~~~~~~~ end_match_2 = string2 + stop - size1; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* `p' scans through the pattern as `d' scans through the data. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `dend' is the end of the input string that `d' points within. `d' ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ is advanced into the following input string whenever necessary, but ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ this happens before fetching; therefore, at the beginning of the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ loop, `d' can be pointing at the end of a string, but it cannot ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ equal `string2'. */ ~~~~~~~~~~~~~~~~~~~~ if (size1 > 0 && pos <= size1) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ d = string1 + pos; ~~~~~~~~~~~~~~~~~~ dend = end_match_1; ~~~~~~~~~~~~~~~~~~~ } ~ else ~~~~ { ~ d = string2 + pos - size1; ~~~~~~~~~~~~~~~~~~~~~~~~~~ dend = end_match_2; ~~~~~~~~~~~~~~~~~~~ } ~ DEBUG_MATCH_PRINT1 ("The compiled pattern is: \n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT_COMPILED_PATTERN (bufp, p, pend); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT1 ("The string to match is: `"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT_DOUBLE_STRING (d, string1, size1, string2, size2); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT1 ("'\n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* This loops over pattern commands. It exits by returning from the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ function if the match is complete, or it drops through if the match ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fails at this starting point in the input data. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (;;) ~~~~~~~~ { ~ DEBUG_MATCH_PRINT2 ("\n0x%zx: ", (Bytecount) p); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef emacs /* XEmacs added, w/removal of immediate_quit */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!no_quit_in_re_search) ~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ BEGIN_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~~~ QUIT; ~~~~~ END_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~ RE_MATCH_RELOCATE_MOVEABLE_DATA_POINTERS (); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ #endif ~~~~~~ if (p == pend) ~~~~~~~~~~~~~~ { /* End of pattern means we might have succeeded. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT1 ("end of pattern ... "); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* If we haven't matched the entire string, and we want the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ longest match, try backtracking. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (d != end_match_2) ~~~~~~~~~~~~~~~~~~~~~ { ~ same_str_p = (FIRST_STRING_P (match_end) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ == MATCHING_IN_FIRST_STRING); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* AIX compiler got confused when this was combined ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ with the previous declaration. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (same_str_p) ~~~~~~~~~~~~~~~ best_match_p = d > match_end; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ else ~~~~ best_match_p = !MATCHING_IN_FIRST_STRING; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT1 ("backtracking.\n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!FAIL_STACK_EMPTY ()) ~~~~~~~~~~~~~~~~~~~~~~~~~ { /* More failure points to try. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* If exceeds best match so far, save it. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!best_regs_set || best_match_p) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ best_regs_set = true; ~~~~~~~~~~~~~~~~~~~~~ match_end = d; ~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT1 ("\nSAVING match as best so far.\n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (mcnt = 1; mcnt < num_regs; mcnt++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ best_regstart[mcnt] = regstart[mcnt]; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ best_regend[mcnt] = regend[mcnt]; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ } ~ goto fail; ~~~~~~~~~~ } ~ /* If no failure points, don't restore garbage. And if ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ last match is real best match, don't restore second ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ best one. */ ~~~~~~~~~~~~ else if (best_regs_set && !best_match_p) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ restore_best_regs: ~~~~~~~~~~~~~~~~~~ /* Restore best match. It may happen that `dend == ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ end_match_1' while the restored d is in string2. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ For example, the pattern `x.*y.*z' against the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ strings `x-' and `y-z-', if the two strings are ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ not consecutive in memory. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT1 ("Restoring best registers.\n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ d = match_end; ~~~~~~~~~~~~~~ dend = ((d >= string1 && d <= end1) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ? end_match_1 : end_match_2); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (mcnt = 1; mcnt < num_regs; mcnt++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ regstart[mcnt] = best_regstart[mcnt]; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regend[mcnt] = best_regend[mcnt]; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ } ~ } /* d != end_match_2 */ ~~~~~~~~~~~~~~~~~~~~~~~~ succeed_label: ~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT1 ("Accepting match.\n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* If caller wants register contents data back, do it. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ int num_nonshy_regs = bufp->re_nsub + 1; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (regs && !bufp->no_sub) ~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ /* Have the register data arrays been allocated? */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (bufp->regs_allocated == REGS_UNALLOCATED) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { /* No. So allocate them with malloc. We need one ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ extra element beyond `num_regs' for the `-1' marker ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ GNU code uses. */ ~~~~~~~~~~~~~~~~~~ regs->num_regs = MAX (RE_NREGS, num_nonshy_regs + 1); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ BEGIN_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~~~ regs->start = TALLOC (regs->num_regs, regoff_t); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regs->end = TALLOC (regs->num_regs, regoff_t); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ END_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~ RE_MATCH_RELOCATE_MOVEABLE_DATA_POINTERS (); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (regs->start == NULL || regs->end == NULL) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ FREE_VARIABLES (); ~~~~~~~~~~~~~~~~~~ return -2; ~~~~~~~~~~ } ~ bufp->regs_allocated = REGS_REALLOCATE; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ else if (bufp->regs_allocated == REGS_REALLOCATE) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { /* Yes. If we need more elements than were already ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ allocated, reallocate them. If we need fewer, just ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ leave it alone. */ ~~~~~~~~~~~~~~~~~~~ if (regs->num_regs < num_nonshy_regs + 1) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ regs->num_regs = num_nonshy_regs + 1; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ BEGIN_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~~~ RETALLOC (regs->start, regs->num_regs, regoff_t); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RETALLOC (regs->end, regs->num_regs, regoff_t); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ END_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~ RE_MATCH_RELOCATE_MOVEABLE_DATA_POINTERS (); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (regs->start == NULL || regs->end == NULL) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ FREE_VARIABLES (); ~~~~~~~~~~~~~~~~~~ return -2; ~~~~~~~~~~ } ~ } ~ } ~ else ~~~~ { ~ /* The braces fend off a "empty body in an else-statement" ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ warning under GCC when assert expands to nothing. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ assert (bufp->regs_allocated == REGS_FIXED); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* Convert the pointer data in `regstart' and `regend' to ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ indices. Register zero has to be set differently, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ since we haven't kept track of any info for it. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (regs->num_regs > 0) ~~~~~~~~~~~~~~~~~~~~~~~ { ~ regs->start[0] = pos; ~~~~~~~~~~~~~~~~~~~~~ regs->end[0] = (MATCHING_IN_FIRST_STRING ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ? ((regoff_t) (d - string1)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ : ((regoff_t) (d - string2 + size1))); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* Map over the NUM_NONSHY_REGS non-shy internal registers. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Copy each into the corresponding external register. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ MCNT indexes external registers. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (mcnt = 1; mcnt < MIN (num_nonshy_regs, regs->num_regs); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ mcnt++) ~~~~~~~ { ~ int internal_reg = bufp->external_to_internal_register[mcnt]; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if ((int)0xDEADBEEF == internal_reg ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || REG_UNSET (regstart[internal_reg]) || ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ REG_UNSET (regend[internal_reg])) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regs->start[mcnt] = regs->end[mcnt] = -1; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ else ~~~~ { ~ regs->start[mcnt] = ~~~~~~~~~~~~~~~~~~~ (regoff_t) POINTER_TO_OFFSET (regstart[internal_reg]); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regs->end[mcnt] = ~~~~~~~~~~~~~~~~~ (regoff_t) POINTER_TO_OFFSET (regend[internal_reg]); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ } ~ } /* regs && !bufp->no_sub */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* If we have regs and the regs structure has more elements than ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ were in the pattern, set the extra elements starting with ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ NUM_NONSHY_REGS to -1. If we (re)allocated the registers, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ this is the case, because we always allocate enough to have ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ at least one -1 at the end. ~~~~~~~~~~~~~~~~~~~~~~~~~~~ We do this even when no_sub is set because some applications ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (XEmacs) reuse register structures which may contain stale ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ information, and permit attempts to access those registers. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ It would be possible to require the caller to do this, but we'd ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ have to change the API for this function to reflect that, and ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ audit all callers. Note: as of 2003-04-17 callers in XEmacs ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ do clear the registers, but it's safer to leave this code in ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ because of reallocation. ~~~~~~~~~~~~~~~~~~~~~~~~ */ ~~ if (regs && regs->num_regs > 0) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (mcnt = num_nonshy_regs; mcnt < regs->num_regs; mcnt++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regs->start[mcnt] = regs->end[mcnt] = -1; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ DEBUG_MATCH_PRINT4 ("%u failure points pushed, %u popped (%u remain).\n", ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ nfailure_points_pushed, nfailure_points_popped, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ nfailure_points_pushed - nfailure_points_popped); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT2 ("%u registers pushed.\n", num_regs_pushed); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ mcnt = d - pos - (MATCHING_IN_FIRST_STRING ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ? string1 ~~~~~~~~~ : string2 - size1); ~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT2 ("Returning %d from re_match_2.\n", mcnt); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ FREE_VARIABLES (); ~~~~~~~~~~~~~~~~~~ return mcnt; ~~~~~~~~~~~~ } ~ /* Otherwise match next pattern command. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ switch ((re_opcode_t) *p++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ /* Ignore these. Used to ignore the n of succeed_n's which ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ currently have n == 0. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~ case no_op: ~~~~~~~~~~~ DEBUG_MATCH_PRINT1 ("EXECUTING no_op.\n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case succeed: ~~~~~~~~~~~~~ DEBUG_MATCH_PRINT1 ("EXECUTING succeed.\n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto succeed_label; ~~~~~~~~~~~~~~~~~~~ /* Match exactly a string of length n in the pattern. The ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ following byte in the pattern defines n, and the n bytes after ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ that make up the string to match. (Under Mule, this will be in ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the default internal format.) */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case exactn: ~~~~~~~~~~~~ mcnt = *p++; ~~~~~~~~~~~~ DEBUG_MATCH_PRINT2 ("EXECUTING exactn %d.\n", mcnt); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* This is written out as an if-else so we don't waste time ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ testing `translate' inside the loop. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (TRANSLATE_P (translate)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ do ~~ { ~ #ifdef MULE ~~~~~~~~~~~ Bytecount pat_len; ~~~~~~~~~~~~~~~~~~ REGEX_PREFETCH (); ~~~~~~~~~~~~~~~~~~ if (RE_TRANSLATE_1 (itext_ichar_fmt (d, fmt, lispobj)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ != itext_ichar (p)) ~~~~~~~~~~~~~~~~~~~ goto fail; ~~~~~~~~~~ pat_len = itext_ichar_len (p); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ p += pat_len; ~~~~~~~~~~~~~ INC_IBYTEPTR_FMT (d, fmt); ~~~~~~~~~~~~~~~~~~~~~~~~~~ mcnt -= pat_len; ~~~~~~~~~~~~~~~~ #else /* not MULE */ ~~~~~~~~~~~~~~~~~~~~ REGEX_PREFETCH (); ~~~~~~~~~~~~~~~~~~ if ((unsigned char) RE_TRANSLATE_1 (*d++) != *p++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto fail; ~~~~~~~~~~ mcnt--; ~~~~~~~ #endif ~~~~~~ } ~ while (mcnt > 0); ~~~~~~~~~~~~~~~~~ } ~ else ~~~~ { ~ #ifdef MULE ~~~~~~~~~~~ /* If buffer format is default, then we can shortcut and just ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ compare the text directly, byte by byte. Otherwise, we ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ need to go character by character. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (fmt != FORMAT_DEFAULT) ~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ do ~~ { ~ Bytecount pat_len; ~~~~~~~~~~~~~~~~~~ REGEX_PREFETCH (); ~~~~~~~~~~~~~~~~~~ if (itext_ichar_fmt (d, fmt, lispobj) != ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ itext_ichar (p)) ~~~~~~~~~~~~~~~~ goto fail; ~~~~~~~~~~ pat_len = itext_ichar_len (p); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ p += pat_len; ~~~~~~~~~~~~~ INC_IBYTEPTR_FMT (d, fmt); ~~~~~~~~~~~~~~~~~~~~~~~~~~ mcnt -= pat_len; ~~~~~~~~~~~~~~~~ } ~ while (mcnt > 0); ~~~~~~~~~~~~~~~~~ } ~ else ~~~~ #endif ~~~~~~ { ~ do ~~ { ~ REGEX_PREFETCH (); ~~~~~~~~~~~~~~~~~~ if (*d++ != *p++) goto fail; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ mcnt--; ~~~~~~~ } ~ while (mcnt > 0); ~~~~~~~~~~~~~~~~~ } ~ } ~ SET_REGS_MATCHED (); ~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ /* Match any character except possibly a newline or a null. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case anychar: ~~~~~~~~~~~~~ DEBUG_MATCH_PRINT1 ("EXECUTING anychar.\n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ REGEX_PREFETCH (); ~~~~~~~~~~~~~~~~~~ if ((!(bufp->syntax & RE_DOT_NEWLINE) && ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RE_TRANSLATE (itext_ichar_fmt (d, fmt, lispobj)) == '\n') ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || (bufp->syntax & RE_DOT_NOT_NULL && ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RE_TRANSLATE (itext_ichar_fmt (d, fmt, lispobj)) == ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ '\000')) ~~~~~~~~ goto fail; ~~~~~~~~~~ SET_REGS_MATCHED (); ~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT2 (" Matched `%c'.\n", *d); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ INC_IBYTEPTR_FMT (d, fmt); /* XEmacs change */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case charset: ~~~~~~~~~~~~~ case charset_not: ~~~~~~~~~~~~~~~~~ { ~ REGISTER Ichar c; ~~~~~~~~~~~~~~~~~ re_bool not_p = (re_opcode_t) *(p - 1) == charset_not; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT2 ("EXECUTING charset%s.\n", not_p ? "_not" : ""); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ REGEX_PREFETCH (); ~~~~~~~~~~~~~~~~~~ c = itext_ichar_fmt (d, fmt, lispobj); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ c = RE_TRANSLATE (c); /* The character to match. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Cast to `unsigned int' instead of `unsigned char' in case the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bit list is a full 32 bytes long. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if ((unsigned int)c < (unsigned int) (*p * BYTEWIDTH) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ && p[1 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH))) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ not_p = !not_p; ~~~~~~~~~~~~~~~ p += 1 + *p; ~~~~~~~~~~~~ if (!not_p) goto fail; ~~~~~~~~~~~~~~~~~~~~~~ SET_REGS_MATCHED (); ~~~~~~~~~~~~~~~~~~~~ INC_IBYTEPTR_FMT (d, fmt); /* XEmacs change */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ } ~ #ifdef MULE ~~~~~~~~~~~ case charset_mule: ~~~~~~~~~~~~~~~~~~ case charset_mule_not: ~~~~~~~~~~~~~~~~~~~~~~ { ~ REGISTER Ichar c; ~~~~~~~~~~~~~~~~~ re_bool not_p = (re_opcode_t) *(p - 1) == charset_mule_not; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Bitbyte class_bits = *p++; ~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT2 ("EXECUTING charset_mule%s.\n", ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ not_p ? "_not" : ""); ~~~~~~~~~~~~~~~~~~~~~ REGEX_PREFETCH (); ~~~~~~~~~~~~~~~~~~ c = itext_ichar_fmt (d, fmt, lispobj); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ c = RE_TRANSLATE (c); /* The character to match. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if ((class_bits && ~~~~~~~~~~~~~~~~~~ ((class_bits & BIT_WORD && ISWORD (c)) /* = ALNUM */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || (class_bits & BIT_ALPHA && ISALPHA (c)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || (class_bits & BIT_SPACE && ISSPACE (c)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || (class_bits & BIT_PUNCT && ISPUNCT (c)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || (TRANSLATE_P (translate) ? ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (class_bits & (BIT_UPPER | BIT_LOWER) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ && !NOCASEP (lispbuf, c)) ~~~~~~~~~~~~~~~~~~~~~~~~~ : ((class_bits & BIT_UPPER && ISUPPER (c)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || (class_bits & BIT_LOWER && ISLOWER (c)))))) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || EQ (Qt, unified_range_table_lookup ((void *) p, c, Qnil))) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ not_p = !not_p; ~~~~~~~~~~~~~~~ } ~ p += unified_range_table_bytes_used ((void *) p); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!not_p) goto fail; ~~~~~~~~~~~~~~~~~~~~~~ SET_REGS_MATCHED (); ~~~~~~~~~~~~~~~~~~~~ INC_IBYTEPTR_FMT (d, fmt); ~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ } ~ #endif /* MULE */ ~~~~~~~~~~~~~~~~~ /* The beginning of a group is represented by start_memory. The ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ arguments are the register number in the next two bytes, and the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ number of groups inner to this one in the two bytes thereafter. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ The text matched within the group is recorded (in the internal ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ registers data structure) under the register number. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case start_memory: ~~~~~~~~~~~~~~~~~~ { ~ regnum_t regno; ~~~~~~~~~~~~~~~ /* Find out if this group can match the empty string. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ p1 = p; /* To send to group_match_null_string_p. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ EXTRACT_NONNEGATIVE_AND_INCR (regno, p); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT3 ("EXECUTING start_memory %d (%d):\n", ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regno, extract_number (p)); ~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (REG_MATCH_NULL_STRING_P (reg_info[regno]) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ == MATCH_NULL_UNSET_VALUE) ~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ REG_MATCH_NULL_STRING_P (reg_info[regno]) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ = group_match_null_string_p (&p1, pend, reg_info); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ DEBUG_MATCH_PRINT2 (" group CAN%s match null string\n", ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ REG_MATCH_NULL_STRING_P (reg_info[regno]) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ? "NOT" : ""); ~~~~~~~~~~~~~~ /* Save the position in the string where we were the last time ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ we were at this open-group operator in case the group is ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ operated upon by a repetition operator, e.g., with `(a*)*b' ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ against `ab'; then we want to ignore where we are now in the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ string in case this attempt to match fails. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ old_regstart[regno] = REG_MATCH_NULL_STRING_P (reg_info[regno]) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ? REG_UNSET (regstart[regno]) ? d : regstart[regno] ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ : regstart[regno]; ~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT2 (" old_regstart: %zd\n", ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ POINTER_TO_OFFSET (old_regstart[regno])); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regstart[regno] = d; ~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT2 (" regstart: %zd\n", ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ POINTER_TO_OFFSET (regstart[regno])); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ IS_ACTIVE (reg_info[regno]) = 1; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ MATCHED_SOMETHING (reg_info[regno]) = 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Clear this whenever we change the register activity status. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ set_regs_matched_done = 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~ /* This is the new highest active register. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ highest_active_reg = regno; ~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* If nothing was active before, this is the new lowest active ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ register. */ ~~~~~~~~~~~~~ if (lowest_active_reg == NO_LOWEST_ACTIVE_REG) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ lowest_active_reg = regno; ~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Move past the inner group count. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ p += 2; ~~~~~~~ just_past_start_mem = p; ~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ } ~ /* The stop_memory opcode represents the end of a group. Its ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ arguments are the same as start_memory's: the register ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ number, and the number of inner groups. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case stop_memory: ~~~~~~~~~~~~~~~~~ { ~ regnum_t regno, inner_groups; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ EXTRACT_NONNEGATIVE_AND_INCR (regno, p); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ EXTRACT_NONNEGATIVE_AND_INCR (inner_groups, p); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT3 ("EXECUTING stop_memory %d (%d):\n", ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regno, inner_groups); ~~~~~~~~~~~~~~~~~~~~~ /* We need to save the string position the last time we were at ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ this close-group operator in case the group is operated ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ upon by a repetition operator, e.g., with `((a*)*(b*)*)*' ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ against `aba'; then we want to ignore where we are now in ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the string in case this attempt to match fails. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ old_regend[regno] = REG_MATCH_NULL_STRING_P (reg_info[regno]) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ? REG_UNSET (regend[regno]) ? d : regend[regno] ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ : regend[regno]; ~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT2 (" old_regend: %zd\n", ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ POINTER_TO_OFFSET (old_regend[regno])); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regend[regno] = d; ~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT2 (" regend: %zd\n", ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ POINTER_TO_OFFSET (regend[regno])); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* This register isn't active anymore. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ IS_ACTIVE (reg_info[regno]) = 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Clear this whenever we change the register activity status. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ set_regs_matched_done = 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~ /* If this was the only register active, nothing is active ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ anymore. */ ~~~~~~~~~~~~ if (lowest_active_reg == highest_active_reg) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ lowest_active_reg = NO_LOWEST_ACTIVE_REG; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ highest_active_reg = NO_HIGHEST_ACTIVE_REG; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ else ~~~~ { ~ /* We must scan for the new highest active register, since it ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ isn't necessarily one less than now: consider ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (a(b)c(d(e)f)g). When group 3 ends, after the f), the new ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ highest active register is 1. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regnum_t r = regno - 1; ~~~~~~~~~~~~~~~~~~~~~~~ while (r > 0 && !IS_ACTIVE (reg_info[r])) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ r--; ~~~~ /* If we end up at register zero, that means that we saved ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the registers as the result of an `on_failure_jump', not ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ a `start_memory', and we jumped to past the innermost ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `stop_memory'. For example, in ((.)*) we save registers ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 1 and 2 as a result of the *, but when we pop back to the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ second ), we are at the stop_memory 1. Thus, nothing is ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ active. */ ~~~~~~~~~~~ if (r == 0) ~~~~~~~~~~~ { ~ lowest_active_reg = NO_LOWEST_ACTIVE_REG; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ highest_active_reg = NO_HIGHEST_ACTIVE_REG; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ else ~~~~ { ~ highest_active_reg = r; ~~~~~~~~~~~~~~~~~~~~~~~ /* 98/9/21 jhod: We've also gotta set lowest_active_reg, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ don't we? */ ~~~~~~~~~~~~ r = 1; ~~~~~~ while (r < highest_active_reg && !IS_ACTIVE(reg_info[r])) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ r++; ~~~~ lowest_active_reg = r; ~~~~~~~~~~~~~~~~~~~~~~ } ~ } ~ /* If just failed to match something this time around with a ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ group that's operated on by a repetition operator, try to ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ force exit from the ``loop'', and restore the register ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ information for this group that we had before trying this ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ last match. */ ~~~~~~~~~~~~~~~ if ((!MATCHED_SOMETHING (reg_info[regno]) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || just_past_start_mem == p - 4) && p < pend) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ re_bool is_a_jump_n = false; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ p1 = p; ~~~~~~~ mcnt = 0; ~~~~~~~~~ switch ((re_opcode_t) *p1++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ case jump_n: ~~~~~~~~~~~~ is_a_jump_n = true; ~~~~~~~~~~~~~~~~~~~ case pop_failure_jump: ~~~~~~~~~~~~~~~~~~~~~~ case maybe_pop_jump: ~~~~~~~~~~~~~~~~~~~~ case jump: ~~~~~~~~~~ case dummy_failure_jump: ~~~~~~~~~~~~~~~~~~~~~~~~ EXTRACT_NUMBER_AND_INCR (mcnt, p1); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (is_a_jump_n) ~~~~~~~~~~~~~~~~ p1 += 2; ~~~~~~~~ break; ~~~~~~ default: ~~~~~~~~ /* do nothing */ ; ~~~~~~~~~~~~~~~~~~ } ~ p1 += mcnt; ~~~~~~~~~~~ /* If the next operation is a jump backwards in the pattern ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ to an on_failure_jump right before the start_memory ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ corresponding to this stop_memory, exit from the loop ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ by forcing a failure after pushing on the stack the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ on_failure_jump's jump in the pattern, and d. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (mcnt < 0 && (re_opcode_t) *p1 == on_failure_jump ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ && (re_opcode_t) p1[3] == start_memory && ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regno == extract_nonnegative (p1 + 4)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ /* If this group ever matched anything, then restore ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ what its registers were before trying this last ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ failed match, e.g., with `(a*)*b' against `ab' for ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regstart[1], and, e.g., with `((a*)*(b*)*)*' against ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `aba' for regend[3]. ~~~~~~~~~~~~~~~~~~~~ Also restore the registers for inner groups for, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ e.g., `((a*)(b*))*' against `aba' (register 3 would ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ otherwise get trashed). */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (EVER_MATCHED_SOMETHING (reg_info[regno])) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ int r; ~~~~~~ EVER_MATCHED_SOMETHING (reg_info[regno]) = 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Restore this and inner groups' (if any) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ registers. */ ~~~~~~~~~~~~~~ for (r = regno; r < regno + inner_groups; r++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ regstart[r] = old_regstart[r]; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* xx why this test? */ ~~~~~~~~~~~~~~~~~~~~~~~~ if (old_regend[r] >= regstart[r]) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regend[r] = old_regend[r]; ~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ } ~ p1++; ~~~~~ EXTRACT_NUMBER_AND_INCR (mcnt, p1); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PUSH_FAILURE_POINT (p1 + mcnt, d, -2); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto fail; ~~~~~~~~~~ } ~ } ~ /* We used to move past the register number and inner group count ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ here, when registers were just one byte; that's no longer ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ necessary with EXTRACT_NUMBER_AND_INCR(), above. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ } ~ /* \ has been turned into a `duplicate' command which is ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ followed by the numeric value of as the register number. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (Already passed through external-to-internal-register mapping, so ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ it refers to the actual group number, not the non-shy-only ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ numbering used in the external world.) */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case duplicate: ~~~~~~~~~~~~~~~ { ~ REGISTER re_char *d2, *dend2; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Get which register to match against. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regnum_t regno; ~~~~~~~~~~~~~~~ EXTRACT_NONNEGATIVE_AND_INCR (regno, p); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT2 ("EXECUTING duplicate %d.\n", regno); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Can't back reference a group which we've never matched. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (REG_UNSET (regstart[regno]) || REG_UNSET (regend[regno])) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto fail; ~~~~~~~~~~ /* Where in input to try to start matching. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ d2 = regstart[regno]; ~~~~~~~~~~~~~~~~~~~~~ /* Where to stop matching; if both the place to start and ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the place to stop matching are in the same string, then ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ set to the place to stop, otherwise, for now have to use ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the end of the first string. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ dend2 = ((FIRST_STRING_P (regstart[regno]) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ == FIRST_STRING_P (regend[regno])) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ? regend[regno] : end_match_1); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (;;) ~~~~~~~~ { ~ /* If necessary, advance to next segment in register ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ contents. */ ~~~~~~~~~~~~~ while (d2 == dend2) ~~~~~~~~~~~~~~~~~~~ { ~ if (dend2 == end_match_2) break; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (dend2 == regend[regno]) break; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* End of string1 => advance to string2. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ d2 = string2; ~~~~~~~~~~~~~ dend2 = regend[regno]; ~~~~~~~~~~~~~~~~~~~~~~ } ~ /* At end of register contents => success */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (d2 == dend2) break; ~~~~~~~~~~~~~~~~~~~~~~~ /* If necessary, advance to next segment in data. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ REGEX_PREFETCH (); ~~~~~~~~~~~~~~~~~~ /* How many characters left in this segment to match. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ mcnt = dend - d; ~~~~~~~~~~~~~~~~ /* Want how many consecutive characters we can match in ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ one shot, so, if necessary, adjust the count. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (mcnt > dend2 - d2) ~~~~~~~~~~~~~~~~~~~~~~ mcnt = dend2 - d2; ~~~~~~~~~~~~~~~~~~ /* Compare that many; failure if mismatch, else move ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ past them. */ ~~~~~~~~~~~~~~ if (TRANSLATE_P (translate) ~~~~~~~~~~~~~~~~~~~~~~~~~~~ ? bcmp_translate (d, d2, mcnt, translate ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef emacs ~~~~~~~~~~~~ , fmt, lispobj ~~~~~~~~~~~~~~ #endif ~~~~~~ ) ~ : memcmp (d, d2, mcnt)) ~~~~~~~~~~~~~~~~~~~~~~~ goto fail; ~~~~~~~~~~ d += mcnt, d2 += mcnt; ~~~~~~~~~~~~~~~~~~~~~~ /* Do this because we've match some characters. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ SET_REGS_MATCHED (); ~~~~~~~~~~~~~~~~~~~~ } ~ } ~ break; ~~~~~~ /* begline matches the empty string at the beginning of the string ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (unless `not_bol' is set in `bufp'), and, if ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `newline_anchor' is set, after newlines. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case begline: ~~~~~~~~~~~~~ DEBUG_MATCH_PRINT1 ("EXECUTING begline.\n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (AT_STRINGS_BEG (d)) ~~~~~~~~~~~~~~~~~~~~~~~ { ~ if (!bufp->not_bol) break; ~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ else ~~~~ { ~ re_char *d2 = d; ~~~~~~~~~~~~~~~~ DEC_IBYTEPTR (d2); ~~~~~~~~~~~~~~~~~~ if (itext_ichar_ascii_fmt (d2, fmt, lispobj) == '\n' && ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->newline_anchor) ~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ } ~ /* In all other cases, we fail. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto fail; ~~~~~~~~~~ /* endline is the dual of begline. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case endline: ~~~~~~~~~~~~~ DEBUG_MATCH_PRINT1 ("EXECUTING endline.\n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (AT_STRINGS_END (d)) ~~~~~~~~~~~~~~~~~~~~~~~ { ~ if (!bufp->not_eol) break; ~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* We have to ``prefetch'' the next character. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ else if ((d == end1 ? ~~~~~~~~~~~~~~~~~~~~~ itext_ichar_ascii_fmt (string2, fmt, lispobj) : ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ itext_ichar_ascii_fmt (d, fmt, lispobj)) == '\n' ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ && bufp->newline_anchor) ~~~~~~~~~~~~~~~~~~~~~~~~ { ~ break; ~~~~~~ } ~ goto fail; ~~~~~~~~~~ /* Match at the very beginning of the data. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case begbuf: ~~~~~~~~~~~~ DEBUG_MATCH_PRINT1 ("EXECUTING begbuf.\n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (AT_STRINGS_BEG (d)) ~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ goto fail; ~~~~~~~~~~ /* Match at the very end of the data. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case endbuf: ~~~~~~~~~~~~ DEBUG_MATCH_PRINT1 ("EXECUTING endbuf.\n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (AT_STRINGS_END (d)) ~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ goto fail; ~~~~~~~~~~ /* on_failure_keep_string_jump is used to optimize `.*\n'. It ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ pushes NULL as the value for the string on the stack. Then ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `pop_failure_point' will keep the current value for the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ string, instead of restoring it. To see why, consider ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ matching `foo\nbar' against `.*\n'. The .* matches the foo; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ then the . fails against the \n. But the next thing we want ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ to do is match the \n against the \n; if we restored the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ string value, we would be back at the foo. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Because this is used only in specific cases, we don't need to ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ check all the things that `on_failure_jump' does, to make ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ sure the right things get saved on the stack. Hence we don't ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ share its code. The only reason to push anything on the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ stack at all is that otherwise we would have to change ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `anychar's code to do something besides goto fail in this ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case; that seems worse than this. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case on_failure_keep_string_jump: ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT1 ("EXECUTING on_failure_keep_string_jump"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ EXTRACT_NUMBER_AND_INCR (mcnt, p); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT3 (" %d (to 0x%zx):\n", mcnt, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (Bytecount) (p + mcnt)); ~~~~~~~~~~~~~~~~~~~~~~~~ PUSH_FAILURE_POINT (p + mcnt, (unsigned char *) 0, -2); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ /* Uses of on_failure_jump: ~~~~~~~~~~~~~~~~~~~~~~~~~~~ Each alternative starts with an on_failure_jump that points ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ to the beginning of the next alternative. Each alternative ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ except the last ends with a jump that in effect jumps past ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the rest of the alternatives. (They really jump to the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ending jump of the following alternative, because tensioning ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ these jumps is a hassle.) ~~~~~~~~~~~~~~~~~~~~~~~~~ Repeats start with an on_failure_jump that points past both ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the repetition text and either the following jump or ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ pop_failure_jump back to this on_failure_jump. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case on_failure_jump: ~~~~~~~~~~~~~~~~~~~~~ on_failure: ~~~~~~~~~~~ DEBUG_MATCH_PRINT1 ("EXECUTING on_failure_jump"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ EXTRACT_NUMBER_AND_INCR (mcnt, p); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT3 (" %d (to 0x%zx)", mcnt, (Bytecount) (p + mcnt)); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* If this on_failure_jump comes right before a group (i.e., ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the original * applied to a group), save the information ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for that group and all inner ones, so that if we fail back ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ to this point, the group's information will be correct. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ For example, in \(a*\)*\1, we need the preceding group, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ and in \(\(a*\)b*\)\2, we need the inner group. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* We can't use `p' to check ahead because we push ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ a failure point to `p + mcnt' after we do this. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ p1 = p; ~~~~~~~ /* We need to skip no_op's before we look for the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ start_memory in case this on_failure_jump is happening as ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the result of a completed succeed_n, as in \(a\)\{1,3\}b\1 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ against aba. */ ~~~~~~~~~~~~~~~~ while (p1 < pend && (re_opcode_t) *p1 == no_op) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ p1++; ~~~~~ if (p1 < pend && (re_opcode_t) *p1 == start_memory) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ /* We have a new highest active register now. This will ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ get reset at the start_memory we are about to get to, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ but we will have saved all the registers relevant to ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ this repetition op, as described above. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ highest_active_reg = *(p1 + 1) + *(p1 + 2); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (lowest_active_reg == NO_LOWEST_ACTIVE_REG) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ lowest_active_reg = *(p1 + 1); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ DEBUG_MATCH_PRINT1 (":\n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~ PUSH_FAILURE_POINT (p + mcnt, d, -2); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ /* A smart repeat ends with `maybe_pop_jump'. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ We change it to either `pop_failure_jump' or `jump'. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case maybe_pop_jump: ~~~~~~~~~~~~~~~~~~~~ EXTRACT_NUMBER_AND_INCR (mcnt, p); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT2 ("EXECUTING maybe_pop_jump %d.\n", mcnt); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ REGISTER const unsigned char *p2 = p; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Compare the beginning of the repeat with what in the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ pattern follows its end. If we can establish that there ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ is nothing that they would both match, i.e., that we ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ would have to backtrack because of (as in, e.g., `a*a') ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ then we can change to pop_failure_jump, because we'll ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ never have to backtrack. ~~~~~~~~~~~~~~~~~~~~~~~~ This is not true in the case of alternatives: in ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `(a|ab)*' we do need to backtrack to the `ab' alternative ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (e.g., if the string was `ab'). But instead of trying to ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ detect that here, the alternative has put on a dummy ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ failure point which is what we will end up popping. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Skip over open/close-group commands. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ If what follows this loop is a ...+ construct, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ look at what begins its body, since we will have to ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ match at least one of that. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ while (1) ~~~~~~~~~ { ~ if (p2 + 2 < pend ~~~~~~~~~~~~~~~~~ && ((re_opcode_t) *p2 == stop_memory ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || (re_opcode_t) *p2 == start_memory)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ p2 += 3; ~~~~~~~~ else if (p2 + 6 < pend ~~~~~~~~~~~~~~~~~~~~~~ && (re_opcode_t) *p2 == dummy_failure_jump) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ p2 += 6; ~~~~~~~~ else ~~~~ break; ~~~~~~ } ~ p1 = p + mcnt; ~~~~~~~~~~~~~~ /* p1[0] ... p1[2] are the `on_failure_jump' corresponding ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ to the `maybe_finalize_jump' of this case. Examine what ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ follows. */ ~~~~~~~~~~~~ /* If we're at the end of the pattern, we can change. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (p2 == pend) ~~~~~~~~~~~~~~~ { ~ /* Consider what happens when matching ":\(.*\)" ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ against ":/". I don't really understand this code ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ yet. */ ~~~~~~~~ ((unsigned char *)p)[-3] = (re_char) pop_failure_jump; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT1 ~~~~~~~~~~~~~~~~~~ (" End of pattern: change to `pop_failure_jump'.\n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ else if ((re_opcode_t) *p2 == exactn ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || (bufp->newline_anchor && (re_opcode_t) *p2 == endline)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ REGISTER unsigned char c ~~~~~~~~~~~~~~~~~~~~~~~~ = *p2 == (unsigned char) endline ? '\n' : p2[2]; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if ((re_opcode_t) p1[3] == exactn && p1[5] != c) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ ((unsigned char *)p)[-3] ~~~~~~~~~~~~~~~~~~~~~~~~ = (unsigned char) pop_failure_jump; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT3 (" %c != %c => pop_failure_jump.\n", ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ c, p1[5]); ~~~~~~~~~~ } ~ else if ((re_opcode_t) p1[3] == charset ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || (re_opcode_t) p1[3] == charset_not) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ int not_p = (re_opcode_t) p1[3] == charset_not; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (c < (unsigned char) (p1[4] * BYTEWIDTH) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ && p1[5 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH))) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ not_p = !not_p; ~~~~~~~~~~~~~~~ /* `not_p' is equal to 1 if c would match, which means ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ that we can't change to pop_failure_jump. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!not_p) ~~~~~~~~~~~ { ~ ((unsigned char *)p)[-3] ~~~~~~~~~~~~~~~~~~~~~~~~ = (unsigned char) pop_failure_jump; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT1 (" No match => pop_failure_jump.\n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ } ~ } ~ else if ((re_opcode_t) *p2 == charset) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ #ifdef DEBUG ~~~~~~~~~~~~ REGISTER unsigned char c ~~~~~~~~~~~~~~~~~~~~~~~~ = *p2 == (unsigned char) endline ? '\n' : p2[2]; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ if ((re_opcode_t) p1[3] == exactn ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ && ! ((int) p2[1] * BYTEWIDTH > (int) p1[5] ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ && (p2[2 + p1[5] / BYTEWIDTH] ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ & (1 << (p1[5] % BYTEWIDTH))))) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ unsigned char *p3 = (unsigned char *)p; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ p3[-3] = (unsigned char) pop_failure_jump; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT3 (" %c != %c => pop_failure_jump.\n", ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ c, p1[5]); ~~~~~~~~~~ } ~ else if ((re_opcode_t) p1[3] == charset_not) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ int idx; ~~~~~~~~ /* We win if the charset_not inside the loop ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ lists every character listed in the charset after. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (idx = 0; idx < (int) p2[1]; idx++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (! (p2[2 + idx] == 0 ~~~~~~~~~~~~~~~~~~~~~~~ || (idx < (int) p1[4] ~~~~~~~~~~~~~~~~~~~~~ && ((p2[2 + idx] & ~ p1[5 + idx]) == 0)))) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ if (idx == p2[1]) ~~~~~~~~~~~~~~~~~ { ~ unsigned char *p3 = (unsigned char *) p; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ p3[-3] = (unsigned char) pop_failure_jump; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT1 (" No match => pop_failure_jump.\n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ } ~ else if ((re_opcode_t) p1[3] == charset) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ int idx; ~~~~~~~~ /* We win if the charset inside the loop ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ has no overlap with the one after the loop. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (idx = 0; ~~~~~~~~~~~~~ idx < (int) p2[1] && idx < (int) p1[4]; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ idx++) ~~~~~~ if ((p2[2 + idx] & p1[5 + idx]) != 0) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ if (idx == p2[1] || idx == p1[4]) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ unsigned char *p3 = (unsigned char *)p; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ p3[-3] = (unsigned char) pop_failure_jump; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT1 (" No match => pop_failure_jump.\n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ } ~ } ~ } ~ p -= 2; /* Point at relative address again. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if ((re_opcode_t) p[-1] != pop_failure_jump) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ p[-1] = (unsigned char) jump; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT1 (" Match => jump.\n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto unconditional_jump; ~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* Note fall through. */ ~~~~~~~~~~~~~~~~~~~~~~~~~ /* The end of a simple repeat has a pop_failure_jump back to ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ its matching on_failure_jump, where the latter will push a ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ failure point. The pop_failure_jump takes off failure ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ points put on by this pop_failure_jump's matching ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ on_failure_jump; we got through the pattern to here from the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ matching on_failure_jump, so didn't fail. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case pop_failure_jump: ~~~~~~~~~~~~~~~~~~~~~~ { ~ /* We need to pass separate storage for the lowest and ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ highest registers, even though we don't care about the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ actual values. Otherwise, we will restore only one ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ register from the stack, since lowest will == highest in ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `pop_failure_point'. */ ~~~~~~~~~~~~~~~~~~~~~~~~ int dummy_low_reg, dummy_high_reg; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ unsigned char *pdummy; ~~~~~~~~~~~~~~~~~~~~~~ re_char *sdummy = NULL; ~~~~~~~~~~~~~~~~~~~~~~~ USED (sdummy); /* Silence warning. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT1 ("EXECUTING pop_failure_jump.\n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ POP_FAILURE_POINT (sdummy, pdummy, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regex.c:774:55: note: in definition of macro 'DEBUG_FAIL_PRINT2' if (debug_regexps & RE_DEBUG_FAILURE_POINT) printf (x1, x2) ^~ regex.c:6767:13: note: in expansion of macro 'POP_FAILURE_POINT' POP_FAILURE_POINT (sdummy, pdummy, ^~~~~~~~~~~~~~~~~ regex.c:1920:23: warning: format '%zx' expects argument of type 'size_t', but argument 2 has type 'Bytecount {aka long int}' [-Wformat=] DEBUG_FAIL_PRINT2 (" info: 0x%zx\n", \ ^ * (Bytecount *) ®_info[this_reg]); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regex.c:774:55: note: in definition of macro 'DEBUG_FAIL_PRINT2' if (debug_regexps & RE_DEBUG_FAILURE_POINT) printf (x1, x2) ^~ regex.c:6767:13: note: in expansion of macro 'POP_FAILURE_POINT' POP_FAILURE_POINT (sdummy, pdummy, ^~~~~~~~~~~~~~~~~ regex.c:1922:23: warning: format '%zx' expects argument of type 'size_t', but argument 2 has type 'long int' [-Wformat=] DEBUG_FAIL_PRINT2 (" end: 0x%zx\n", \ ^ (Bytecount) regend[this_reg]); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regex.c:774:55: note: in definition of macro 'DEBUG_FAIL_PRINT2' if (debug_regexps & RE_DEBUG_FAILURE_POINT) printf (x1, x2) ^~ regex.c:6767:13: note: in expansion of macro 'POP_FAILURE_POINT' POP_FAILURE_POINT (sdummy, pdummy, ^~~~~~~~~~~~~~~~~ regex.c:1924:23: warning: format '%zx' expects argument of type 'size_t', but argument 2 has type 'long int' [-Wformat=] DEBUG_FAIL_PRINT2 (" start: 0x%zx\n", \ ^ (Bytecount) regstart[this_reg]); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regex.c:774:55: note: in definition of macro 'DEBUG_FAIL_PRINT2' if (debug_regexps & RE_DEBUG_FAILURE_POINT) printf (x1, x2) ^~ regex.c:6767:13: note: in expansion of macro 'POP_FAILURE_POINT' POP_FAILURE_POINT (sdummy, pdummy, ^~~~~~~~~~~~~~~~~ regex.c:6781:31: warning: format '%zx' expects argument of type 'size_t', but argument 2 has type 'long int' [-Wformat=] DEBUG_MATCH_PRINT2 ("(to 0x%zx).\n", (Bytecount) p); ^ ~~~~~~~~~~~~~ regex.c:789:50: note: in definition of macro 'DEBUG_MATCH_PRINT2' if (debug_regexps & RE_DEBUG_MATCHING) printf (x1, x2) ^~ --- select.o --- In file included from select.c:22:0: ./config.h:868:16: warning: "__builtin_alloca" is not defined, evaluates to 0 [-Wundef] #define alloca __builtin_alloca ^ --- regex.o --- regex.c:1731:26: warning: format '%zd' expects argument of type 'signed size_t', but argument 2 has type 'long int' [-Wformat=] DEBUG_FAIL_PRINT2 (" Before push, next avail: %zd\n", \ ^ (Bytecount) (fail_stack).avail); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~ regex.c:774:55: note: in definition of macro 'DEBUG_FAIL_PRINT2' if (debug_regexps & RE_DEBUG_FAILURE_POINT) printf (x1, x2) ^~ regex.c:6801:11: note: in expansion of macro 'PUSH_FAILURE_POINT' PUSH_FAILURE_POINT ((unsigned char *) 0, (unsigned char *) 0, -2); ^~~~~~~~~~~~~~~~~~ regex.c:1733:26: warning: format '%zd' expects argument of type 'signed size_t', but argument 2 has type 'long int' [-Wformat=] DEBUG_FAIL_PRINT2 (" size: %zd\n", \ ^ (Bytecount) (fail_stack).size); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~ regex.c:774:55: note: in definition of macro 'DEBUG_FAIL_PRINT2' if (debug_regexps & RE_DEBUG_FAILURE_POINT) printf (x1, x2) ^~ regex.c:6801:11: note: in expansion of macro 'PUSH_FAILURE_POINT' PUSH_FAILURE_POINT ((unsigned char *) 0, (unsigned char *) 0, -2); ^~~~~~~~~~~~~~~~~~ regex.c:1737:26: warning: format '%zd' expects argument of type 'signed size_t', but argument 2 has type 'long int' [-Wformat=] DEBUG_FAIL_PRINT2 (" available: %zd\n", \ ^ regex.c:774:55: note: in definition of macro 'DEBUG_FAIL_PRINT2' if (debug_regexps & RE_DEBUG_FAILURE_POINT) printf (x1, x2) ^~ regex.c:6801:11: note: in expansion of macro 'PUSH_FAILURE_POINT' PUSH_FAILURE_POINT ((unsigned char *) 0, (unsigned char *) 0, -2); ^~~~~~~~~~~~~~~~~~ regex.c:1756:23: warning: format '%zd' expects argument of type 'signed size_t', but argument 2 has type 'long int' [-Wformat=] DEBUG_FAIL_PRINT2 ("\n Doubled stack; size now: %zd\n", \ ^ (Bytecount) (fail_stack).size); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~ regex.c:774:55: note: in definition of macro 'DEBUG_FAIL_PRINT2' if (debug_regexps & RE_DEBUG_FAILURE_POINT) printf (x1, x2) ^~ regex.c:6801:11: note: in expansion of macro 'PUSH_FAILURE_POINT' PUSH_FAILURE_POINT ((unsigned char *) 0, (unsigned char *) 0, -2); ^~~~~~~~~~~~~~~~~~ regex.c:1758:23: warning: format '%zd' expects argument of type 'signed size_t', but argument 2 has type 'long int' [-Wformat=] DEBUG_FAIL_PRINT2 (" slots available: %zd\n", \ ^ regex.c:774:55: note: in definition of macro 'DEBUG_FAIL_PRINT2' if (debug_regexps & RE_DEBUG_FAILURE_POINT) printf (x1, x2) ^~ regex.c:6801:11: note: in expansion of macro 'PUSH_FAILURE_POINT' PUSH_FAILURE_POINT ((unsigned char *) 0, (unsigned char *) 0, -2); ^~~~~~~~~~~~~~~~~~ regex.c:1777:23: warning: format '%zx' expects argument of type 'size_t', but argument 2 has type 'long int' [-Wformat=] DEBUG_FAIL_PRINT2 (" start: 0x%zx\n", \ ^ (Bytecount) regstart[this_reg]); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regex.c:774:55: note: in definition of macro 'DEBUG_FAIL_PRINT2' if (debug_regexps & RE_DEBUG_FAILURE_POINT) printf (x1, x2) ^~ regex.c:6801:11: note: in expansion of macro 'PUSH_FAILURE_POINT' PUSH_FAILURE_POINT ((unsigned char *) 0, (unsigned char *) 0, -2); ^~~~~~~~~~~~~~~~~~ regex.c:1779:23: warning: format '%zx' expects argument of type 'size_t', but argument 2 has type 'long int' [-Wformat=] DEBUG_FAIL_PRINT2 (" end: 0x%zx\n", \ ^ (Bytecount) regend[this_reg]); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regex.c:774:55: note: in definition of macro 'DEBUG_FAIL_PRINT2' if (debug_regexps & RE_DEBUG_FAILURE_POINT) printf (x1, x2) ^~ regex.c:6801:11: note: in expansion of macro 'PUSH_FAILURE_POINT' PUSH_FAILURE_POINT ((unsigned char *) 0, (unsigned char *) 0, -2); ^~~~~~~~~~~~~~~~~~ regex.c:1781:23: warning: format '%zx' expects argument of type 'size_t', but argument 2 has type 'long int' [-Wformat=] DEBUG_FAIL_PRINT2 (" info: 0x%zx\n ", \ ^ * (long *) (®_info[this_reg])); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regex.c:774:55: note: in definition of macro 'DEBUG_FAIL_PRINT2' if (debug_regexps & RE_DEBUG_FAILURE_POINT) printf (x1, x2) ^~ regex.c:6801:11: note: in expansion of macro 'PUSH_FAILURE_POINT' PUSH_FAILURE_POINT ((unsigned char *) 0, (unsigned char *) 0, -2); ^~~~~~~~~~~~~~~~~~ regex.c:1814:26: warning: format '%zx' expects argument of type 'size_t', but argument 2 has type 'long int' [-Wformat=] DEBUG_FAIL_PRINT2 (" Pushing pattern 0x%zx: \n", \ ^ regex.c:774:55: note: in definition of macro 'DEBUG_FAIL_PRINT2' if (debug_regexps & RE_DEBUG_FAILURE_POINT) printf (x1, x2) ^~ regex.c:6801:11: note: in expansion of macro 'PUSH_FAILURE_POINT' PUSH_FAILURE_POINT ((unsigned char *) 0, (unsigned char *) 0, -2); ^~~~~~~~~~~~~~~~~~ regex.c:1817:26: warning: format '%zx' expects argument of type 'size_t', but argument 2 has type 'long int' [-Wformat=] DEBUG_FAIL_PRINT2 (" Pushing string 0x%zx: `", \ ^ regex.c:774:55: note: in definition of macro 'DEBUG_FAIL_PRINT2' if (debug_regexps & RE_DEBUG_FAILURE_POINT) printf (x1, x2) ^~ regex.c:6801:11: note: in expansion of macro 'PUSH_FAILURE_POINT' PUSH_FAILURE_POINT ((unsigned char *) 0, (unsigned char *) 0, -2); ^~~~~~~~~~~~~~~~~~ regex.c:1731:26: warning: format '%zd' expects argument of type 'signed size_t', but argument 2 has type 'long int' [-Wformat=] DEBUG_FAIL_PRINT2 (" Before push, next avail: %zd\n", \ ^ (Bytecount) (fail_stack).avail); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~ regex.c:774:55: note: in definition of macro 'DEBUG_FAIL_PRINT2' if (debug_regexps & RE_DEBUG_FAILURE_POINT) printf (x1, x2) ^~ regex.c:6814:11: note: in expansion of macro 'PUSH_FAILURE_POINT' PUSH_FAILURE_POINT ((re_char *) 0, (re_char *) 0, -2); ^~~~~~~~~~~~~~~~~~ regex.c:1733:26: warning: format '%zd' expects argument of type 'signed size_t', but argument 2 has type 'long int' [-Wformat=] DEBUG_FAIL_PRINT2 (" size: %zd\n", \ ^ (Bytecount) (fail_stack).size); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~ regex.c:774:55: note: in definition of macro 'DEBUG_FAIL_PRINT2' if (debug_regexps & RE_DEBUG_FAILURE_POINT) printf (x1, x2) ^~ regex.c:6814:11: note: in expansion of macro 'PUSH_FAILURE_POINT' PUSH_FAILURE_POINT ((re_char *) 0, (re_char *) 0, -2); ^~~~~~~~~~~~~~~~~~ regex.c:1737:26: warning: format '%zd' expects argument of type 'signed size_t', but argument 2 has type 'long int' [-Wformat=] DEBUG_FAIL_PRINT2 (" available: %zd\n", \ ^ regex.c:774:55: note: in definition of macro 'DEBUG_FAIL_PRINT2' if (debug_regexps & RE_DEBUG_FAILURE_POINT) printf (x1, x2) ^~ regex.c:6814:11: note: in expansion of macro 'PUSH_FAILURE_POINT' PUSH_FAILURE_POINT ((re_char *) 0, (re_char *) 0, -2); ^~~~~~~~~~~~~~~~~~ regex.c:1756:23: warning: format '%zd' expects argument of type 'signed size_t', but argument 2 has type 'long int' [-Wformat=] DEBUG_FAIL_PRINT2 ("\n Doubled stack; size now: %zd\n", \ ^ (Bytecount) (fail_stack).size); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~ regex.c:774:55: note: in definition of macro 'DEBUG_FAIL_PRINT2' if (debug_regexps & RE_DEBUG_FAILURE_POINT) printf (x1, x2) ^~ regex.c:6814:11: note: in expansion of macro 'PUSH_FAILURE_POINT' PUSH_FAILURE_POINT ((re_char *) 0, (re_char *) 0, -2); ^~~~~~~~~~~~~~~~~~ regex.c:1758:23: warning: format '%zd' expects argument of type 'signed size_t', but argument 2 has type 'long int' [-Wformat=] DEBUG_FAIL_PRINT2 (" slots available: %zd\n", \ ^ regex.c:774:55: note: in definition of macro 'DEBUG_FAIL_PRINT2' if (debug_regexps & RE_DEBUG_FAILURE_POINT) printf (x1, x2) ^~ regex.c:6814:11: note: in expansion of macro 'PUSH_FAILURE_POINT' PUSH_FAILURE_POINT ((re_char *) 0, (re_char *) 0, -2); ^~~~~~~~~~~~~~~~~~ regex.c:1777:23: warning: format '%zx' expects argument of type 'size_t', but argument 2 has type 'long int' [-Wformat=] DEBUG_FAIL_PRINT2 (" start: 0x%zx\n", \ ^ (Bytecount) regstart[this_reg]); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regex.c:774:55: note: in definition of macro 'DEBUG_FAIL_PRINT2' if (debug_regexps & RE_DEBUG_FAILURE_POINT) printf (x1, x2) ^~ regex.c:6814:11: note: in expansion of macro 'PUSH_FAILURE_POINT' PUSH_FAILURE_POINT ((re_char *) 0, (re_char *) 0, -2); ^~~~~~~~~~~~~~~~~~ regex.c:1779:23: warning: format '%zx' expects argument of type 'size_t', but argument 2 has type 'long int' [-Wformat=] DEBUG_FAIL_PRINT2 (" end: 0x%zx\n", \ ^ (Bytecount) regend[this_reg]); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regex.c:774:55: note: in definition of macro 'DEBUG_FAIL_PRINT2' if (debug_regexps & RE_DEBUG_FAILURE_POINT) printf (x1, x2) ^~ regex.c:6814:11: note: in expansion of macro 'PUSH_FAILURE_POINT' PUSH_FAILURE_POINT ((re_char *) 0, (re_char *) 0, -2); ^~~~~~~~~~~~~~~~~~ regex.c:1781:23: warning: format '%zx' expects argument of type 'size_t', but argument 2 has type 'long int' [-Wformat=] DEBUG_FAIL_PRINT2 (" info: 0x%zx\n ", \ ^ * (long *) (®_info[this_reg])); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regex.c:774:55: note: in definition of macro 'DEBUG_FAIL_PRINT2' if (debug_regexps & RE_DEBUG_FAILURE_POINT) printf (x1, x2) ^~ regex.c:6814:11: note: in expansion of macro 'PUSH_FAILURE_POINT' PUSH_FAILURE_POINT ((re_char *) 0, (re_char *) 0, -2); ^~~~~~~~~~~~~~~~~~ regex.c:1814:26: warning: format '%zx' expects argument of type 'size_t', but argument 2 has type 'long int' [-Wformat=] DEBUG_FAIL_PRINT2 (" Pushing pattern 0x%zx: \n", \ ^ regex.c:774:55: note: in definition of macro 'DEBUG_FAIL_PRINT2' if (debug_regexps & RE_DEBUG_FAILURE_POINT) printf (x1, x2) ^~ regex.c:6814:11: note: in expansion of macro 'PUSH_FAILURE_POINT' PUSH_FAILURE_POINT ((re_char *) 0, (re_char *) 0, -2); ^~~~~~~~~~~~~~~~~~ regex.c:1817:26: warning: format '%zx' expects argument of type 'size_t', but argument 2 has type 'long int' [-Wformat=] DEBUG_FAIL_PRINT2 (" Pushing string 0x%zx: `", \ ^ regex.c:774:55: note: in definition of macro 'DEBUG_FAIL_PRINT2' if (debug_regexps & RE_DEBUG_FAILURE_POINT) printf (x1, x2) ^~ regex.c:6814:11: note: in expansion of macro 'PUSH_FAILURE_POINT' PUSH_FAILURE_POINT ((re_char *) 0, (re_char *) 0, -2); ^~~~~~~~~~~~~~~~~~ regex.c:6828:36: warning: format '%zx' expects argument of type 'size_t', but argument 2 has type 'long int' [-Wformat=] DEBUG_MATCH_PRINT3 (" Setting 0x%zx to %d.\n", (Bytecount) p, ^ ~~~~~~~~~~~~~ regex.c:791:50: note: in definition of macro 'DEBUG_MATCH_PRINT3' if (debug_regexps & RE_DEBUG_MATCHING) printf (x1, x2, x3) ^~ regex.c:6834:35: warning: format '%zx' expects argument of type 'size_t', but argument 2 has type 'long int' [-Wformat=] DEBUG_MATCH_PRINT2 (" Setting two bytes from 0x%zx to no_op.\n", ^ (Bytecount) (p+2)); ~~~~~~~~~~~~~~~~~ regex.c:789:50: note: in definition of macro 'DEBUG_MATCH_PRINT2' if (debug_regexps & RE_DEBUG_MATCHING) printf (x1, x2) ^~ regex.c:6868:33: warning: format '%zx' expects argument of type 'size_t', but argument 2 has type 'long int' [-Wformat=] DEBUG_MATCH_PRINT3 (" Setting 0x%zx to %d.\n", (Bytecount) p2, ^ ~~~~~~~~~~~~~ regex.c:791:50: note: in definition of macro 'DEBUG_MATCH_PRINT3' if (debug_regexps & RE_DEBUG_MATCHING) printf (x1, x2, x3) ^~ regex.c:1877:26: warning: format '%zd' expects argument of type 'signed size_t', but argument 2 has type 'long int' [-Wformat=] DEBUG_FAIL_PRINT2 (" Before pop, next avail: %zd\n", \ ^ (Bytecount) fail_stack.avail); \ ~~~~~~~~~~~~~~~~~~~~~~~~ regex.c:774:55: note: in definition of macro 'DEBUG_FAIL_PRINT2' if (debug_regexps & RE_DEBUG_FAILURE_POINT) printf (x1, x2) ^~ regex.c:7173:11: note: in expansion of macro 'POP_FAILURE_POINT' POP_FAILURE_POINT (d, p, ^~~~~~~~~~~~~~~~~ regex.c:1879:26: warning: format '%zd' expects argument of type 'signed size_t', but argument 2 has type 'long int' [-Wformat=] DEBUG_FAIL_PRINT2 (" size: %zd\n", \ ^ (Bytecount) fail_stack.size); \ ~~~~~~~~~~~~~~~~~~~~~~~~ regex.c:774:55: note: in definition of macro 'DEBUG_FAIL_PRINT2' if (debug_regexps & RE_DEBUG_FAILURE_POINT) printf (x1, x2) ^~ regex.c:7173:11: note: in expansion of macro 'POP_FAILURE_POINT' POP_FAILURE_POINT (d, p, ^~~~~~~~~~~~~~~~~ regex.c:1901:26: warning: format '%zx' expects argument of type 'size_t', but argument 2 has type 'long int' [-Wformat=] DEBUG_FAIL_PRINT2 (" Popping string 0x%zx: `", (Bytecount) str); \ ^ ~~~~~~~~~~~~~~~~~~~ DEBUG_FAIL_PRINT_DOUBLE_STRING (str, string1, size1, \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ string2, size2); \ ~~~~~~~~~~~~~~~~~~~~ DEBUG_FAIL_PRINT1 ("'\n"); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_FAIL_PRINT2 (" Popping pattern 0x%zx: ", (Bytecount) pat); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_FAIL_PRINT_COMPILED_PATTERN (bufp, pat, pend); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_FAIL_PRINT2 (" Popping high active reg: %d\n", high_reg); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_FAIL_PRINT2 (" Popping low active reg: %d\n", low_reg); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } \ ~~~~~~~~~~~ \ ~ for (this_reg = high_reg; this_reg >= low_reg; this_reg--) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { \ ~~~~~~~~~~~ reg_info[this_reg].word = POP_FAILURE_ELT (); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regend[this_reg] = POP_FAILURE_RELOCATABLE (); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regstart[this_reg] = POP_FAILURE_RELOCATABLE (); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ \ ~ if (DEBUG_RUNTIME_FLAGS & RE_DEBUG_FAILURE_POINT) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { \ ~~~~~~~~~~ DEBUG_FAIL_PRINT2 (" Popping reg: %d\n", this_reg); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_FAIL_PRINT2 (" info: 0x%zx\n", \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ * (Bytecount *) ®_info[this_reg]); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_FAIL_PRINT2 (" end: 0x%zx\n", \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (Bytecount) regend[this_reg]); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_FAIL_PRINT2 (" start: 0x%zx\n", \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (Bytecount) regstart[this_reg]); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } \ ~~~~~~~~~~ } \ ~~~~~~~~~~~ \ ~ set_regs_matched_done = 0; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_STATEMENT (nfailure_points_popped++); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } while (0) /* POP_FAILURE_POINT */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ~ /* Structure for per-register (a.k.a. per-group) information. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Other register information, such as the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ starting and ending positions (which are addresses), and the list of ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ inner groups (which is a bits list) are maintained in separate ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ variables. ~~~~~~~~~~ We are making a (strictly speaking) nonportable assumption here: that ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the compiler will pack our bit fields into something that fits into ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the type of `word', i.e., is something that fits into one item on the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ failure stack. */ ~~~~~~~~~~~~~~~~~~ typedef union ~~~~~~~~~~~~~ { ~ fail_stack_elt_t word; ~~~~~~~~~~~~~~~~~~~~~~ struct ~~~~~~ { ~ /* This field is one if this group can match the empty string, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ zero if not. If not yet determined, `MATCH_NULL_UNSET_VALUE'. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define MATCH_NULL_UNSET_VALUE 3 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ unsigned int match_null_string_p : 2; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ unsigned int is_active : 1; ~~~~~~~~~~~~~~~~~~~~~~~~~~~ unsigned int matched_something : 1; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ unsigned int ever_matched_something : 1; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } bits; ~~~~~~~ } register_info_type; ~~~~~~~~~~~~~~~~~~~~~ #define REG_MATCH_NULL_STRING_P(R) ((R).bits.match_null_string_p) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define IS_ACTIVE(R) ((R).bits.is_active) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define MATCHED_SOMETHING(R) ((R).bits.matched_something) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define EVER_MATCHED_SOMETHING(R) ((R).bits.ever_matched_something) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Call this when have matched a real character; it sets `matched' flags ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for the subexpressions which we are currently inside. Also records ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ that those subexprs have matched. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define SET_REGS_MATCHED() \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ do \ ~~~~~~~~~~~~ { \ ~~~~~~~~~~~ if (!set_regs_matched_done) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { \ ~~~~~~~~~~ int r; \ ~~~~~~~~~~~~~~ set_regs_matched_done = 1; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (r = lowest_active_reg; r <= highest_active_reg; r++) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { \ ~~~~~~~~~~ MATCHED_SOMETHING (reg_info[r]) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ = EVER_MATCHED_SOMETHING (reg_info[r]) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ = 1; \ ~~~~~~~~~~~~ } \ ~~~~~~~~~~ } \ ~~~~~~~~~~ } \ ~~~~~~~~~~~ while (0) ~~~~~~~~~ ~ /* Subroutine declarations and macros for regex_compile. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Fetch the next character in the uncompiled pattern---translating it ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if necessary. */ ~~~~~~~~~~~~~~~~~ #define PATFETCH(c) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~ do { \ ~~~~~~~~~~~~~~ PATFETCH_RAW (c); \ ~~~~~~~~~~~~~~~~~~~~~~~~~ c = RE_TRANSLATE (c); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } while (0) ~~~~~~~~~~~ /* Fetch the next character in the uncompiled pattern, with no ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ translation. */ ~~~~~~~~~~~~~~~~ #define PATFETCH_RAW(c) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ do {if (p == pend) return REG_EEND; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ assert (p < pend); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~ c = itext_ichar (p); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ INC_IBYTEPTR (p); \ ~~~~~~~~~~~~~~~~~~~~~~~~~ } while (0) ~~~~~~~~~~~ /* Go backwards one character in the pattern. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define PATUNFETCH DEC_IBYTEPTR (p) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* If `translate' is non-null, return translate[D], else just D. We ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ cast the subscript to translate because some data is declared as ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `char *', to avoid warnings when a string constant is passed. But ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ when we use a character as a subscript we must make it unsigned. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define RE_TRANSLATE(d) \ ~~~~~~~~~~~~~~~~~~~~~~~~~ (TRANSLATE_P (translate) ? RE_TRANSLATE_1 (d) : (d)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Macros for outputting the compiled pattern into `buffer'. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* If the buffer isn't allocated when it comes in, use this. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define INIT_BUF_SIZE 32 ~~~~~~~~~~~~~~~~~~~~~~~~~ /* Make sure we have at least N more bytes of space in buffer. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define GET_BUFFER_SPACE(n) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ while (buf_end - bufp->buffer + (n) > (ptrdiff_t) bufp->allocated) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ EXTEND_BUFFER () ~~~~~~~~~~~~~~~~ /* Make sure we have one more byte of buffer space and then add C to it. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define BUF_PUSH(c) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~ do { \ ~~~~~~~~~~~~~~ GET_BUFFER_SPACE (1); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ *buf_end++ = (unsigned char) (c); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } while (0) ~~~~~~~~~~~ /* Ensure we have two more bytes of buffer space and then append C1 and C2. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define BUF_PUSH_2(c1, c2) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ do { \ ~~~~~~~~~~~~~~ GET_BUFFER_SPACE (2); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ *buf_end++ = (unsigned char) (c1); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ *buf_end++ = (unsigned char) (c2); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } while (0) ~~~~~~~~~~~ /* As with BUF_PUSH_2, except for three bytes. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define BUF_PUSH_3(c1, c2, c3) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ do { \ ~~~~~~~~~~~~~~ GET_BUFFER_SPACE (3); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ *buf_end++ = (unsigned char) (c1); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ *buf_end++ = (unsigned char) (c2); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ *buf_end++ = (unsigned char) (c3); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } while (0) ~~~~~~~~~~~ /* Store a jump with opcode OP at LOC to location TO. We store a ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ relative address offset by the three bytes the jump itself occupies. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define STORE_JUMP(op, loc, to) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ store_op1 (op, loc, (to) - (loc) - 3) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Likewise, for a two-argument jump. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define STORE_JUMP2(op, loc, to, arg) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ store_op2 (op, loc, (to) - (loc) - 3, arg) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Like `STORE_JUMP', but for inserting. Assume `buf_end' is the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ buffer end. */ ~~~~~~~~~~~~~~~ #define INSERT_JUMP(op, loc, to) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ insert_op1 (op, loc, (to) - (loc) - 3, buf_end) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Like `STORE_JUMP2', but for inserting. Assume `buf_end' is the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ buffer end. */ ~~~~~~~~~~~~~~~ #define INSERT_JUMP2(op, loc, to, arg) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ insert_op2 (op, loc, (to) - (loc) - 3, arg, buf_end) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Extend the buffer by twice its current size via realloc and ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ reset the pointers that pointed into the old block to point to the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ correct places in the new one. If extending the buffer results in it ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ being larger than RE_MAX_BUF_SIZE, then flag memory exhausted. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define EXTEND_BUFFER() \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ do { \ ~~~~~~~~~~~~~~~ re_char *old_buffer = bufp->buffer; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (bufp->allocated == RE_MAX_BUF_SIZE) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return REG_ESIZE; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->allocated <<= 1; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (bufp->allocated > RE_MAX_BUF_SIZE) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->allocated = RE_MAX_BUF_SIZE; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->buffer = \ ~~~~~~~~~~~~~~~~~~~~~~~ (unsigned char *) xrealloc (bufp->buffer, bufp->allocated); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (bufp->buffer == NULL) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return REG_ESPACE; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~ /* If the buffer moved, move all the pointers into it. */ \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (old_buffer != bufp->buffer) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { \ ~~~~~~~~~~~~ buf_end = (buf_end - old_buffer) + bufp->buffer; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ begalt = (begalt - old_buffer) + bufp->buffer; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (fixup_alt_jump) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~ fixup_alt_jump = (fixup_alt_jump - old_buffer) + bufp->buffer; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (laststart) \ ~~~~~~~~~~~~~~~~~~~~~~~ laststart = (laststart - old_buffer) + bufp->buffer; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (pending_exact) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~ pending_exact = (pending_exact - old_buffer) + bufp->buffer; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } \ ~~~~~~~~~~~~ } while (0) ~~~~~~~~~~~ #define INIT_REG_TRANSLATE_SIZE 5 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Macros for the compile stack. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Since offsets can go either forwards or backwards, this type needs to be ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ able to hold values from -(RE_MAX_BUF_SIZE - 1) to RE_MAX_BUF_SIZE - 1. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ typedef int pattern_offset_t; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ typedef struct ~~~~~~~~~~~~~~ { ~ pattern_offset_t begalt_offset; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ pattern_offset_t fixup_alt_jump; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ pattern_offset_t inner_group_offset; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ pattern_offset_t laststart_offset; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regnum_t regnum; ~~~~~~~~~~~~~~~~ } compile_stack_elt_t; ~~~~~~~~~~~~~~~~~~~~~~ typedef struct ~~~~~~~~~~~~~~ { ~ compile_stack_elt_t *stack; ~~~~~~~~~~~~~~~~~~~~~~~~~~~ int size; ~~~~~~~~~ int avail; /* Offset of next open position. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } compile_stack_type; ~~~~~~~~~~~~~~~~~~~~~ #define INIT_COMPILE_STACK_SIZE 32 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define COMPILE_STACK_EMPTY (compile_stack.avail == 0) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define COMPILE_STACK_FULL (compile_stack.avail == compile_stack.size) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* The next available element. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define COMPILE_STACK_TOP (compile_stack.stack[compile_stack.avail]) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Set the bit for character C in a bit vector. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define SET_LIST_BIT(c) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (buf_end[((unsigned char) (c)) / BYTEWIDTH] \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ |= 1 << (((unsigned char) c) % BYTEWIDTH)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef MULE ~~~~~~~~~~~ /* Set the "bit" for character C in a range table. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define SET_RANGETAB_BIT(c) put_range_table (rtab, c, c, Qt) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ #ifdef emacs ~~~~~~~~~~~~ /* Parse the longest number we can, but don't produce a bignum, that can't ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ correspond to anything we're interested in and would needlessly complicate ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ code. Also avoid the silent overflow issues of the non-emacs code below. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ If the string at P is not exhausted, leave P pointing at the next ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (probable-)non-digit byte encountered. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define GET_UNSIGNED_NUMBER(num) do \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Ibyte *_gus_numend = NULL; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Lisp_Object _gus_numno; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* most-positive-fixnum on 32 bit XEmacs is 10 decimal digits, \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ nine will keep us in fixnum territory no matter our \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ architecture */ \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Bytecount limit = min (pend - p, 9); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ \ ~ /* Require that any digits are ASCII. We already require that \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the user type ASCII in order to type {,(,|, etc, and there is \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the potential for security holes in the future if we allow \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ non-ASCII digits to specify groups in regexps and other \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ code that parses regexps is not aware of this. */ \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ _gus_numno = parse_integer (p, &_gus_numend, limit, 10, 1, \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Vdigit_fixnum_ascii); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (FIXNUMP (_gus_numno) && XREALFIXNUM (_gus_numno) >= 0) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ num = XREALFIXNUM (_gus_numno); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ p = _gus_numend; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } while (0) ~~~~~~~~~~~ #else ~~~~~ /* Get the next unsigned number in the uncompiled pattern. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define GET_UNSIGNED_NUMBER(num) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { if (p != pend) \ ~~~~~~~~~~~~~~~~~~~~~~~~ { \ ~~~~~~~~~~~ int _gun_do_unfetch = 1; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PATFETCH (c); \ ~~~~~~~~~~~~~~~~~~~~~~ while (ISDIGIT (c)) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~ { \ ~~~~~~~~~~~ if (num < 0) \ ~~~~~~~~~~~~~~~~~~~~ num = 0; \ ~~~~~~~~~~~~~~~~ num = num * 10 + c - '0'; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (p == pend) \ ~~~~~~~~~~~~~~~~~~~~~~ { \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ _gun_do_unfetch = 0; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; \ ~~~~~~~~~~~~~~~ } \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PATFETCH (c); \ ~~~~~~~~~~~~~~~~~~~~ } \ ~~~~~~~~~~~ if (_gun_do_unfetch) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Make sure P points to the next non-digit character. */ \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PATUNFETCH; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ #endif ~~~~~~ /* Map a string to the char class it names (if any). BEG points to the string ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ to be parsed and LIMIT is the length, in bytes, of that string. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ XEmacs; this only handles the NAME part of the [:NAME:] specification of a ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ character class name. The GNU emacs version of this function attempts to ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ handle the string from [: onwards, and is called re_wctype_parse. Our ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ approach means the function doesn't need to be called with every character ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ class encountered. ~~~~~~~~~~~~~~~~~~ LENGTH would be a Bytecount if this function didn't need to be compiled ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ also for executables that don't include lisp.h ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Return RECC_ERROR if STRP doesn't match a known character class. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_wctype_t ~~~~~~~~~~~ re_wctype (const unsigned char *beg, int limit) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ /* Sort tests in the length=five case by frequency the classes to minimize ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ number of times we fail the comparison. The frequencies of character class ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ names used in Emacs sources as of 2016-07-27: ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ $ find \( -name \*.c -o -name \*.el \) -exec grep -h '\[:[a-z]*:]' {} + | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ sed 's/]/]\n/g' |grep -o '\[:[a-z]*:]' |sort |uniq -c |sort -nr ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 213 [:alnum:] ~~~~~~~~~~~~~ 104 [:alpha:] ~~~~~~~~~~~~~ 62 [:space:] ~~~~~~~~~~~~ 39 [:digit:] ~~~~~~~~~~~~ 36 [:blank:] ~~~~~~~~~~~~ 26 [:word:] ~~~~~~~~~~~ 26 [:upper:] ~~~~~~~~~~~~ 21 [:lower:] ~~~~~~~~~~~~ 10 [:xdigit:] ~~~~~~~~~~~~~ 10 [:punct:] ~~~~~~~~~~~~ 10 [:ascii:] ~~~~~~~~~~~~ 4 [:nonascii:] ~~~~~~~~~~~~~~ 4 [:graph:] ~~~~~~~~~~~ 2 [:print:] ~~~~~~~~~~~ 2 [:cntrl:] ~~~~~~~~~~~ 1 [:ff:] ~~~~~~~~ If you update this list, consider also updating chain of or'ed conditions ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ in execute_charset function. XEmacs; our equivalent is the condition ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ checking class_bits in the charset_mule and charset_mule_not opcodes. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ ~~ switch (limit) { ~~~~~~~~~~~~~~~~ case 4: ~~~~~~~ if (!memcmp (beg, "word", 4)) return RECC_WORD; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case 5: ~~~~~~~ if (!memcmp (beg, "alnum", 5)) return RECC_ALNUM; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!memcmp (beg, "alpha", 5)) return RECC_ALPHA; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!memcmp (beg, "space", 5)) return RECC_SPACE; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!memcmp (beg, "digit", 5)) return RECC_DIGIT; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!memcmp (beg, "blank", 5)) return RECC_BLANK; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!memcmp (beg, "upper", 5)) return RECC_UPPER; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!memcmp (beg, "lower", 5)) return RECC_LOWER; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!memcmp (beg, "punct", 5)) return RECC_PUNCT; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!memcmp (beg, "ascii", 5)) return RECC_ASCII; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!memcmp (beg, "graph", 5)) return RECC_GRAPH; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!memcmp (beg, "print", 5)) return RECC_PRINT; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!memcmp (beg, "cntrl", 5)) return RECC_CNTRL; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case 6: ~~~~~~~ if (!memcmp (beg, "xdigit", 6)) return RECC_XDIGIT; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case 7: ~~~~~~~ if (!memcmp (beg, "unibyte", 7)) return RECC_UNIBYTE; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case 8: ~~~~~~~ if (!memcmp (beg, "nonascii", 8)) return RECC_NONASCII; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case 9: ~~~~~~~ if (!memcmp (beg, "multibyte", 9)) return RECC_MULTIBYTE; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ } ~ return RECC_ERROR; ~~~~~~~~~~~~~~~~~~ } ~ /* True if CH is in the char class CC. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int ~~~ re_iswctype (int ch, re_wctype_t cc ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RE_ISWCTYPE_ARG_DECL) ~~~~~~~~~~~~~~~~~~~~~ { ~ switch (cc) ~~~~~~~~~~~ { ~ case RECC_ALNUM: return ISALNUM (ch) != 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_ALPHA: return ISALPHA (ch) != 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_BLANK: return ISBLANK (ch) != 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_CNTRL: return ISCNTRL (ch) != 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_DIGIT: return ISDIGIT (ch) != 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_GRAPH: return ISGRAPH (ch) != 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_PRINT: return ISPRINT (ch) != 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_PUNCT: return ISPUNCT (ch) != 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_SPACE: return ISSPACE (ch) != 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef emacs ~~~~~~~~~~~~ case RECC_UPPER: ~~~~~~~~~~~~~~~~ return NILP (lispbuf->case_fold_search) ? ISUPPER (ch) != 0 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ : !NOCASEP (lispbuf, ch); ~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_LOWER: ~~~~~~~~~~~~~~~~ return NILP (lispbuf->case_fold_search) ? ISLOWER (ch) != 0 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ : !NOCASEP (lispbuf, ch); ~~~~~~~~~~~~~~~~~~~~~~~~~ #else ~~~~~ case RECC_UPPER: return ISUPPER (ch) != 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_LOWER: return ISLOWER (ch) != 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ case RECC_XDIGIT: return ISXDIGIT (ch) != 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_ASCII: return ISASCII (ch) != 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_NONASCII: case RECC_MULTIBYTE: return !ISASCII (ch); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_UNIBYTE: return ISUNIBYTE (ch) != 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_WORD: return ISWORD (ch) != 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_ERROR: return false; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ assert (0); ~~~~~~~~~~~ return -1; ~~~~~~~~~~ } ~ #ifdef MULE ~~~~~~~~~~~ static re_bool ~~~~~~~~~~~~~~ re_wctype_can_match_non_ascii (re_wctype_t cc) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ switch (cc) ~~~~~~~~~~~ { ~ case RECC_ASCII: ~~~~~~~~~~~~~~~~ case RECC_UNIBYTE: ~~~~~~~~~~~~~~~~~~ case RECC_CNTRL: ~~~~~~~~~~~~~~~~ case RECC_DIGIT: ~~~~~~~~~~~~~~~~ case RECC_XDIGIT: ~~~~~~~~~~~~~~~~~ case RECC_BLANK: ~~~~~~~~~~~~~~~~ return false; ~~~~~~~~~~~~~ default: ~~~~~~~~ return true; ~~~~~~~~~~~~ } ~ } ~ #endif /* MULE */ ~~~~~~~~~~~~~~~~~ #ifdef emacs ~~~~~~~~~~~~ /* Return a bit-pattern to use in the range-table bits to match multibyte ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ chars of class CC. */ ~~~~~~~~~~~~~~~~~~~~~~ static unsigned char ~~~~~~~~~~~~~~~~~~~~ re_wctype_to_bit (re_wctype_t cc) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ switch (cc) ~~~~~~~~~~~ { ~ case RECC_PRINT: case RECC_GRAPH: ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_ALPHA: return BIT_ALPHA; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_ALNUM: case RECC_WORD: return BIT_WORD; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_LOWER: return BIT_LOWER; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_UPPER: return BIT_UPPER; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_PUNCT: return BIT_PUNCT; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_SPACE: return BIT_SPACE; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_MULTIBYTE: case RECC_NONASCII: ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_ASCII: case RECC_DIGIT: case RECC_XDIGIT: case RECC_CNTRL: ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_BLANK: case RECC_UNIBYTE: case RECC_ERROR: return 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ default: ~~~~~~~~ ABORT (); ~~~~~~~~~ return 0; ~~~~~~~~~ } ~ } ~ #endif /* emacs */ ~~~~~~~~~~~~~~~~~~ ~ static void store_op1 (re_opcode_t op, unsigned char *loc, int arg); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ static void store_op2 (re_opcode_t op, unsigned char *loc, int arg1, int arg2); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ static void insert_op1 (re_opcode_t op, unsigned char *loc, int arg, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ unsigned char *end); ~~~~~~~~~~~~~~~~~~~~ static void insert_op2 (re_opcode_t op, unsigned char *loc, int arg1, int arg2, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ unsigned char *end); ~~~~~~~~~~~~~~~~~~~~ static re_bool at_begline_loc_p (re_char *pattern, re_char *p, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ reg_syntax_t syntax); ~~~~~~~~~~~~~~~~~~~~~ static re_bool at_endline_loc_p (re_char *p, re_char *pend, int syntax); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ static re_bool group_in_compile_stack (compile_stack_type compile_stack, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regnum_t regnum); ~~~~~~~~~~~~~~~~~ static reg_errcode_t compile_range (re_char **p_ptr, re_char *pend, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RE_TRANSLATE_TYPE translate, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ reg_syntax_t syntax, ~~~~~~~~~~~~~~~~~~~~ unsigned char *b); ~~~~~~~~~~~~~~~~~~ #ifdef MULE ~~~~~~~~~~~ static reg_errcode_t compile_extended_range (re_char **p_ptr, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_char *pend, ~~~~~~~~~~~~~~ RE_TRANSLATE_TYPE translate, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ reg_syntax_t syntax, ~~~~~~~~~~~~~~~~~~~~ Lisp_Object rtab); ~~~~~~~~~~~~~~~~~~ #endif /* MULE */ ~~~~~~~~~~~~~~~~~ #ifdef emacs ~~~~~~~~~~~~ reg_errcode_t compile_char_class (re_wctype_t cc, Lisp_Object rtab, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Bitbyte *flags_out); ~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ static re_bool group_match_null_string_p (re_char **p, re_char *end, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ register_info_type *reg_info); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ static re_bool alt_match_null_string_p (re_char *p, re_char *end, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ register_info_type *reg_info); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ static re_bool common_op_match_null_string_p (re_char **p, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_char *end, ~~~~~~~~~~~~~ register_info_type *reg_info); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ static int bcmp_translate (re_char *s1, re_char *s2, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ REGISTER int len, RE_TRANSLATE_TYPE translate ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef emacs ~~~~~~~~~~~~ , Internal_Format fmt, Lisp_Object lispobj ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ ); ~~ static int re_match_2_internal (struct re_pattern_buffer *bufp, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_char *string1, int size1, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_char *string2, int size2, int pos, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ struct re_registers *regs, int stop ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RE_LISP_CONTEXT_ARGS_DECL); ~~~~~~~~~~~~~~~~~~~~~~~~~~~ ~ #ifndef MATCH_MAY_ALLOCATE ~~~~~~~~~~~~~~~~~~~~~~~~~~ /* If we cannot allocate large objects within re_match_2_internal, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ we make the fail stack and register vectors global. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ The fail stack, we grow to the maximum size when a regexp ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ is compiled. ~~~~~~~~~~~~ The register vectors, we adjust in size each time we ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ compile a regexp, according to the number of registers it needs. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ static fail_stack_type fail_stack; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Size with which the following vectors are currently allocated. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ That is so we can make them bigger as needed, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ but never make them smaller. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ static int regs_allocated_size; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ static re_char ** regstart, ** regend; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ static re_char ** old_regstart, ** old_regend; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ static re_char **best_regstart, **best_regend; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ static register_info_type *reg_info; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ static re_char **reg_dummy; ~~~~~~~~~~~~~~~~~~~~~~~~~~~ static register_info_type *reg_info_dummy; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Make the register vectors big enough for NUM_REGS registers, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ but don't make them smaller. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ static ~~~~~~ regex_grow_registers (int num_regs) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ if (num_regs > regs_allocated_size) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ RETALLOC (regstart, num_regs, re_char *); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RETALLOC (regend, num_regs, re_char *); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RETALLOC (old_regstart, num_regs, re_char *); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RETALLOC (old_regend, num_regs, re_char *); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RETALLOC (best_regstart, num_regs, re_char *); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RETALLOC (best_regend, num_regs, re_char *); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RETALLOC (reg_info, num_regs, register_info_type); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RETALLOC (reg_dummy, num_regs, re_char *); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RETALLOC (reg_info_dummy, num_regs, register_info_type); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regs_allocated_size = num_regs; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ } ~ #endif /* not MATCH_MAY_ALLOCATE */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ~ /* `regex_compile' compiles PATTERN (of length SIZE) according to SYNTAX. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Returns one of error codes defined in `regex.h', or zero for success. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Assumes the `allocated' (and perhaps `buffer') and `translate' ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fields are set in BUFP on entry. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ If it succeeds, results are put in BUFP (if it returns an error, the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ contents of BUFP are undefined): ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `buffer' is the compiled pattern; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `syntax' is set to SYNTAX; ~~~~~~~~~~~~~~~~~~~~~~~~~~ `used' is set to the length of the compiled pattern; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `fastmap_accurate' is zero; ~~~~~~~~~~~~~~~~~~~~~~~~~~~ `re_ngroups' is the number of groups/subexpressions (including shy ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ groups) in PATTERN; ~~~~~~~~~~~~~~~~~~~ `re_nsub' is the number of non-shy groups in PATTERN; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `not_bol' and `not_eol' are zero; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ The `fastmap' and `newline_anchor' fields are neither ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ examined nor set. */ ~~~~~~~~~~~~~~~~~~~~~ /* Return, freeing storage we allocated. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define FREE_STACK_RETURN(value) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ do \ ~~~~~~~~~~ { \ ~~~~~~~~~ xfree (compile_stack.stack); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return value; \ ~~~~~~~~~~~~~~~~~~~~ } while (0) ~~~~~~~~~~~ static reg_errcode_t ~~~~~~~~~~~~~~~~~~~~ regex_compile (re_char *pattern, int size, reg_syntax_t syntax, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ struct re_pattern_buffer *bufp) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ /* We fetch characters from PATTERN here. We declare these as int ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (or possibly long) so that chars above 127 can be used as ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ array indices. The macros that fetch a character from the pattern ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ make sure to coerce to unsigned char before assigning, so we won't ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ get bitten by negative numbers here. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* XEmacs change: used to be unsigned char. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ REGISTER EMACS_INT c, c1; ~~~~~~~~~~~~~~~~~~~~~~~~~ /* A random temporary spot in PATTERN. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_char *p1; ~~~~~~~~~~~~ /* Points to the end of the buffer, where we should append. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ REGISTER unsigned char *buf_end; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Keeps track of unclosed groups. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ compile_stack_type compile_stack; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Points to the current (ending) position in the pattern. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_char *p = pattern; ~~~~~~~~~~~~~~~~~~~~~ re_char *pend = pattern + size; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* How to translate the characters in the pattern. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RE_TRANSLATE_TYPE translate = bufp->translate; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Address of the count-byte of the most recently inserted `exactn' ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ command. This makes it possible to tell if a new exact-match ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ character can be added to that command or if the character requires ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ a new `exactn' command. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~ unsigned char *pending_exact = 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Address of start of the most recently finished expression. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ This tells, e.g., postfix * where to find the start of its ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ operand. Reset at the beginning of groups and alternatives. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ unsigned char *laststart = 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Address of beginning of regexp, or inside of last group. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ unsigned char *begalt; ~~~~~~~~~~~~~~~~~~~~~~ /* Place in the uncompiled pattern (i.e., the {) to ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ which to go back if the interval is invalid. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_char *beg_interval; ~~~~~~~~~~~~~~~~~~~~~~ /* Address of the place where a forward jump should go to the end of ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the containing expression. Each alternative of an `or' -- except the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ last -- ends with a forward jump of this sort. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ unsigned char *fixup_alt_jump = 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Counts open-groups as they are encountered. Remembered for the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ matching close-group on the compile stack, so the same register ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ number is put in the stop_memory as the start_memory. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regnum_t regnum = 0; ~~~~~~~~~~~~~~~~~~~~ #ifdef DEBUG ~~~~~~~~~~~~ if (debug_regexps & RE_DEBUG_COMPILATION) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ int debug_count; ~~~~~~~~~~~~~~~~ DEBUG_PRINT1 ("\nCompiling pattern: "); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (debug_count = 0; debug_count < size; debug_count++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ putchar (pattern[debug_count]); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ putchar ('\n'); ~~~~~~~~~~~~~~~ } ~ #endif /* DEBUG */ ~~~~~~~~~~~~~~~~~~ /* Initialize the compile stack. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ compile_stack.stack = TALLOC (INIT_COMPILE_STACK_SIZE, compile_stack_elt_t); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (compile_stack.stack == NULL) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return REG_ESPACE; ~~~~~~~~~~~~~~~~~~ compile_stack.size = INIT_COMPILE_STACK_SIZE; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ compile_stack.avail = 0; ~~~~~~~~~~~~~~~~~~~~~~~~ /* Initialize the pattern buffer. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->syntax = syntax; ~~~~~~~~~~~~~~~~~~~~~~ bufp->fastmap_accurate = 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->not_bol = bufp->not_eol = 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Set `used' to zero, so that if we return an error, the pattern ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ printer (for debugging) will think there's no pattern. We reset it ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ at the end. */ ~~~~~~~~~~~~~~~ bufp->used = 0; ~~~~~~~~~~~~~~~ /* Always count groups, whether or not bufp->no_sub is set. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->re_nsub = 0; ~~~~~~~~~~~~~~~~~~ bufp->re_ngroups = 0; ~~~~~~~~~~~~~~~~~~~~~ bufp->warned_about_incompatible_back_references = 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (bufp->external_to_internal_register == 0) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ bufp->external_to_internal_register_size = INIT_REG_TRANSLATE_SIZE; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RETALLOC (bufp->external_to_internal_register, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->external_to_internal_register_size, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int); ~~~~~ } ~ { ~ int i; ~~~~~~ bufp->external_to_internal_register[0] = 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (i = 1; i < bufp->external_to_internal_register_size; i++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->external_to_internal_register[i] = (int) 0xDEADBEEF; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ #if !defined (emacs) && !defined (SYNTAX_TABLE) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Initialize the syntax table. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ init_syntax_once (); ~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ if (bufp->allocated == 0) ~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ if (bufp->buffer) ~~~~~~~~~~~~~~~~~ { /* If zero allocated, but buffer is non-null, try to realloc ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ enough space. This loses if buffer's address is bogus, but ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ that is the user's responsibility. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RETALLOC (bufp->buffer, INIT_BUF_SIZE, unsigned char); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ else ~~~~ { /* Caller did not allocate a buffer. Do it for them. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->buffer = TALLOC (INIT_BUF_SIZE, unsigned char); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ if (!bufp->buffer) FREE_STACK_RETURN (REG_ESPACE); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->allocated = INIT_BUF_SIZE; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ begalt = buf_end = bufp->buffer; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Loop through the uncompiled pattern until we're at the end. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ while (p != pend) ~~~~~~~~~~~~~~~~~ { ~ PATFETCH (c); ~~~~~~~~~~~~~ switch (c) ~~~~~~~~~~ { ~ case '^': ~~~~~~~~~ { ~ if ( /* If at start of pattern, it's an operator. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ p == pattern + 1 ~~~~~~~~~~~~~~~~ /* If context independent, it's an operator. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || syntax & RE_CONTEXT_INDEP_ANCHORS ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Otherwise, depends on what's come before. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || at_begline_loc_p (pattern, p, syntax)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ BUF_PUSH (begline); ~~~~~~~~~~~~~~~~~~~ else ~~~~ goto normal_char; ~~~~~~~~~~~~~~~~~ } ~ break; ~~~~~~ case '$': ~~~~~~~~~ { ~ if ( /* If at end of pattern, it's an operator. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ p == pend ~~~~~~~~~ /* If context independent, it's an operator. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || syntax & RE_CONTEXT_INDEP_ANCHORS ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Otherwise, depends on what's next. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || at_endline_loc_p (p, pend, syntax)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ BUF_PUSH (endline); ~~~~~~~~~~~~~~~~~~~ else ~~~~ goto normal_char; ~~~~~~~~~~~~~~~~~ } ~ break; ~~~~~~ case '+': ~~~~~~~~~ case '?': ~~~~~~~~~ if ((syntax & RE_BK_PLUS_QM) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || (syntax & RE_LIMITED_OPS)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto normal_char; ~~~~~~~~~~~~~~~~~ handle_plus: ~~~~~~~~~~~~ case '*': ~~~~~~~~~ /* If there is no previous pattern... */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!laststart) ~~~~~~~~~~~~~~~ { ~ if (syntax & RE_CONTEXT_INVALID_OPS) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ FREE_STACK_RETURN (REG_BADRPT); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ else if (!(syntax & RE_CONTEXT_INDEP_OPS)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto normal_char; ~~~~~~~~~~~~~~~~~ } ~ { ~ /* true means zero/many matches are allowed. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_bool zero_times_ok = c != '+'; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_bool many_times_ok = c != '?'; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* true means match shortest string possible. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_bool minimal = false; ~~~~~~~~~~~~~~~~~~~~~~~~ /* If there is a sequence of repetition chars, collapse it ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ down to just one (the right one). We can't combine ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ interval operators with these because of, e.g., `a{2}*', ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ which should only match an even number of `a's. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ while (p != pend) ~~~~~~~~~~~~~~~~~ { ~ PATFETCH (c); ~~~~~~~~~~~~~ if (c == '*' || (!(syntax & RE_BK_PLUS_QM) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ && (c == '+' || c == '?'))) ~~~~~~~~~~~~~~~~~~~~~~~~~~~ ; ~ else if (syntax & RE_BK_PLUS_QM && c == '\\') ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ if (p == pend) FREE_STACK_RETURN (REG_EESCAPE); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PATFETCH (c1); ~~~~~~~~~~~~~~ if (!(c1 == '+' || c1 == '?')) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ PATUNFETCH; ~~~~~~~~~~~ PATUNFETCH; ~~~~~~~~~~~ break; ~~~~~~ } ~ c = c1; ~~~~~~~ } ~ else ~~~~ { ~ PATUNFETCH; ~~~~~~~~~~~ break; ~~~~~~ } ~ /* If we get here, we found another repeat character. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!(syntax & RE_NO_MINIMAL_MATCHING)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ /* "*?" and "+?" and "??" are okay (and mean match ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ minimally), but other sequences (such as "*??" and ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ "+++") are rejected (reserved for future use). */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (minimal || c != '?') ~~~~~~~~~~~~~~~~~~~~~~~~ FREE_STACK_RETURN (REG_BADRPT); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ minimal = true; ~~~~~~~~~~~~~~~ } ~ else ~~~~ { ~ zero_times_ok |= c != '+'; ~~~~~~~~~~~~~~~~~~~~~~~~~~ many_times_ok |= c != '?'; ~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ } ~ /* Star, etc. applied to an empty pattern is equivalent ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ to an empty pattern. */ ~~~~~~~~~~~~~~~~~~~~~~~~ if (!laststart) ~~~~~~~~~~~~~~~ break; ~~~~~~ /* Now we know whether zero matches is allowed ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ and whether two or more matches is allowed ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ and whether we want minimal or maximal matching. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (minimal) ~~~~~~~~~~~~ { ~ if (!many_times_ok) ~~~~~~~~~~~~~~~~~~~ { ~ /* "a??" becomes: ~~~~~~~~~~~~~~~~~ 0: /on_failure_jump to 6 ~~~~~~~~~~~~~~~~~~~~~~~~ 3: /jump to 9 ~~~~~~~~~~~~~ 6: /exactn/1/A ~~~~~~~~~~~~~~ 9: end of pattern. ~~~~~~~~~~~~~~~~~~ */ ~~ GET_BUFFER_SPACE (6); ~~~~~~~~~~~~~~~~~~~~~ INSERT_JUMP (jump, laststart, buf_end + 3); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ buf_end += 3; ~~~~~~~~~~~~~ INSERT_JUMP (on_failure_jump, laststart, laststart + 6); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ buf_end += 3; ~~~~~~~~~~~~~ } ~ else if (zero_times_ok) ~~~~~~~~~~~~~~~~~~~~~~~ { ~ /* "a*?" becomes: ~~~~~~~~~~~~~~~~~ 0: /jump to 6 ~~~~~~~~~~~~~ 3: /exactn/1/A ~~~~~~~~~~~~~~ 6: /on_failure_jump to 3 ~~~~~~~~~~~~~~~~~~~~~~~~ 9: end of pattern. ~~~~~~~~~~~~~~~~~~ */ ~~ GET_BUFFER_SPACE (6); ~~~~~~~~~~~~~~~~~~~~~ INSERT_JUMP (jump, laststart, buf_end + 3); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ buf_end += 3; ~~~~~~~~~~~~~ STORE_JUMP (on_failure_jump, buf_end, laststart + 3); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ buf_end += 3; ~~~~~~~~~~~~~ } ~ else ~~~~ { ~ /* "a+?" becomes: ~~~~~~~~~~~~~~~~~ 0: /exactn/1/A ~~~~~~~~~~~~~~ 3: /on_failure_jump to 0 ~~~~~~~~~~~~~~~~~~~~~~~~ 6: end of pattern. ~~~~~~~~~~~~~~~~~~ */ ~~ GET_BUFFER_SPACE (3); ~~~~~~~~~~~~~~~~~~~~~ STORE_JUMP (on_failure_jump, buf_end, laststart); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ buf_end += 3; ~~~~~~~~~~~~~ } ~ } ~ else ~~~~ { ~ /* Are we optimizing this jump? */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_bool keep_string_p = false; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (many_times_ok) ~~~~~~~~~~~~~~~~~~ { /* More than one repetition is allowed, so put in ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ at the end a backward relative jump from ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `buf_end' to before the next jump we're going ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ to put in below (which jumps from laststart to ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ after this jump). ~~~~~~~~~~~~~~~~~ But if we are at the `*' in the exact sequence `.*\n', ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ insert an unconditional jump backwards to the ., ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ instead of the beginning of the loop. This way we only ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ push a failure point once, instead of every time ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ through the loop. */ ~~~~~~~~~~~~~~~~~~~~~ assert (p - 1 > pattern); ~~~~~~~~~~~~~~~~~~~~~~~~~ /* Allocate the space for the jump. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ GET_BUFFER_SPACE (3); ~~~~~~~~~~~~~~~~~~~~~ /* We know we are not at the first character of the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ pattern, because laststart was nonzero. And we've ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ already incremented `p', by the way, to be the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ character after the `*'. Do we have to do something ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ analogous here for null bytes, because of ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RE_DOT_NOT_NULL? */ ~~~~~~~~~~~~~~~~~~~ if (*(p - 2) == '.' ~~~~~~~~~~~~~~~~~~~ && zero_times_ok ~~~~~~~~~~~~~~~~ && p < pend && *p == '\n' ~~~~~~~~~~~~~~~~~~~~~~~~~ && !(syntax & RE_DOT_NEWLINE)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { /* We have .*\n. */ ~~~~~~~~~~~~~~~~~~~~~~ STORE_JUMP (jump, buf_end, laststart); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ keep_string_p = true; ~~~~~~~~~~~~~~~~~~~~~ } ~ else ~~~~ /* Anything else. */ ~~~~~~~~~~~~~~~~~~~~~ STORE_JUMP (maybe_pop_jump, buf_end, laststart - 3); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* We've added more stuff to the buffer. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ buf_end += 3; ~~~~~~~~~~~~~ } ~ /* On failure, jump from laststart to buf_end + 3, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ which will be the end of the buffer after this jump ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ is inserted. */ ~~~~~~~~~~~~~~~~ GET_BUFFER_SPACE (3); ~~~~~~~~~~~~~~~~~~~~~ INSERT_JUMP (keep_string_p ? on_failure_keep_string_jump ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ : on_failure_jump, ~~~~~~~~~~~~~~~~~~ laststart, buf_end + 3); ~~~~~~~~~~~~~~~~~~~~~~~~ buf_end += 3; ~~~~~~~~~~~~~ if (!zero_times_ok) ~~~~~~~~~~~~~~~~~~~ { ~ /* At least one repetition is required, so insert a ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `dummy_failure_jump' before the initial ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `on_failure_jump' instruction of the loop. This ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ effects a skip over that instruction the first time ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ we hit that loop. */ ~~~~~~~~~~~~~~~~~~~~~ GET_BUFFER_SPACE (3); ~~~~~~~~~~~~~~~~~~~~~ INSERT_JUMP (dummy_failure_jump, laststart, laststart + 6); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ buf_end += 3; ~~~~~~~~~~~~~ } ~ } ~ pending_exact = 0; ~~~~~~~~~~~~~~~~~~ } ~ break; ~~~~~~ case '.': ~~~~~~~~~ laststart = buf_end; ~~~~~~~~~~~~~~~~~~~~ BUF_PUSH (anychar); ~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ #ifdef MULE ~~~~~~~~~~~ #define MAYBE_START_OVER_WITH_EXTENDED(ch) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (ch >= 0x80) do \ ~~~~~~~~~~~~~~~~~~~~~~ { \ ~~~~~~~ goto start_over_with_extended; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } while (0) ~~~~~~~~~~~ #else ~~~~~ #define MAYBE_START_OVER_WITH_EXTENDED(ch) (void)(ch) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ case '[': ~~~~~~~~~ { ~ /* XEmacs change: this whole section */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_bool had_char_class = false; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (p == pend) FREE_STACK_RETURN (REG_EBRACK); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Ensure that we have enough space to push a charset: the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ opcode, the length count, and the bitset; 34 bytes in all. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ GET_BUFFER_SPACE (34); ~~~~~~~~~~~~~~~~~~~~~~ laststart = buf_end; ~~~~~~~~~~~~~~~~~~~~ /* We test `*p == '^' twice, instead of using an if ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ statement, so we only need one BUF_PUSH. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ BUF_PUSH (*p == '^' ? charset_not : charset); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (*p == '^') ~~~~~~~~~~~~~~ p++; ~~~~ /* Remember the first position in the bracket expression. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ p1 = p; ~~~~~~~ /* Push the number of bytes in the bitmap. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ BUF_PUSH ((1 << BYTEWIDTH) / BYTEWIDTH); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Clear the whole map. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~ memset (buf_end, 0, (1 << BYTEWIDTH) / BYTEWIDTH); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* charset_not matches newline according to a syntax bit. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if ((re_opcode_t) buf_end[-2] == charset_not ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ && (syntax & RE_HAT_LISTS_NOT_NEWLINE)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ SET_LIST_BIT ('\n'); ~~~~~~~~~~~~~~~~~~~~ /* Read in characters and ranges, setting map bits. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (;;) ~~~~~~~~ { ~ if (p == pend) FREE_STACK_RETURN (REG_EBRACK); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PATFETCH (c); ~~~~~~~~~~~~~ /* Frumble-bumble, we may have found some extended chars. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Need to start over, process everything using the general ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ extended-char mechanism, and need to use charset_mule and ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ charset_mule_not instead of charset and charset_not. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ MAYBE_START_OVER_WITH_EXTENDED (c); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* \ might escape characters inside [...] and [^...]. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if ((syntax & RE_BACKSLASH_ESCAPE_IN_LISTS) && c == '\\') ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ if (p == pend) FREE_STACK_RETURN (REG_EESCAPE); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PATFETCH (c1); ~~~~~~~~~~~~~~ MAYBE_START_OVER_WITH_EXTENDED (c1); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ SET_LIST_BIT (c1); ~~~~~~~~~~~~~~~~~~ continue; ~~~~~~~~~ } ~ /* Could be the end of the bracket expression. If it's ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ not (i.e., when the bracket expression is `[]' so ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ far), the ']' character bit gets set way below. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (c == ']' && p != p1 + 1) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ /* Look ahead to see if it's a range when the last thing ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ was a character class. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~ if (had_char_class && c == '-' && *p != ']') ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ FREE_STACK_RETURN (REG_ERANGE); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Look ahead to see if it's a range when the last thing ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ was a character: if this is a hyphen not at the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ beginning or the end of a list, then it's the range ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ operator. */ ~~~~~~~~~~~~~ if (c == '-' ~~~~~~~~~~~~ && !(p - 2 >= pattern && p[-2] == '[') ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ && !(p - 3 >= pattern && p[-3] == '[' && p[-2] == '^') ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ && *p != ']') ~~~~~~~~~~~~~ { ~ reg_errcode_t ret; ~~~~~~~~~~~~~~~~~~ MAYBE_START_OVER_WITH_EXTENDED (*(unsigned char *)p); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ret = compile_range (&p, pend, translate, syntax, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ buf_end); ~~~~~~~~~ if (ret != REG_NOERROR) FREE_STACK_RETURN (ret); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ else if (p[0] == '-' && p[1] != ']') ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { /* This handles ranges made up of characters only. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ reg_errcode_t ret; ~~~~~~~~~~~~~~~~~~ /* Move past the `-'. */ ~~~~~~~~~~~~~~~~~~~~~~~~~ PATFETCH (c1); ~~~~~~~~~~~~~~ MAYBE_START_OVER_WITH_EXTENDED (*(unsigned char *)p); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ret = compile_range (&p, pend, translate, syntax, buf_end); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (ret != REG_NOERROR) FREE_STACK_RETURN (ret); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* See if we're at the beginning of a possible character ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ class. */ ~~~~~~~~~~ else if (syntax & RE_CHAR_CLASSES && c == '[' && *p == ':') ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ re_char *str = p + 1; ~~~~~~~~~~~~~~~~~~~~~ PATFETCH (c); ~~~~~~~~~~~~~ c1 = 0; ~~~~~~~ /* If pattern is `[[:'. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (p == pend) FREE_STACK_RETURN (REG_EBRACK); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (;;) ~~~~~~~~ { ~ PATFETCH (c); ~~~~~~~~~~~~~ if ((c == ':' && *p == ']') || p == pend) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ break; ~~~~~~ } ~ c1++; ~~~~~ } ~ /* If isn't a word bracketed by `[:' and `:]': ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ undo the ending character, the letters, and leave ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the leading `:' and `[' (but set bits for them). */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (c == ':' && *p == ']') ~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ re_wctype_t cc = re_wctype (str, c1); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int ch; ~~~~~~~ if (cc == RECC_ERROR) ~~~~~~~~~~~~~~~~~~~~~ FREE_STACK_RETURN (REG_ECTYPE); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Throw away the ] at the end of the character ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ class. */ ~~~~~~~~~~ PATFETCH (c); ~~~~~~~~~~~~~ if (p == pend) FREE_STACK_RETURN (REG_EBRACK); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef MULE ~~~~~~~~~~~ if (re_wctype_can_match_non_ascii (cc)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ goto start_over_with_extended; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ #endif /* MULE */ ~~~~~~~~~~~~~~~~~ for (ch = 0; ch < (1 << BYTEWIDTH); ++ch) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ if (re_iswctype (ch, cc ~~~~~~~~~~~~~~~~~~~~~~~ RE_ISWCTYPE_ARG (current_buffer))) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ SET_LIST_BIT (ch); ~~~~~~~~~~~~~~~~~~ } ~ } ~ had_char_class = true; ~~~~~~~~~~~~~~~~~~~~~~ } ~ else ~~~~ { ~ c1++; ~~~~~ while (c1--) ~~~~~~~~~~~~ PATUNFETCH; ~~~~~~~~~~~ SET_LIST_BIT ('['); ~~~~~~~~~~~~~~~~~~~ SET_LIST_BIT (':'); ~~~~~~~~~~~~~~~~~~~ had_char_class = false; ~~~~~~~~~~~~~~~~~~~~~~~ } ~ } ~ else ~~~~ { ~ had_char_class = false; ~~~~~~~~~~~~~~~~~~~~~~~ SET_LIST_BIT (c); ~~~~~~~~~~~~~~~~~ } ~ } ~ /* Discard any (non)matching list bytes that are all 0 at the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ end of the map. Decrease the map-length byte too. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ while ((int) buf_end[-1] > 0 && buf_end[buf_end[-1] - 1] == 0) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ buf_end[-1]--; ~~~~~~~~~~~~~~ buf_end += buf_end[-1]; ~~~~~~~~~~~~~~~~~~~~~~~ } ~ break; ~~~~~~ #ifdef MULE ~~~~~~~~~~~ start_over_with_extended: ~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ REGISTER Lisp_Object rtab = Qnil; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Bitbyte flags = 0; ~~~~~~~~~~~~~~~~~~ int bytes_needed = sizeof (flags); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_bool had_char_class = false; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* There are extended chars here, which means we need to use the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ unified range-table format. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (buf_end[-2] == charset) ~~~~~~~~~~~~~~~~~~~~~~~~~~~ buf_end[-2] = charset_mule; ~~~~~~~~~~~~~~~~~~~~~~~~~~~ else ~~~~ buf_end[-2] = charset_mule_not; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ buf_end--; ~~~~~~~~~~ p = p1; /* go back to the beginning of the charset, after ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ a possible ^. */ ~~~~~~~~~~~~~~~~ rtab = Vthe_lisp_rangetab; ~~~~~~~~~~~~~~~~~~~~~~~~~~ Fclear_range_table (rtab); ~~~~~~~~~~~~~~~~~~~~~~~~~~ /* charset_not matches newline according to a syntax bit. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if ((re_opcode_t) buf_end[-1] == charset_mule_not ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ && (syntax & RE_HAT_LISTS_NOT_NEWLINE)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ SET_RANGETAB_BIT ('\n'); ~~~~~~~~~~~~~~~~~~~~~~~~ /* Read in characters and ranges, setting map bits. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (;;) ~~~~~~~~ { ~ if (p == pend) FREE_STACK_RETURN (REG_EBRACK); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PATFETCH (c); ~~~~~~~~~~~~~ /* \ might escape characters inside [...] and [^...]. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if ((syntax & RE_BACKSLASH_ESCAPE_IN_LISTS) && c == '\\') ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ if (p == pend) FREE_STACK_RETURN (REG_EESCAPE); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PATFETCH (c1); ~~~~~~~~~~~~~~ SET_RANGETAB_BIT (c1); ~~~~~~~~~~~~~~~~~~~~~~ continue; ~~~~~~~~~ } ~ /* Could be the end of the bracket expression. If it's ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ not (i.e., when the bracket expression is `[]' so ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ far), the ']' character bit gets set way below. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (c == ']' && p != p1 + 1) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ /* Look ahead to see if it's a range when the last thing ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ was a character class. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~ if (had_char_class && c == '-' && *p != ']') ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ FREE_STACK_RETURN (REG_ERANGE); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Look ahead to see if it's a range when the last thing ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ was a character: if this is a hyphen not at the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ beginning or the end of a list, then it's the range ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ operator. */ ~~~~~~~~~~~~~ if (c == '-' ~~~~~~~~~~~~ && !(p - 2 >= pattern && p[-2] == '[') ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ && !(p - 3 >= pattern && p[-3] == '[' && p[-2] == '^') ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ && *p != ']') ~~~~~~~~~~~~~ { ~ reg_errcode_t ret; ~~~~~~~~~~~~~~~~~~ ret = compile_extended_range (&p, pend, translate, syntax, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ rtab); ~~~~~~ if (ret != REG_NOERROR) FREE_STACK_RETURN (ret); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ else if (p[0] == '-' && p[1] != ']') ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { /* This handles ranges made up of characters only. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ reg_errcode_t ret; ~~~~~~~~~~~~~~~~~~ /* Move past the `-'. */ ~~~~~~~~~~~~~~~~~~~~~~~~~ PATFETCH (c1); ~~~~~~~~~~~~~~ ret = compile_extended_range (&p, pend, translate, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ syntax, rtab); ~~~~~~~~~~~~~~ if (ret != REG_NOERROR) FREE_STACK_RETURN (ret); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* See if we're at the beginning of a possible character ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ class. */ ~~~~~~~~~~ else if (syntax & RE_CHAR_CLASSES && c == '[' && *p == ':') ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ re_char *str = p + 1; ~~~~~~~~~~~~~~~~~~~~~ PATFETCH (c); ~~~~~~~~~~~~~ c1 = 0; ~~~~~~~ /* If pattern is `[[:'. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (p == pend) FREE_STACK_RETURN (REG_EBRACK); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (;;) ~~~~~~~~ { ~ PATFETCH (c); ~~~~~~~~~~~~~ if ((c == ':' && *p == ']') || p == pend) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ break; ~~~~~~ } ~ c1++; ~~~~~ } ~ /* If isn't a word bracketed by `[:' and `:]': ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ undo the ending character, the letters, and leave ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the leading `:' and `[' (but set bits for them). */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (c == ':' && *p == ']') ~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ re_wctype_t cc = re_wctype (str, c1); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ reg_errcode_t ret = REG_NOERROR; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (cc == RECC_ERROR) ~~~~~~~~~~~~~~~~~~~~~ FREE_STACK_RETURN (REG_ECTYPE); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Throw away the ] at the end of the character ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ class. */ ~~~~~~~~~~ PATFETCH (c); ~~~~~~~~~~~~~ if (p == pend) FREE_STACK_RETURN (REG_EBRACK); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ret = compile_char_class (cc, rtab, &flags); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (ret != REG_NOERROR) FREE_STACK_RETURN (ret); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ had_char_class = true; ~~~~~~~~~~~~~~~~~~~~~~ } ~ else ~~~~ { ~ c1++; ~~~~~ while (c1--) ~~~~~~~~~~~~ PATUNFETCH; ~~~~~~~~~~~ SET_RANGETAB_BIT ('['); ~~~~~~~~~~~~~~~~~~~~~~~ SET_RANGETAB_BIT (':'); ~~~~~~~~~~~~~~~~~~~~~~~ had_char_class = false; ~~~~~~~~~~~~~~~~~~~~~~~ } ~ } ~ else ~~~~ { ~ had_char_class = false; ~~~~~~~~~~~~~~~~~~~~~~~ SET_RANGETAB_BIT (c); ~~~~~~~~~~~~~~~~~~~~~ } ~ } ~ bytes_needed += unified_range_table_bytes_needed (rtab); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ GET_BUFFER_SPACE (bytes_needed); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ *buf_end++ = flags; ~~~~~~~~~~~~~~~~~~~ unified_range_table_copy_data (rtab, buf_end); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ buf_end += unified_range_table_bytes_used (buf_end); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ } ~ #endif /* MULE */ ~~~~~~~~~~~~~~~~~ case '(': ~~~~~~~~~ if (syntax & RE_NO_BK_PARENS) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto handle_open; ~~~~~~~~~~~~~~~~~ else ~~~~ goto normal_char; ~~~~~~~~~~~~~~~~~ case ')': ~~~~~~~~~ if (syntax & RE_NO_BK_PARENS) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto handle_close; ~~~~~~~~~~~~~~~~~~ else ~~~~ goto normal_char; ~~~~~~~~~~~~~~~~~ case '\n': ~~~~~~~~~~ if (syntax & RE_NEWLINE_ALT) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto handle_alt; ~~~~~~~~~~~~~~~~ else ~~~~ goto normal_char; ~~~~~~~~~~~~~~~~~ case '|': ~~~~~~~~~ if (syntax & RE_NO_BK_VBAR) ~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto handle_alt; ~~~~~~~~~~~~~~~~ else ~~~~ goto normal_char; ~~~~~~~~~~~~~~~~~ case '{': ~~~~~~~~~ if (syntax & RE_INTERVALS && syntax & RE_NO_BK_BRACES) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto handle_interval; ~~~~~~~~~~~~~~~~~~~~~ else ~~~~ goto normal_char; ~~~~~~~~~~~~~~~~~ case '\\': ~~~~~~~~~~ if (p == pend) FREE_STACK_RETURN (REG_EESCAPE); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Do not translate the character after the \, so that we can ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ distinguish, e.g., \B from \b, even if we normally would ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ translate, e.g., B to b. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PATFETCH_RAW (c); ~~~~~~~~~~~~~~~~~ switch (c) ~~~~~~~~~~ { ~ case '(': ~~~~~~~~~ if (syntax & RE_NO_BK_PARENS) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto normal_backslash; ~~~~~~~~~~~~~~~~~~~~~~ handle_open: ~~~~~~~~~~~~ { ~ regnum_t r = 0; ~~~~~~~~~~~~~~~ re_bool shy = 0, named_nonshy = 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!(syntax & RE_NO_SHY_GROUPS) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ && p != pend && itext_ichar_eql (p, '?')) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ INC_IBYTEPTR (p); /* Gobble up the '?'. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PATFETCH (c); /* Fetch the next character, which may be a ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ digit. */ ~~~~~~~~~ switch (c) ~~~~~~~~~~ { ~ case ':': /* shy groups */ ~~~~~~~~~~~~~~~~~~~~~~~~~~ shy = 1; ~~~~~~~~ break; ~~~~~~ case '1': case '2': case '3': case '4': ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case '5': case '6': case '7': case '8': case '9': ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PATUNFETCH; ~~~~~~~~~~~ GET_UNSIGNED_NUMBER (r); ~~~~~~~~~~~~~~~~~~~~~~~~ if (itext_ichar_eql (p, ':')) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ named_nonshy = 1; ~~~~~~~~~~~~~~~~~ INC_IBYTEPTR (p); /* Gobble up the ':'. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ /* Otherwise, fall through and error. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* An explicitly specified regnum must start with ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ non-0. */ ~~~~~~~~~ case '0': ~~~~~~~~~ default: ~~~~~~~~ FREE_STACK_RETURN (REG_BADPAT); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ } ~ ++regnum; ~~~~~~~~~ bufp->re_ngroups++; ~~~~~~~~~~~~~~~~~~~ if (bufp->re_ngroups > MAX_REGNUM) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ FREE_STACK_RETURN (REG_ESUBREG); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ if (!shy) ~~~~~~~~~ { ~ if (named_nonshy) ~~~~~~~~~~~~~~~~~ { ~ if (r < bufp->external_to_internal_register_size) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ if (group_in_compile_stack ~~~~~~~~~~~~~~~~~~~~~~~~~~ (compile_stack, ~~~~~~~~~~~~~~~ bufp->external_to_internal_register[r])) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ /* GNU errors in this context, which is ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ inconsistent; it otherwise has no problem ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ with named non-shy groups overriding ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ previously-assigned group numbers. I choose ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ to error here for consistency with GNU for ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ those writing code that should target ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ both. */ ~~~~~~~~ FREE_STACK_RETURN (REG_ESUBREG); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ } ~ if (r > bufp->re_nsub) ~~~~~~~~~~~~~~~~~~~~~~ { ~ bufp->re_nsub = r; ~~~~~~~~~~~~~~~~~~ } ~ } ~ else ~~~~ { ~ r = ++(bufp->re_nsub); ~~~~~~~~~~~~~~~~~~~~~~ } ~ while (bufp->external_to_internal_register_size <= ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->re_nsub) ~~~~~~~~~~~~~~ { ~ int i; ~~~~~~ int old_size = ~~~~~~~~~~~~~~ bufp->external_to_internal_register_size; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->external_to_internal_register_size ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ += max (old_size + 5, bufp->re_nsub + 5); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RETALLOC (bufp->external_to_internal_register, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->external_to_internal_register_size, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int); ~~~~~ for (i = old_size; ~~~~~~~~~~~~~~~~~~ i < bufp->external_to_internal_register_size; i++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->external_to_internal_register[i] = ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (int) 0xDEADBEEF; ~~~~~~~~~~~~~~~~~ } ~ /* This is explicitly [r] rather than [bufp->re_nsub] for ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the case that the named nonshy group references an ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ unused register number less than bufp->re_nsub. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->external_to_internal_register[r] = ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->re_ngroups; ~~~~~~~~~~~~~~~~~ } ~ if (COMPILE_STACK_FULL) ~~~~~~~~~~~~~~~~~~~~~~~ { ~ RETALLOC (compile_stack.stack, compile_stack.size << 1, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ compile_stack_elt_t); ~~~~~~~~~~~~~~~~~~~~~ if (compile_stack.stack == NULL) return REG_ESPACE; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ compile_stack.size <<= 1; ~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* These are the values to restore when we hit end of this ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ group. They are all relative offsets, so that if the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ whole pattern moves because of realloc, they will still ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ be valid. */ ~~~~~~~~~~~~~ COMPILE_STACK_TOP.begalt_offset = begalt - bufp->buffer; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ COMPILE_STACK_TOP.fixup_alt_jump ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ = fixup_alt_jump ? fixup_alt_jump - bufp->buffer + 1 : 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ COMPILE_STACK_TOP.laststart_offset = buf_end - bufp->buffer; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ COMPILE_STACK_TOP.regnum = bufp->re_ngroups; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ COMPILE_STACK_TOP.inner_group_offset ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ = buf_end - bufp->buffer + 3; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* We will eventually replace the 0 with the number of ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ groups inner to this one, using inner_group_offset, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ above. */ ~~~~~~~~~ GET_BUFFER_SPACE (5); ~~~~~~~~~~~~~~~~~~~~~ store_op2 (start_memory, buf_end, bufp->re_ngroups, 0); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ buf_end += 5; ~~~~~~~~~~~~~ compile_stack.avail++; ~~~~~~~~~~~~~~~~~~~~~~ fixup_alt_jump = 0; ~~~~~~~~~~~~~~~~~~~ laststart = 0; ~~~~~~~~~~~~~~ begalt = buf_end; ~~~~~~~~~~~~~~~~~ pending_exact = 0; ~~~~~~~~~~~~~~~~~~ } ~ break; ~~~~~~ case ')': ~~~~~~~~~ if (syntax & RE_NO_BK_PARENS) goto normal_backslash; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (COMPILE_STACK_EMPTY) ~~~~~~~~~~~~~~~~~~~~~~~~ { ~ if (syntax & RE_UNMATCHED_RIGHT_PAREN_ORD) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto normal_backslash; ~~~~~~~~~~~~~~~~~~~~~~ else ~~~~ FREE_STACK_RETURN (REG_ERPAREN); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ handle_close: ~~~~~~~~~~~~~ if (fixup_alt_jump) ~~~~~~~~~~~~~~~~~~~ { /* Push a dummy failure point at the end of the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ alternative for a possible future ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `pop_failure_jump' to pop. See comments at ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `push_dummy_failure' in `re_match_2'. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ BUF_PUSH (push_dummy_failure); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* We allocated space for this jump when we assigned ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ to `fixup_alt_jump', in the `handle_alt' case below. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ STORE_JUMP (jump_past_alt, fixup_alt_jump, buf_end - 1); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* See similar code for backslashed left paren above. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (COMPILE_STACK_EMPTY) ~~~~~~~~~~~~~~~~~~~~~~~~ { ~ if (syntax & RE_UNMATCHED_RIGHT_PAREN_ORD) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto normal_char; ~~~~~~~~~~~~~~~~~ else ~~~~ FREE_STACK_RETURN (REG_ERPAREN); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* Since we just checked for an empty stack above, this ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ``can't happen''. */ ~~~~~~~~~~~~~~~~~~~~~ assert (compile_stack.avail != 0); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ /* We don't just want to restore into `regnum', because ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ later groups should continue to be numbered higher, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ as in `(ab)c(de)' -- the second group is #2. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regnum_t this_group_regnum; ~~~~~~~~~~~~~~~~~~~~~~~~~~~ unsigned char *inner_group_loc; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ compile_stack.avail--; ~~~~~~~~~~~~~~~~~~~~~~ begalt = bufp->buffer + COMPILE_STACK_TOP.begalt_offset; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fixup_alt_jump ~~~~~~~~~~~~~~ = COMPILE_STACK_TOP.fixup_alt_jump ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ? bufp->buffer + COMPILE_STACK_TOP.fixup_alt_jump - 1 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ : 0; ~~~~ laststart = bufp->buffer + COMPILE_STACK_TOP.laststart_offset; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ this_group_regnum = COMPILE_STACK_TOP.regnum; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ pending_exact = 0; ~~~~~~~~~~~~~~~~~~ /* We're at the end of the group, so now we know how many ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ groups were inside this one. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ inner_group_loc ~~~~~~~~~~~~~~~ = bufp->buffer + COMPILE_STACK_TOP.inner_group_offset; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ STORE_NUMBER (inner_group_loc, regnum - this_group_regnum); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ GET_BUFFER_SPACE (5); ~~~~~~~~~~~~~~~~~~~~~ store_op2 (stop_memory, buf_end, this_group_regnum, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regnum - this_group_regnum); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ buf_end += 5; ~~~~~~~~~~~~~ } ~ break; ~~~~~~ case '|': /* `\|'. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~ if (syntax & RE_LIMITED_OPS || syntax & RE_NO_BK_VBAR) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto normal_backslash; ~~~~~~~~~~~~~~~~~~~~~~ handle_alt: ~~~~~~~~~~~ if (syntax & RE_LIMITED_OPS) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto normal_char; ~~~~~~~~~~~~~~~~~ /* Insert before the previous alternative a jump which ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ jumps to this alternative if the former fails. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ GET_BUFFER_SPACE (3); ~~~~~~~~~~~~~~~~~~~~~ INSERT_JUMP (on_failure_jump, begalt, buf_end + 6); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ pending_exact = 0; ~~~~~~~~~~~~~~~~~~ buf_end += 3; ~~~~~~~~~~~~~ /* The alternative before this one has a jump after it ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ which gets executed if it gets matched. Adjust that ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ jump so it will jump to this alternative's analogous ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ jump (put in below, which in turn will jump to the next ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (if any) alternative's such jump, etc.). The last such ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ jump jumps to the correct final destination. A picture: ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ _____ _____ ~~~~~~~~~~~ | | | | ~~~~~~~~~~~ | v | v ~~~~~~~~~~~ a | b | c ~~~~~~~~~~~ If we are at `b', then fixup_alt_jump right now points to a ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ three-byte space after `a'. We'll put in the jump, set ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fixup_alt_jump to right after `b', and leave behind three ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bytes which we'll fill in when we get to after `c'. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (fixup_alt_jump) ~~~~~~~~~~~~~~~~~~~ STORE_JUMP (jump_past_alt, fixup_alt_jump, buf_end); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Mark and leave space for a jump after this alternative, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ to be filled in later either by next alternative or ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ when know we're at the end of a series of alternatives. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fixup_alt_jump = buf_end; ~~~~~~~~~~~~~~~~~~~~~~~~~ GET_BUFFER_SPACE (3); ~~~~~~~~~~~~~~~~~~~~~ buf_end += 3; ~~~~~~~~~~~~~ laststart = 0; ~~~~~~~~~~~~~~ begalt = buf_end; ~~~~~~~~~~~~~~~~~ break; ~~~~~~ case '{': ~~~~~~~~~ /* If \{ is a literal. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!(syntax & RE_INTERVALS) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* If we're at `\{' and it's not the open-interval ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ operator. */ ~~~~~~~~~~~~~ || ((syntax & RE_INTERVALS) && (syntax & RE_NO_BK_BRACES)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || (p - 2 == pattern && p == pend)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto normal_backslash; ~~~~~~~~~~~~~~~~~~~~~~ #define BAD_INTERVAL(errnum) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ do \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (syntax & RE_NO_BK_BRACES) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto unfetch_interval; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ else \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ FREE_STACK_RETURN (errnum); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } while (0) ~~~~~~~~~~~ handle_interval: ~~~~~~~~~~~~~~~~ { ~ /* If got here, then the syntax allows intervals. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* At least (most) this many matches must be made. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int lower_bound = 0, upper_bound = -1; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ beg_interval = p - 1; ~~~~~~~~~~~~~~~~~~~~~ if (p == pend || itext_ichar_eql (p, '+')) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ BAD_INTERVAL (REG_EBRACE); ~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ GET_UNSIGNED_NUMBER (lower_bound); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (p == pend) BAD_INTERVAL (REG_EBRACE); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PATFETCH (c); ~~~~~~~~~~~~~ if (c == ',') ~~~~~~~~~~~~~ { ~ if (p == pend || itext_ichar_eql (p, '+')) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ BAD_INTERVAL (REG_EBRACE); ~~~~~~~~~~~~~~~~~~~~~~~~~~ GET_UNSIGNED_NUMBER (upper_bound); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (upper_bound < 0) upper_bound = RE_DUP_MAX; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (p == pend) BAD_INTERVAL (REG_EBRACE); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PATFETCH (c); ~~~~~~~~~~~~~ } ~ else ~~~~ { ~ /* Interval such as `{1}' => match exactly once. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ upper_bound = lower_bound; ~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ if (lower_bound > upper_bound) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ BAD_INTERVAL (REG_EBRACE); ~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ if (upper_bound > RE_DUP_MAX) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ BAD_INTERVAL (REG_ESIZEBR); ~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ if (!(syntax & RE_NO_BK_BRACES)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ if (c != '\\') ~~~~~~~~~~~~~~ FREE_STACK_RETURN (REG_BADBR); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (p == pend) ~~~~~~~~~~~~~~ FREE_STACK_RETURN (REG_EESCAPE); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PATFETCH (c); ~~~~~~~~~~~~~ } ~ if (c != '}') ~~~~~~~~~~~~~ { ~ BAD_INTERVAL (REG_EBRACE); ~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* We just parsed a valid interval. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* It's invalid to have no preceding RE. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!laststart) ~~~~~~~~~~~~~~~ { ~ if (syntax & RE_CONTEXT_INVALID_OPS) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ FREE_STACK_RETURN (REG_BADRPT); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ else if (syntax & RE_CONTEXT_INDEP_OPS) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ laststart = buf_end; ~~~~~~~~~~~~~~~~~~~~ else ~~~~ goto unfetch_interval; ~~~~~~~~~~~~~~~~~~~~~~ } ~ /* If the upper bound is zero, don't want to succeed at ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ all; jump from `laststart' to `b + 3', which will be ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the end of the buffer after we insert the jump. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (upper_bound == 0) ~~~~~~~~~~~~~~~~~~~~~ { ~ GET_BUFFER_SPACE (3); ~~~~~~~~~~~~~~~~~~~~~ INSERT_JUMP (jump, laststart, buf_end + 3); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ buf_end += 3; ~~~~~~~~~~~~~ } ~ /* Otherwise, we have a nontrivial interval. When ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ we're all done, the pattern will look like: ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ set_number_at ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ set_number_at ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ succeed_n ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ~~~~~~~~~~~~~~ jump_n ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (The upper bound and `jump_n' are omitted if ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `upper_bound' is 1, though.) */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ else ~~~~ { /* If the upper bound is > 1, we need to insert ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ more at the end of the loop. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int nbytes = 10 + (upper_bound > 1) * 10; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ GET_BUFFER_SPACE (nbytes); ~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Initialize lower bound of the `succeed_n', even ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ though it will be set during matching by its ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ attendant `set_number_at' (inserted next), ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ because `re_compile_fastmap' needs to know. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Jump to the `jump_n' we might insert below. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ INSERT_JUMP2 (succeed_n, laststart, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ buf_end + 5 + (upper_bound > 1) * 5, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ lower_bound); ~~~~~~~~~~~~~ buf_end += 5; ~~~~~~~~~~~~~ /* Code to initialize the lower bound. Insert ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ before the `succeed_n'. The `5' is the last two ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bytes of this `set_number_at', plus 3 bytes of ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the following `succeed_n'. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ insert_op2 (set_number_at, laststart, 5, lower_bound, buf_end); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ buf_end += 5; ~~~~~~~~~~~~~ if (upper_bound > 1) ~~~~~~~~~~~~~~~~~~~~ { /* More than one repetition is allowed, so ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ append a backward jump to the `succeed_n' ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ that starts this interval. ~~~~~~~~~~~~~~~~~~~~~~~~~~ When we've reached this during matching, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ we'll have matched the interval once, so ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ jump back only `upper_bound - 1' times. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ STORE_JUMP2 (jump_n, buf_end, laststart + 5, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ upper_bound - 1); ~~~~~~~~~~~~~~~~~ buf_end += 5; ~~~~~~~~~~~~~ /* The location we want to set is the second ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ parameter of the `jump_n'; that is `b-2' as ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ an absolute address. `laststart' will be ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the `set_number_at' we're about to insert; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `laststart+3' the number to set, the source ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for the relative address. But we are ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ inserting into the middle of the pattern -- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ so everything is getting moved up by 5. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Conclusion: (b - 2) - (laststart + 3) + 5, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ i.e., b - laststart. ~~~~~~~~~~~~~~~~~~~~ We insert this at the beginning of the loop ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ so that if we fail during matching, we'll ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ reinitialize the bounds. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ insert_op2 (set_number_at, laststart, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ buf_end - laststart, ~~~~~~~~~~~~~~~~~~~~ upper_bound - 1, buf_end); ~~~~~~~~~~~~~~~~~~~~~~~~~~ buf_end += 5; ~~~~~~~~~~~~~ } ~ } ~ pending_exact = 0; ~~~~~~~~~~~~~~~~~~ beg_interval = NULL; ~~~~~~~~~~~~~~~~~~~~ } ~ break; ~~~~~~ #undef BAD_INTERVAL ~~~~~~~~~~~~~~~~~~~ unfetch_interval: ~~~~~~~~~~~~~~~~~ /* If an invalid interval, match the characters as literals. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ assert (beg_interval); ~~~~~~~~~~~~~~~~~~~~~~ p = beg_interval; ~~~~~~~~~~~~~~~~~ beg_interval = NULL; ~~~~~~~~~~~~~~~~~~~~ /* normal_char and normal_backslash need `c'. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PATFETCH (c); ~~~~~~~~~~~~~ if (!(syntax & RE_NO_BK_BRACES)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ if (p > pattern && p[-1] == '\\') ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto normal_backslash; ~~~~~~~~~~~~~~~~~~~~~~ } ~ goto normal_char; ~~~~~~~~~~~~~~~~~ #ifdef emacs ~~~~~~~~~~~~ /* There is no way to specify the before_dot and after_dot ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ operators. rms says this is ok. --karl */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case '=': ~~~~~~~~~ BUF_PUSH (at_dot); ~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case 's': ~~~~~~~~~ laststart = buf_end; ~~~~~~~~~~~~~~~~~~~~ PATFETCH (c); ~~~~~~~~~~~~~ /* XEmacs addition */ ~~~~~~~~~~~~~~~~~~~~~ if (c >= 0x80 || syntax_spec_code[c] == 0377) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ FREE_STACK_RETURN (REG_ESYNTAX); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ BUF_PUSH_2 (syntaxspec, syntax_spec_code[c]); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case 'S': ~~~~~~~~~ laststart = buf_end; ~~~~~~~~~~~~~~~~~~~~ PATFETCH (c); ~~~~~~~~~~~~~ /* XEmacs addition */ ~~~~~~~~~~~~~~~~~~~~~ if (c >= 0x80 || syntax_spec_code[c] == 0377) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ FREE_STACK_RETURN (REG_ESYNTAX); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ BUF_PUSH_2 (notsyntaxspec, syntax_spec_code[c]); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ #ifdef MULE ~~~~~~~~~~~ /* 97.2.17 jhod merged in to XEmacs from mule-2.3 */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case 'c': ~~~~~~~~~ laststart = buf_end; ~~~~~~~~~~~~~~~~~~~~ PATFETCH_RAW (c); ~~~~~~~~~~~~~~~~~ if (c < 32 || c > 127) ~~~~~~~~~~~~~~~~~~~~~~ FREE_STACK_RETURN (REG_ECATEGORY); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ BUF_PUSH_2 (categoryspec, c); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case 'C': ~~~~~~~~~ laststart = buf_end; ~~~~~~~~~~~~~~~~~~~~ PATFETCH_RAW (c); ~~~~~~~~~~~~~~~~~ if (c < 32 || c > 127) ~~~~~~~~~~~~~~~~~~~~~~ FREE_STACK_RETURN (REG_ECATEGORY); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ BUF_PUSH_2 (notcategoryspec, c); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ /* end of category patch */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif /* MULE */ ~~~~~~~~~~~~~~~~~ #endif /* emacs */ ~~~~~~~~~~~~~~~~~~ case 'w': ~~~~~~~~~ laststart = buf_end; ~~~~~~~~~~~~~~~~~~~~ BUF_PUSH (wordchar); ~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case 'W': ~~~~~~~~~ laststart = buf_end; ~~~~~~~~~~~~~~~~~~~~ BUF_PUSH (notwordchar); ~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case '<': ~~~~~~~~~ BUF_PUSH (wordbeg); ~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case '>': ~~~~~~~~~ BUF_PUSH (wordend); ~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case 'b': ~~~~~~~~~ BUF_PUSH (wordbound); ~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case 'B': ~~~~~~~~~ BUF_PUSH (notwordbound); ~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case '`': ~~~~~~~~~ BUF_PUSH (begbuf); ~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case '\'': ~~~~~~~~~~ BUF_PUSH (endbuf); ~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case '1': case '2': case '3': case '4': case '5': ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case '6': case '7': case '8': case '9': ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ regnum_t reg = -1, regint; ~~~~~~~~~~~~~~~~~~~~~~~~~~ if (syntax & RE_NO_BK_REFS) ~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto normal_char; ~~~~~~~~~~~~~~~~~ PATUNFETCH; ~~~~~~~~~~~ GET_UNSIGNED_NUMBER (reg); ~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Progressively divide down the backreference until we find ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ one that corresponds to an existing register. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ while (reg > 10 && ~~~~~~~~~~~~~~~~~~ (syntax & RE_NO_MULTI_DIGIT_BK_REFS ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || reg > bufp->re_nsub ~~~~~~~~~~~~~~~~~~~~~~ || (bufp->external_to_internal_register[reg] ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ == (int) 0xDEADBEEF))) ~~~~~~~~~~~~~~~~~~~~~~ { ~ PATUNFETCH; ~~~~~~~~~~~ reg /= 10; ~~~~~~~~~~ } ~ if (reg > bufp->re_nsub ~~~~~~~~~~~~~~~~~~~~~~~ || (bufp->external_to_internal_register[reg] ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ == (int) 0xDEADBEEF)) ~~~~~~~~~~~~~~~~~~~~~ { ~ /* \N with one digit with a non-existing group has always ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ been a syntax error. ~~~~~~~~~~~~~~~~~~~~ GNU as of Fr 27 Mär 2020 16:24:07 GMT do not accept ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ multidigit backreferences; if they did there would be ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ an argument for this not being an error for those ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ backreferences that are less than some known named ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ backreference. As it is currently we should error, this ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ will give those writing code for XEmacs better ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ feedback. */ ~~~~~~~~~~~~ FREE_STACK_RETURN (REG_ESUBREG); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ regint = bufp->external_to_internal_register[reg]; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Can't back reference to a subexpression if inside of it. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (group_in_compile_stack (compile_stack, regint)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ /* Check REG, not REGINT. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ while (reg > 10) ~~~~~~~~~~~~~~~~ { ~ PATUNFETCH; ~~~~~~~~~~~ reg = reg / 10; ~~~~~~~~~~~~~~~ } ~ goto normal_char; ~~~~~~~~~~~~~~~~~ } ~ #ifdef emacs ~~~~~~~~~~~~ if (reg > 9 && ~~~~~~~~~~~~~~ bufp->warned_about_incompatible_back_references == 0) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ bufp->warned_about_incompatible_back_references = 1; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ warn_when_safe (intern ("regex"), Qinfo, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ "Back reference \\%d now has new " ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ "semantics in %s", reg, pattern); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ #endif ~~~~~~ laststart = buf_end; ~~~~~~~~~~~~~~~~~~~~ GET_BUFFER_SPACE (3); ~~~~~~~~~~~~~~~~~~~~~ store_op1 (duplicate, buf_end, regint); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ buf_end += 3; ~~~~~~~~~~~~~ } ~ break; ~~~~~~ case '+': ~~~~~~~~~ case '?': ~~~~~~~~~ if (syntax & RE_BK_PLUS_QM) ~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto handle_plus; ~~~~~~~~~~~~~~~~~ else ~~~~ goto normal_backslash; ~~~~~~~~~~~~~~~~~~~~~~ default: ~~~~~~~~ normal_backslash: ~~~~~~~~~~~~~~~~~ /* You might think it would be useful for \ to mean ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ not to translate; but if we don't translate it, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ it will never match anything. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ c = RE_TRANSLATE (c); ~~~~~~~~~~~~~~~~~~~~~ goto normal_char; ~~~~~~~~~~~~~~~~~ } ~ break; ~~~~~~ default: ~~~~~~~~ /* Expects the character in `c'. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* `p' points to the location after where `c' came from. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ normal_char: ~~~~~~~~~~~~ { ~ /* The following conditional synced to GNU Emacs 22.1. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* If no exactn currently being built. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!pending_exact ~~~~~~~~~~~~~~~~~~ /* If last exactn not at current position. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || pending_exact + *pending_exact + 1 != buf_end ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* We have only one byte following the exactn for the count. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || *pending_exact >= (1 << BYTEWIDTH) - MAX_ICHAR_LEN ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* If followed by a repetition operator. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ If the lookahead fails because of end of pattern, any ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ trailing backslash will get caught later. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || (p != pend && (*p == '*' || *p == '^')) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || ((syntax & RE_BK_PLUS_QM) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ? p + 1 < pend && *p == '\\' && (p[1] == '+' || p[1] == '?') ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ : p != pend && (*p == '+' || *p == '?')) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || ((syntax & RE_INTERVALS) ~~~~~~~~~~~~~~~~~~~~~~~~~~~ && ((syntax & RE_NO_BK_BRACES) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ? p != pend && *p == '{' ~~~~~~~~~~~~~~~~~~~~~~~~ : p + 1 < pend && (p[0] == '\\' && p[1] == '{')))) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ /* Start building a new exactn. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ laststart = buf_end; ~~~~~~~~~~~~~~~~~~~~ BUF_PUSH_2 (exactn, 0); ~~~~~~~~~~~~~~~~~~~~~~~ pending_exact = buf_end - 1; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ #ifndef MULE ~~~~~~~~~~~~ BUF_PUSH (c); ~~~~~~~~~~~~~ (*pending_exact)++; ~~~~~~~~~~~~~~~~~~~ #else ~~~~~ { ~ Bytecount bt_count; ~~~~~~~~~~~~~~~~~~~ Ibyte tmp_buf[MAX_ICHAR_LEN]; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int i; ~~~~~~ bt_count = set_itext_ichar (tmp_buf, c); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (i = 0; i < bt_count; i++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ BUF_PUSH (tmp_buf[i]); ~~~~~~~~~~~~~~~~~~~~~~ (*pending_exact)++; ~~~~~~~~~~~~~~~~~~~ } ~ } ~ #endif ~~~~~~ break; ~~~~~~ } ~ } /* switch (c) */ ~~~~~~~~~~~~~~~~~~ } /* while p != pend */ ~~~~~~~~~~~~~~~~~~~~~~~ /* Through the pattern now. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (fixup_alt_jump) ~~~~~~~~~~~~~~~~~~~ STORE_JUMP (jump_past_alt, fixup_alt_jump, buf_end); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!COMPILE_STACK_EMPTY) ~~~~~~~~~~~~~~~~~~~~~~~~~ FREE_STACK_RETURN (REG_EPAREN); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* If we don't want backtracking, force success ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the first time we reach the end of the compiled pattern. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (syntax & RE_NO_POSIX_BACKTRACKING) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ BUF_PUSH (succeed); ~~~~~~~~~~~~~~~~~~~ xfree (compile_stack.stack); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* We have succeeded; set the length of the buffer. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->used = buf_end - bufp->buffer; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef DEBUG ~~~~~~~~~~~~ if (debug_regexps & RE_DEBUG_COMPILATION) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ DEBUG_PRINT1 ("\nCompiled pattern: \n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ print_compiled_pattern (bufp); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ #endif /* DEBUG */ ~~~~~~~~~~~~~~~~~~ #ifndef MATCH_MAY_ALLOCATE ~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Initialize the failure stack to the largest possible stack. This ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ isn't necessary unless we're trying to avoid calling alloca in ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the search and match routines. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ int num_regs = bufp->re_ngroups + 1; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Since DOUBLE_FAIL_STACK refuses to double only if the current size ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ is strictly greater than re_max_failures, the largest possible stack ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ is 2 * re_max_failures failure points. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (fail_stack.size < (2 * re_max_failures * MAX_FAILURE_ITEMS)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ fail_stack.size = (2 * re_max_failures * MAX_FAILURE_ITEMS); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (! fail_stack.stack) ~~~~~~~~~~~~~~~~~~~~~~~ fail_stack.stack ~~~~~~~~~~~~~~~~ = (fail_stack_elt_t *) xmalloc (fail_stack.size ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ * sizeof (fail_stack_elt_t)); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ else ~~~~ fail_stack.stack ~~~~~~~~~~~~~~~~ = (fail_stack_elt_t *) xrealloc (fail_stack.stack, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (fail_stack.size ~~~~~~~~~~~~~~~~ * sizeof (fail_stack_elt_t))); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ regex_grow_registers (num_regs); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ #endif /* not MATCH_MAY_ALLOCATE */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return REG_NOERROR; ~~~~~~~~~~~~~~~~~~~ } /* regex_compile */ ~~~~~~~~~~~~~~~~~~~~~ ~ /* Subroutines for `regex_compile'. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Store OP at LOC followed by two-byte integer parameter ARG. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ static void ~~~~~~~~~~~ store_op1 (re_opcode_t op, unsigned char *loc, int arg) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ *loc = (unsigned char) op; ~~~~~~~~~~~~~~~~~~~~~~~~~~ STORE_NUMBER (loc + 1, arg); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* Like `store_op1', but for two two-byte parameters ARG1 and ARG2. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ static void ~~~~~~~~~~~ store_op2 (re_opcode_t op, unsigned char *loc, int arg1, int arg2) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ *loc = (unsigned char) op; ~~~~~~~~~~~~~~~~~~~~~~~~~~ STORE_NUMBER (loc + 1, arg1); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ STORE_NUMBER (loc + 3, arg2); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* Copy the bytes from LOC to END to open up three bytes of space at LOC ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for OP followed by two-byte integer parameter ARG. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ static void ~~~~~~~~~~~ insert_op1 (re_opcode_t op, unsigned char *loc, int arg, unsigned char *end) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ REGISTER unsigned char *pfrom = end; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ REGISTER unsigned char *pto = end + 3; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ while (pfrom != loc) ~~~~~~~~~~~~~~~~~~~~ *--pto = *--pfrom; ~~~~~~~~~~~~~~~~~~ store_op1 (op, loc, arg); ~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* Like `insert_op1', but for two two-byte parameters ARG1 and ARG2. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ static void ~~~~~~~~~~~ insert_op2 (re_opcode_t op, unsigned char *loc, int arg1, int arg2, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ unsigned char *end) ~~~~~~~~~~~~~~~~~~~ { ~ REGISTER unsigned char *pfrom = end; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ REGISTER unsigned char *pto = end + 5; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ while (pfrom != loc) ~~~~~~~~~~~~~~~~~~~~ *--pto = *--pfrom; ~~~~~~~~~~~~~~~~~~ store_op2 (op, loc, arg1, arg2); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* P points to just after a ^ in PATTERN. Return true if that ^ comes ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ after an alternative or a begin-subexpression. We assume there is at ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ least one character before the ^. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ static re_bool ~~~~~~~~~~~~~~ at_begline_loc_p (re_char *pattern, re_char *p, reg_syntax_t syntax) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ re_char *prev = p - 2; ~~~~~~~~~~~~~~~~~~~~~~ re_bool prev_prev_backslash = prev > pattern && prev[-1] == '\\'; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return ~~~~~~ /* After a subexpression? */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (*prev == '(' && (syntax & RE_NO_BK_PARENS || prev_prev_backslash)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* After an alternative? */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || (*prev == '|' && (syntax & RE_NO_BK_VBAR || prev_prev_backslash)); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* The dual of at_begline_loc_p. This one is for $. We assume there is ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ at least one character after the $, i.e., `P < PEND'. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ static re_bool ~~~~~~~~~~~~~~ at_endline_loc_p (re_char *p, re_char *pend, int syntax) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ re_char *next = p; ~~~~~~~~~~~~~~~~~~ re_bool next_backslash = *next == '\\'; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_char *next_next = p + 1 < pend ? p + 1 : 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return ~~~~~~ /* Before a subexpression? */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (syntax & RE_NO_BK_PARENS ? *next == ')' ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ : next_backslash && next_next && *next_next == ')') ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Before an alternative? */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || (syntax & RE_NO_BK_VBAR ? *next == '|' ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ : next_backslash && next_next && *next_next == '|'); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* Returns true if REGNUM is in one of COMPILE_STACK's elements and ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ false if it's not. */ ~~~~~~~~~~~~~~~~~~~~~~ static re_bool ~~~~~~~~~~~~~~ group_in_compile_stack (compile_stack_type compile_stack, regnum_t regnum) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ int this_element; ~~~~~~~~~~~~~~~~~ for (this_element = compile_stack.avail - 1; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ this_element >= 0; ~~~~~~~~~~~~~~~~~~ this_element--) ~~~~~~~~~~~~~~~ if (compile_stack.stack[this_element].regnum == regnum) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return true; ~~~~~~~~~~~~ return false; ~~~~~~~~~~~~~ } ~ /* Read the ending character of a range (in a bracket expression) from the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ uncompiled pattern *P_PTR (which ends at PEND). We assume the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ starting character is in `P[-2]'. (`P[-1]' is the character `-'.) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Then we set the translation of all bits between the starting and ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ending characters (inclusive) in the compiled pattern B. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Return an error code. ~~~~~~~~~~~~~~~~~~~~~ We use these short variable names so we can use the same macros as ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `regex_compile' itself. ~~~~~~~~~~~~~~~~~~~~~~~ Under Mule, this is only called when both chars of the range are ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ASCII. */ ~~~~~~~~~ static reg_errcode_t ~~~~~~~~~~~~~~~~~~~~ compile_range (re_char **p_ptr, re_char *pend, RE_TRANSLATE_TYPE translate, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ reg_syntax_t syntax, unsigned char *buf_end) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ Ichar this_char; ~~~~~~~~~~~~~~~~ re_char *p = *p_ptr; ~~~~~~~~~~~~~~~~~~~~ int range_start, range_end; ~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (p == pend) ~~~~~~~~~~~~~~ return REG_ERANGE; ~~~~~~~~~~~~~~~~~~ /* Even though the pattern is a signed `char *', we need to fetch ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ with unsigned char *'s; if the high bit of the pattern character ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ is set, the range endpoints will be negative if we fetch using a ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ signed char *. ~~~~~~~~~~~~~~ We also want to fetch the endpoints without translating them; the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ appropriate translation is done in the bit-setting loop below. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* The SVR4 compiler on the 3B2 had trouble with unsigned const char *. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ range_start = ((const unsigned char *) p)[-2]; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ range_end = ((const unsigned char *) p)[0]; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Have to increment the pointer into the pattern string, so the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ caller isn't still at the ending character. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (*p_ptr)++; ~~~~~~~~~~~ /* If the start is after the end, the range is empty. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (range_start > range_end) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return syntax & RE_NO_EMPTY_RANGES ? REG_ERANGE : REG_NOERROR; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Here we see why `this_char' has to be larger than an `unsigned ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ char' -- the range is inclusive, so if `range_end' == 0xff ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (assuming 8-bit characters), we would otherwise go into an infinite ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ loop, since all characters <= 0xff. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (this_char = range_start; this_char <= range_end; this_char++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ SET_LIST_BIT (RE_TRANSLATE (this_char)); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ return REG_NOERROR; ~~~~~~~~~~~~~~~~~~~ } ~ #ifdef MULE ~~~~~~~~~~~ static reg_errcode_t ~~~~~~~~~~~~~~~~~~~~ compile_extended_range (re_char **p_ptr, re_char *pend, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RE_TRANSLATE_TYPE translate, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ reg_syntax_t syntax, Lisp_Object rtab) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ Ichar this_char, range_start, range_end; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ const Ibyte *p; ~~~~~~~~~~~~~~~ if (*p_ptr == pend) ~~~~~~~~~~~~~~~~~~~ return REG_ERANGE; ~~~~~~~~~~~~~~~~~~ p = (const Ibyte *) *p_ptr; ~~~~~~~~~~~~~~~~~~~~~~~~~~~ range_end = itext_ichar (p); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ p--; /* back to '-' */ ~~~~~~~~~~~~~~~~~~~~~~ DEC_IBYTEPTR (p); /* back to start of range */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* We also want to fetch the endpoints without translating them; the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ appropriate translation is done in the bit-setting loop below. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ range_start = itext_ichar (p); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ INC_IBYTEPTR (*p_ptr); ~~~~~~~~~~~~~~~~~~~~~~ /* If the start is after the end, the range is empty. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (range_start > range_end) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return syntax & RE_NO_EMPTY_RANGES ? REG_ERANGE : REG_NOERROR; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifndef UNICODE_INTERNAL ~~~~~~~~~~~~~~~~~~~~~~~~ /* Can't have ranges spanning different charsets, except maybe for ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ranges entirely within the first 256 chars. (The intent of this is that ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the effect of such a range would be unpredictable, since there is no ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ well-defined ordering over charsets and the particular assignment of ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ charset ID's is arbitrary.) This does not apply to Unicode, with ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ well-defined character values. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if ((range_start >= 0x100 || range_end >= 0x100) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ && !EQ (old_mule_ichar_charset (range_start), ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ old_mule_ichar_charset (range_end))) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return REG_ERANGESPAN; ~~~~~~~~~~~~~~~~~~~~~~ #endif /* not UNICODE_INTERNAL */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (TRANSLATE_P (translate)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ /* #### This might be way inefficient if the range encompasses 10,000 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ chars or something. To be efficient, you'd have to do something like ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ this: ~~~~~ range_table a ~~~~~~~~~~~~~ range_table b; ~~~~~~~~~~~~~~ map_char_table (translation table, [range_start, range_end]) of ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ lambda (ch, translation): ~~~~~~~~~~~~~~~~~~~~~~~~~ put (ch, Qt) in a ~~~~~~~~~~~~~~~~~ put (translation, Qt) in b ~~~~~~~~~~~~~~~~~~~~~~~~~~ invert the range in a and truncate to [range_start, range_end] ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ put the union of a, b in rtab ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ This is to say, we want to map every character that has a translation ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ to its translation, and other characters to themselves. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ This assumes, as is reasonable in practice, that a translation ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ table maps individual characters to their translation, and does ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ not generally map multiple characters to the same translation. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ ~~ for (this_char = range_start; this_char <= range_end; this_char++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ SET_RANGETAB_BIT (RE_TRANSLATE (this_char)); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ } ~ else ~~~~ put_range_table (rtab, range_start, range_end, Qt); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return REG_NOERROR; ~~~~~~~~~~~~~~~~~~~ } ~ #endif /* MULE */ ~~~~~~~~~~~~~~~~~ #ifdef emacs ~~~~~~~~~~~~ reg_errcode_t ~~~~~~~~~~~~~ compile_char_class (re_wctype_t cc, Lisp_Object rtab, Bitbyte *flags_out) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ *flags_out |= re_wctype_to_bit (cc); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ switch (cc) ~~~~~~~~~~~ { ~ case RECC_ASCII: ~~~~~~~~~~~~~~~~ put_range_table (rtab, 0, 0x7f, Qt); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case RECC_XDIGIT: ~~~~~~~~~~~~~~~~~ put_range_table (rtab, 'a', 'f', Qt); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ put_range_table (rtab, 'A', 'f', Qt); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* fallthrough */ ~~~~~~~~~~~~~~~~~ case RECC_DIGIT: ~~~~~~~~~~~~~~~~ put_range_table (rtab, '0', '9', Qt); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case RECC_BLANK: ~~~~~~~~~~~~~~~~ put_range_table (rtab, ' ', ' ', Qt); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ put_range_table (rtab, '\t', '\t', Qt); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case RECC_PRINT: ~~~~~~~~~~~~~~~~ put_range_table (rtab, ' ', 0x7e, Qt); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ put_range_table (rtab, 0x80, CHAR_CODE_LIMIT, Qt); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case RECC_GRAPH: ~~~~~~~~~~~~~~~~ put_range_table (rtab, '!', 0x7e, Qt); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ put_range_table (rtab, 0x80, CHAR_CODE_LIMIT, Qt); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case RECC_NONASCII: ~~~~~~~~~~~~~~~~~~~ case RECC_MULTIBYTE: ~~~~~~~~~~~~~~~~~~~~ put_range_table (rtab, 0x80, CHAR_CODE_LIMIT, Qt); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case RECC_CNTRL: ~~~~~~~~~~~~~~~~ put_range_table (rtab, 0x00, 0x1f, Qt); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case RECC_UNIBYTE: ~~~~~~~~~~~~~~~~~~ /* Never true in XEmacs. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ /* The following all have their own bits in the class_bits argument to ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ charset_mule and charset_mule_not, they don't use the range table ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ information. */ ~~~~~~~~~~~~~~~ case RECC_ALPHA: ~~~~~~~~~~~~~~~~ case RECC_WORD: ~~~~~~~~~~~~~~~ case RECC_ALNUM: /* Equivalent to RECC_WORD */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_LOWER: ~~~~~~~~~~~~~~~~ case RECC_PUNCT: ~~~~~~~~~~~~~~~~ case RECC_SPACE: ~~~~~~~~~~~~~~~~ case RECC_UPPER: ~~~~~~~~~~~~~~~~ break; ~~~~~~ } ~ return REG_NOERROR; ~~~~~~~~~~~~~~~~~~~ } ~ #endif /* MULE */ ~~~~~~~~~~~~~~~~~ ~ /* re_compile_fastmap computes a ``fastmap'' for the compiled pattern in ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ BUFP. A fastmap records which of the (1 << BYTEWIDTH) possible ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ characters can start a string that matches the pattern. This fastmap ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ is used by re_search to skip quickly over impossible starting points. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ The caller must supply the address of a (1 << BYTEWIDTH)-byte data ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ area as BUFP->fastmap. ~~~~~~~~~~~~~~~~~~~~~~ We set the `fastmap', `fastmap_accurate', and `can_be_null' fields in ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the pattern buffer. ~~~~~~~~~~~~~~~~~~~ Returns 0 if we succeed, -2 if an internal error. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int ~~~ re_compile_fastmap (struct re_pattern_buffer *bufp ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RE_LISP_SHORT_CONTEXT_ARGS_DECL) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ int j, k; ~~~~~~~~~ #ifdef MATCH_MAY_ALLOCATE ~~~~~~~~~~~~~~~~~~~~~~~~~ fail_stack_type fail_stack; ~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ /* We don't push any register information onto the failure stack. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* &&#### this should be changed for 8-bit-fixed, for efficiency. see ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ comment marked with &&#### in re_search_2. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ REGISTER char *fastmap = bufp->fastmap; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ unsigned char *pattern = bufp->buffer; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ long size = bufp->used; ~~~~~~~~~~~~~~~~~~~~~~~ re_char *p = pattern; ~~~~~~~~~~~~~~~~~~~~~ REGISTER re_char *pend = pattern + size; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef REGEX_REL_ALLOC ~~~~~~~~~~~~~~~~~~~~~~ /* This holds the pointer to the failure stack, when ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ it is allocated relocatably. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fail_stack_elt_t *failure_stack_ptr; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ /* Assume that each path through the pattern can be null until ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ proven otherwise. We set this false at the bottom of switch ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ statement, to which we get only if a particular path doesn't ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ match the empty string. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_bool path_can_be_null = true; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* We aren't doing a `succeed_n' to begin with. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_bool succeed_n_p = false; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef ERROR_CHECK_MALLOC ~~~~~~~~~~~~~~~~~~~~~~~~~ /* The pattern comes from string data, not buffer data. We don't access ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ any buffer data, so we don't have to worry about malloc() (but the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ disallowed flag may have been set by a caller). */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int depth = bind_regex_malloc_disallowed (0); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ assert (fastmap != NULL && p != NULL); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ INIT_FAIL_STACK (); ~~~~~~~~~~~~~~~~~~~ memset (fastmap, 0, 1 << BYTEWIDTH); /* Assume nothing's valid. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->fastmap_accurate = 1; /* It will be when we're done. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->can_be_null = 0; ~~~~~~~~~~~~~~~~~~~~~~ while (1) ~~~~~~~~~ { ~ if (p == pend || *p == succeed) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ /* We have reached the (effective) end of pattern. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!FAIL_STACK_EMPTY ()) ~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ bufp->can_be_null |= path_can_be_null; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Reset for next path. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~ path_can_be_null = true; ~~~~~~~~~~~~~~~~~~~~~~~~ p = (unsigned char *) fail_stack.stack[--fail_stack.avail].pointer; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ continue; ~~~~~~~~~ } ~ else ~~~~ break; ~~~~~~ } ~ /* We should never be about to go beyond the end of the pattern. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ assert (p < pend); ~~~~~~~~~~~~~~~~~~ switch ((re_opcode_t) *p++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ /* I guess the idea here is to simply not bother with a fastmap ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if a backreference is used, since it's too hard to figure out ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the fastmap for the corresponding group. Setting ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `can_be_null' stops `re_search_2' from using the fastmap, so ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ that is all we do. */ ~~~~~~~~~~~~~~~~~~~~~~ case duplicate: ~~~~~~~~~~~~~~~ bufp->can_be_null = 1; ~~~~~~~~~~~~~~~~~~~~~~ goto done; ~~~~~~~~~~ /* Following are the cases which match a character. These end ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ with `break'. */ ~~~~~~~~~~~~~~~~~ case exactn: ~~~~~~~~~~~~ fastmap[p[1]] = 1; ~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case charset: ~~~~~~~~~~~~~ /* XEmacs: Under Mule, these bit vectors will ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ only contain values for characters below 0x80. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH))) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[j] = 1; ~~~~~~~~~~~~~~~ break; ~~~~~~ case charset_not: ~~~~~~~~~~~~~~~~~ /* Chars beyond end of map must be allowed. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef MULE ~~~~~~~~~~~ for (j = *p * BYTEWIDTH; j < 0x80; j++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[j] = 1; ~~~~~~~~~~~~~~~ /* And all extended characters must be allowed, too. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (j = 0x80; j < 0x100; j++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (ibyte_first_byte_p (j)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[j] = 1; ~~~~~~~~~~~~~~~ #else /* not MULE */ ~~~~~~~~~~~~~~~~~~~~ for (j = *p * BYTEWIDTH; j < (1 << BYTEWIDTH); j++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[j] = 1; ~~~~~~~~~~~~~~~ #endif /* MULE */ ~~~~~~~~~~~~~~~~~ for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!(p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH)))) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[j] = 1; ~~~~~~~~~~~~~~~ break; ~~~~~~ #ifdef MULE ~~~~~~~~~~~ case charset_mule: ~~~~~~~~~~~~~~~~~~ { ~ int nentries; ~~~~~~~~~~~~~ Bitbyte flags = *p++; ~~~~~~~~~~~~~~~~~~~~~ if (flags) ~~~~~~~~~~ { ~ /* We need to consult the syntax table, fastmap won't ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ work. */ ~~~~~~~~ bufp->can_be_null = 1; ~~~~~~~~~~~~~~~~~~~~~~ goto done; ~~~~~~~~~~ } ~ nentries = unified_range_table_nentries ((void *) p); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (j = 0; j < nentries; j++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ EMACS_INT first, last; ~~~~~~~~~~~~~~~~~~~~~~ Lisp_Object dummy_val; ~~~~~~~~~~~~~~~~~~~~~~ int jj; ~~~~~~~ Ibyte strr[MAX_ICHAR_LEN]; ~~~~~~~~~~~~~~~~~~~~~~~~~~ unified_range_table_get_range ((void *) p, j, &first, &last, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ &dummy_val); ~~~~~~~~~~~~ #ifndef UNICODE_INTERNAL ~~~~~~~~~~~~~~~~~~~~~~~~ for (jj = first; jj <= last && jj < 0x80; jj++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[jj] = 1; ~~~~~~~~~~~~~~~~ /* Ranges below 0x100 can span charsets, but there ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ are only two (Control-1 and Latin-1), and ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ either first or last has to be in them. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ set_itext_ichar (strr, first); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[*strr] = 1; ~~~~~~~~~~~~~~~~~~~ if (last < 0x100) ~~~~~~~~~~~~~~~~~ { ~ set_itext_ichar (strr, last); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[*strr] = 1; ~~~~~~~~~~~~~~~~~~~ } ~ else if (CHAR_CODE_LIMIT == last) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ /* This is RECC_MULTIBYTE or RECC_NONASCII; true for all ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ non-ASCII characters. */ ~~~~~~~~~~~~~~~~~~~~~~~~ jj = 0x80; ~~~~~~~~~~ while (jj < 0xA0) ~~~~~~~~~~~~~~~~~ { ~ fastmap[jj++] = 1; ~~~~~~~~~~~~~~~~~~ } ~ } ~ #else ~~~~~ /* Ranges can span charsets. We depend on the fact that ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ lead bytes are monotonically non-decreasing as ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ character values increase. @@#### This is a fairly ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ reasonable assumption in general (but DOES NOT WORK in ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ old Mule due to the ordering of private dimension-1 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ chars before official dimension-2 chars), and introduces ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ a dependency on the particular representation. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ Ibyte strrlast[MAX_ICHAR_LEN]; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ set_itext_ichar (strr, first); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ set_itext_ichar (strrlast, min (last, CHAR_CODE_LIMIT - 1)); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (jj = *strr; jj <= *strrlast; jj++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[jj] = 1; ~~~~~~~~~~~~~~~~ } ~ #endif /* not UNICODE_INTERNAL */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* If it's not a possible first byte, it can't be in the fastmap. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ In UTF-8, lead bytes are not contiguous with ASCII, so a ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ range spanning the ASCII/non-ASCII boundary will put ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ extraneous bytes in the range [0x80 - 0xBF] in the fastmap. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (j = 0x80; j < 0x100; j++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!ibyte_first_byte_p (j)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[j] = 0; ~~~~~~~~~~~~~~~ } ~ break; ~~~~~~ case charset_mule_not: ~~~~~~~~~~~~~~~~~~~~~~ { ~ int nentries; ~~~~~~~~~~~~~ int smallest_prev = 0; ~~~~~~~~~~~~~~~~~~~~~~ Bitbyte flags = *p++; ~~~~~~~~~~~~~~~~~~~~~ if (flags) ~~~~~~~~~~ { ~ /* We need to consult the syntax table, fastmap won't ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ work. */ ~~~~~~~~ bufp->can_be_null = 1; ~~~~~~~~~~~~~~~~~~~~~~ goto done; ~~~~~~~~~~ } ~ nentries = unified_range_table_nentries ((void *) p); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifndef UNICODE_INTERNAL ~~~~~~~~~~~~~~~~~~~~~~~~ for (j = 0; j < nentries; j++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ EMACS_INT first, last; ~~~~~~~~~~~~~~~~~~~~~~ Lisp_Object dummy_val; ~~~~~~~~~~~~~~~~~~~~~~ int jj; ~~~~~~~ unified_range_table_get_range ((void *) p, j, &first, &last, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ &dummy_val); ~~~~~~~~~~~~ for (jj = smallest_prev; jj < first && jj < 0x80; jj++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[jj] = 1; ~~~~~~~~~~~~~~~~ smallest_prev = last + 1; ~~~~~~~~~~~~~~~~~~~~~~~~~ if (smallest_prev >= 0x80) ~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ } ~ /* Also set lead bytes after the end */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (j = smallest_prev; j < 0x80; j++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[j] = 1; ~~~~~~~~~~~~~~~ /* Calculating which lead bytes are actually allowed ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ here is rather difficult, so we just punt and allow ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ all of them. ~~~~~~~~~~~~ */ ~~ for (j = 0x80; j < 0x100; j++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (ibyte_first_byte_p (j)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[j] = 1; ~~~~~~~~~~~~~~~ #else ~~~~~ for (j = 0; j < nentries; j++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ EMACS_INT first, last; ~~~~~~~~~~~~~~~~~~~~~~ /* This denotes a range of lead bytes that are not ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ in the fastmap. */ ~~~~~~~~~~~~~~~~~~ int firstlead, lastlead; ~~~~~~~~~~~~~~~~~~~~~~~~ Lisp_Object dummy_val; ~~~~~~~~~~~~~~~~~~~~~~ int jj; ~~~~~~~ unified_range_table_get_range ((void *) p, j, &first, &last, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ &dummy_val); ~~~~~~~~~~~~ /* With Unicode-internal, lead bytes that are entirely ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ within the range and not including the beginning or end ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ are definitely not in the fastmap. Leading bytes that ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ include the beginning or ending characters will be in ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the fastmap unless the beginning or ending characters ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ are the first or last character, respectively, that uses ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ this lead byte. ~~~~~~~~~~~~~~~ @@#### WARNING! In order to determine whether we are the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ first or last character using a lead byte we use and ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ embed in the code some knowledge of how UTF-8 works -- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ at least, the fact that the the first character using a ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ particular lead byte has the minimum-numbered trailing ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ byte in all its trailing bytes, and the last character ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ using a particular lead byte has the maximum-numbered ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ trailing byte in all its trailing bytes. We abstract ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ away the actual minimum/maximum trailing byte numbers, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ at least. We could perhaps do this more portably by ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ just looking at the representation of the character one ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ higher or lower and seeing if the lead byte changes, but ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ you'd run into the problem of invalid characters, e.g. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if you're at the edge of the range of surrogates or are ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the top-most allowed character. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ ~~ if (first < 0x80) ~~~~~~~~~~~~~~~~~ firstlead = first; ~~~~~~~~~~~~~~~~~~ else ~~~~ { ~ Ibyte strr[MAX_ICHAR_LEN]; ~~~~~~~~~~~~~~~~~~~~~~~~~~ Bytecount slen = set_itext_ichar (strr, first); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int kk; ~~~~~~~ /* Determine if we're the first character using our ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ leading byte. */ ~~~~~~~~~~~~~~~~ for (kk = 1; kk < slen; kk++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (strr[kk] != FIRST_TRAILING_BYTE) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ /* If not, this leading byte might occur, so ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ make sure it gets added to the fastmap. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ firstlead = *strr + 1; ~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ } ~ /* Otherwise, we're the first character using our ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ leading byte, and we don't need to add the leading ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ byte to the fastmap. (If our range doesn't ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ completely cover the leading byte, it will get added ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ anyway by the code handling the other end of the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ range.) */ ~~~~~~~~~~ firstlead = *strr; ~~~~~~~~~~~~~~~~~~ } ~ if (last < 0x80) ~~~~~~~~~~~~~~~~ lastlead = last; ~~~~~~~~~~~~~~~~ else ~~~~ { ~ Ibyte strr[MAX_ICHAR_LEN]; ~~~~~~~~~~~~~~~~~~~~~~~~~~ Bytecount slen ~~~~~~~~~~~~~~ = set_itext_ichar (strr, ~~~~~~~~~~~~~~~~~~~~~~~~ min (last, ~~~~~~~~~~ CHAR_CODE_LIMIT - 1)); ~~~~~~~~~~~~~~~~~~~~~~ int kk; ~~~~~~~ /* Same as above but for the last character using ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ our leading byte. */ ~~~~~~~~~~~~~~~~~~~~ for (kk = 1; kk < slen; kk++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (strr[kk] != LAST_TRAILING_BYTE) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ lastlead = *strr - 1; ~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ } ~ lastlead = *strr; ~~~~~~~~~~~~~~~~~ } ~ /* Now, FIRSTLEAD and LASTLEAD are set to the beginning and ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ end, inclusive, of a range of lead bytes that cannot be ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ in the fastmap. Essentially, we want to set all the other ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bytes to be in the fastmap. Here we handle those after ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the previous range and before this one. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (jj = smallest_prev; jj < firstlead; jj++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[jj] = 1; ~~~~~~~~~~~~~~~~ smallest_prev = lastlead + 1; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* Also set lead bytes after the end of the final range. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (j = smallest_prev; j < 0x100; j++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[j] = 1; ~~~~~~~~~~~~~~~ /* If it's not a possible first byte, it can't be in the fastmap. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ In UTF-8, lead bytes are not contiguous with ASCII, so a ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ range spanning the ASCII/non-ASCII boundary will put ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ extraneous bytes in the range [0x80 - 0xBF] in the fastmap. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (j = 0x80; j < 0x100; j++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!ibyte_first_byte_p (j)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[j] = 0; ~~~~~~~~~~~~~~~ #endif /* UNICODE_INTERNAL */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ break; ~~~~~~ #endif /* MULE */ ~~~~~~~~~~~~~~~~~ case anychar: ~~~~~~~~~~~~~ { ~ int fastmap_newline = fastmap['\n']; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* `.' matches anything ... */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef MULE ~~~~~~~~~~~ /* "anything" only includes bytes that can be the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ first byte of a character. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (j = 0; j < 0x100; j++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (ibyte_first_byte_p (j)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[j] = 1; ~~~~~~~~~~~~~~~ #else ~~~~~ for (j = 0; j < (1 << BYTEWIDTH); j++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[j] = 1; ~~~~~~~~~~~~~~~ #endif ~~~~~~ /* ... except perhaps newline. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!(bufp->syntax & RE_DOT_NEWLINE)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap['\n'] = fastmap_newline; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Return if we have already set `can_be_null'; if we have, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ then the fastmap is irrelevant. Something's wrong here. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ else if (bufp->can_be_null) ~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto done; ~~~~~~~~~~ /* Otherwise, have to check alternative paths. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ } ~ #ifndef emacs ~~~~~~~~~~~~~ case wordchar: ~~~~~~~~~~~~~~ for (j = 0; j < (1 << BYTEWIDTH); j++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (SYNTAX (ignored, j) == Sword) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[j] = 1; ~~~~~~~~~~~~~~~ break; ~~~~~~ case notwordchar: ~~~~~~~~~~~~~~~~~ for (j = 0; j < (1 << BYTEWIDTH); j++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (SYNTAX (ignored, j) != Sword) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[j] = 1; ~~~~~~~~~~~~~~~ break; ~~~~~~ #else /* emacs */ ~~~~~~~~~~~~~~~~~ case wordchar: ~~~~~~~~~~~~~~ case notwordchar: ~~~~~~~~~~~~~~~~~ case wordbound: ~~~~~~~~~~~~~~~ case notwordbound: ~~~~~~~~~~~~~~~~~~ case wordbeg: ~~~~~~~~~~~~~ case wordend: ~~~~~~~~~~~~~ case notsyntaxspec: ~~~~~~~~~~~~~~~~~~~ case syntaxspec: ~~~~~~~~~~~~~~~~ /* This match depends on text properties. These end with ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ aborting optimizations. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->can_be_null = 1; ~~~~~~~~~~~~~~~~~~~~~~ goto done; ~~~~~~~~~~ #if 0 /* all of the following code is unused now that the `syntax-table' ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ property exists -- it's trickier to do this than just look in ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the buffer. &&#### but we could just use the syntax-cache stuff ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ instead; why don't we? --ben */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case wordchar: ~~~~~~~~~~~~~~ k = (int) Sword; ~~~~~~~~~~~~~~~~ goto matchsyntax; ~~~~~~~~~~~~~~~~~ case notwordchar: ~~~~~~~~~~~~~~~~~ k = (int) Sword; ~~~~~~~~~~~~~~~~ goto matchnotsyntax; ~~~~~~~~~~~~~~~~~~~~ case syntaxspec: ~~~~~~~~~~~~~~~~ k = *p++; ~~~~~~~~~ matchsyntax: ~~~~~~~~~~~~ #ifdef MULE ~~~~~~~~~~~ for (j = 0; j < 0x80; j++) ~~~~~~~~~~~~~~~~~~~~~~~~~~ if (SYNTAX ~~~~~~~~~~ (XCHAR_TABLE (BUFFER_MIRROR_SYNTAX_TABLE (lispbuf)), j) == ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (enum syntaxcode) k) ~~~~~~~~~~~~~~~~~~~~ fastmap[j] = 1; ~~~~~~~~~~~~~~~ /* @@#### To be correct, we need to set the fastmap for any ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ lead byte any of whose characters can have this syntax code. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ This is hard to calculate so we just punt for now. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (j = 0x80; j < 0x100; j++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (ibyte_first_byte_p (j)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[j] = 1; ~~~~~~~~~~~~~~~ #else /* not MULE */ ~~~~~~~~~~~~~~~~~~~~ for (j = 0; j < (1 << BYTEWIDTH); j++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (SYNTAX ~~~~~~~~~~ (XCHAR_TABLE (BUFFER_MIRROR_SYNTAX_TABLE (lispbuf)), j) == ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (enum syntaxcode) k) ~~~~~~~~~~~~~~~~~~~~ fastmap[j] = 1; ~~~~~~~~~~~~~~~ #endif /* MULE */ ~~~~~~~~~~~~~~~~~ break; ~~~~~~ case notsyntaxspec: ~~~~~~~~~~~~~~~~~~~ k = *p++; ~~~~~~~~~ matchnotsyntax: ~~~~~~~~~~~~~~~ #ifdef MULE ~~~~~~~~~~~ for (j = 0; j < 0x80; j++) ~~~~~~~~~~~~~~~~~~~~~~~~~~ if (SYNTAX ~~~~~~~~~~ (XCHAR_TABLE ~~~~~~~~~~~~ (BUFFER_MIRROR_SYNTAX_TABLE (lispbuf)), j) != ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (enum syntaxcode) k) ~~~~~~~~~~~~~~~~~~~~ fastmap[j] = 1; ~~~~~~~~~~~~~~~ /* @@#### To be correct, we need to set the fastmap for any ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ lead byte all of whose characters do not have this syntax code. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ This is hard to calculate so we just punt for now. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (j = 0x80; j < 0x100; j++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (ibyte_first_byte_p (j)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[j] = 1; ~~~~~~~~~~~~~~~ #else /* not MULE */ ~~~~~~~~~~~~~~~~~~~~ for (j = 0; j < (1 << BYTEWIDTH); j++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (SYNTAX ~~~~~~~~~~ (XCHAR_TABLE ~~~~~~~~~~~~ (BUFFER_MIRROR_SYNTAX_TABLE (lispbuf)), j) != ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (enum syntaxcode) k) ~~~~~~~~~~~~~~~~~~~~ fastmap[j] = 1; ~~~~~~~~~~~~~~~ #endif /* MULE */ ~~~~~~~~~~~~~~~~~ break; ~~~~~~ #endif /* 0 */ ~~~~~~~~~~~~~~ #ifdef MULE ~~~~~~~~~~~ /* 97/2/17 jhod category patch */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case categoryspec: ~~~~~~~~~~~~~~~~~~ case notcategoryspec: ~~~~~~~~~~~~~~~~~~~~~ bufp->can_be_null = 1; ~~~~~~~~~~~~~~~~~~~~~~ UNBIND_REGEX_MALLOC_CHECK (); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return 0; ~~~~~~~~~ /* end if category patch */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif /* MULE */ ~~~~~~~~~~~~~~~~~ /* All cases after this match the empty string. These end with ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `continue'. */ ~~~~~~~~~~~~~~~ case before_dot: ~~~~~~~~~~~~~~~~ case at_dot: ~~~~~~~~~~~~ case after_dot: ~~~~~~~~~~~~~~~ continue; ~~~~~~~~~ #endif /* emacs */ ~~~~~~~~~~~~~~~~~~ case no_op: ~~~~~~~~~~~ case begline: ~~~~~~~~~~~~~ case endline: ~~~~~~~~~~~~~ case begbuf: ~~~~~~~~~~~~ case endbuf: ~~~~~~~~~~~~ #ifndef emacs ~~~~~~~~~~~~~ case wordbound: ~~~~~~~~~~~~~~~ case notwordbound: ~~~~~~~~~~~~~~~~~~ case wordbeg: ~~~~~~~~~~~~~ case wordend: ~~~~~~~~~~~~~ #endif ~~~~~~ case push_dummy_failure: ~~~~~~~~~~~~~~~~~~~~~~~~ continue; ~~~~~~~~~ case jump_n: ~~~~~~~~~~~~ case pop_failure_jump: ~~~~~~~~~~~~~~~~~~~~~~ case maybe_pop_jump: ~~~~~~~~~~~~~~~~~~~~ case jump: ~~~~~~~~~~ case jump_past_alt: ~~~~~~~~~~~~~~~~~~~ case dummy_failure_jump: ~~~~~~~~~~~~~~~~~~~~~~~~ EXTRACT_NUMBER_AND_INCR (j, p); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ p += j; ~~~~~~~ if (j > 0) ~~~~~~~~~~ continue; ~~~~~~~~~ /* Jump backward implies we just went through the body of a ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ loop and matched nothing. Opcode jumped to should be ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `on_failure_jump' or `succeed_n'. Just treat it like an ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ordinary jump. For a * loop, it has pushed its failure ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ point already; if so, discard that as redundant. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if ((re_opcode_t) *p != on_failure_jump ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ && (re_opcode_t) *p != succeed_n) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ continue; ~~~~~~~~~ p++; ~~~~ EXTRACT_NUMBER_AND_INCR (j, p); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ p += j; ~~~~~~~ /* If what's on the stack is where we are now, pop it. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!FAIL_STACK_EMPTY () ~~~~~~~~~~~~~~~~~~~~~~~~ && fail_stack.stack[fail_stack.avail - 1].pointer == p) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fail_stack.avail--; ~~~~~~~~~~~~~~~~~~~ continue; ~~~~~~~~~ case on_failure_jump: ~~~~~~~~~~~~~~~~~~~~~ case on_failure_keep_string_jump: ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ handle_on_failure_jump: ~~~~~~~~~~~~~~~~~~~~~~~ EXTRACT_NUMBER_AND_INCR (j, p); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* For some patterns, e.g., `(a?)?', `p+j' here points to the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ end of the pattern. We don't want to push such a point, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ since when we restore it above, entering the switch will ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ increment `p' past the end of the pattern. We don't need ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ to push such a point since we obviously won't find any more ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap entries beyond `pend'. Such a pattern can match ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the null string, though. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (p + j < pend) ~~~~~~~~~~~~~~~~~ { ~ if (!PUSH_PATTERN_OP (p + j, fail_stack)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ RESET_FAIL_STACK (); ~~~~~~~~~~~~~~~~~~~~ UNBIND_REGEX_MALLOC_CHECK (); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return -2; ~~~~~~~~~~ } ~ } ~ else ~~~~ bufp->can_be_null = 1; ~~~~~~~~~~~~~~~~~~~~~~ if (succeed_n_p) ~~~~~~~~~~~~~~~~ { ~ EXTRACT_NUMBER_AND_INCR (k, p); /* Skip the n. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ succeed_n_p = false; ~~~~~~~~~~~~~~~~~~~~ } ~ continue; ~~~~~~~~~ case succeed_n: ~~~~~~~~~~~~~~~ /* Get to the number of times to succeed. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ p += 2; ~~~~~~~ /* Increment p past the n for when k != 0. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ EXTRACT_NONNEGATIVE_AND_INCR (k, p); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (k == 0) ~~~~~~~~~~~ { ~ p -= 4; ~~~~~~~ succeed_n_p = true; /* Spaghetti code alert. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto handle_on_failure_jump; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ continue; ~~~~~~~~~ case set_number_at: ~~~~~~~~~~~~~~~~~~~ p += 4; ~~~~~~~ continue; ~~~~~~~~~ case start_memory: ~~~~~~~~~~~~~~~~~~ case stop_memory: ~~~~~~~~~~~~~~~~~ p += 4; ~~~~~~~ continue; ~~~~~~~~~ default: ~~~~~~~~ ABORT (); /* We have listed all the cases. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } /* switch *p++ */ ~~~~~~~~~~~~~~~~~~~ /* Getting here means we have found the possible starting ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ characters for one path of the pattern -- and that the empty ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ string does not match. We need not follow this path further. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Instead, look at the next alternative (remembered on the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ stack), or quit if no more. The test at the top of the loop ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ does these things. */ ~~~~~~~~~~~~~~~~~~~~~~ path_can_be_null = false; ~~~~~~~~~~~~~~~~~~~~~~~~~ p = pend; ~~~~~~~~~ } /* while p */ ~~~~~~~~~~~~~~~ /* Set `can_be_null' for the last path (also the first path, if the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ pattern is empty). */ ~~~~~~~~~~~~~~~~~~~~~~ bufp->can_be_null |= path_can_be_null; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ done: ~~~~~ RESET_FAIL_STACK (); ~~~~~~~~~~~~~~~~~~~~ UNBIND_REGEX_MALLOC_CHECK (); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return 0; ~~~~~~~~~ } /* re_compile_fastmap */ ~~~~~~~~~~~~~~~~~~~~~~~~~~ ~ /* Set REGS to hold NUM_REGS registers, storing them in STARTS and ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ENDS. Subsequent matches using PATTERN_BUFFER and REGS will use ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ this memory for recording register information. STARTS and ENDS ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ must be allocated using the malloc library routine, and must each ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ be at least NUM_REGS * sizeof (regoff_t) bytes long. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ If NUM_REGS == 0, then subsequent matches should allocate their own ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ register data. ~~~~~~~~~~~~~~ Unless this function is called, the first search or match using ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PATTERN_BUFFER will allocate its own register data, without ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ freeing the old data. */ ~~~~~~~~~~~~~~~~~~~~~~~~~ void ~~~~ re_set_registers (struct re_pattern_buffer *bufp, struct re_registers *regs, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int num_regs, regoff_t *starts, regoff_t *ends) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ if (num_regs) ~~~~~~~~~~~~~ { ~ bufp->regs_allocated = REGS_REALLOCATE; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regs->num_regs = num_regs; ~~~~~~~~~~~~~~~~~~~~~~~~~~ regs->start = starts; ~~~~~~~~~~~~~~~~~~~~~ regs->end = ends; ~~~~~~~~~~~~~~~~~ } ~ else ~~~~ { ~ bufp->regs_allocated = REGS_UNALLOCATED; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regs->num_regs = 0; ~~~~~~~~~~~~~~~~~~~ regs->start = regs->end = (regoff_t *) 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ } ~ ~ /* Searching routines. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Like re_search_2, below, but only one string is specified, and ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ doesn't let you say where to stop matching. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int ~~~ re_search (struct re_pattern_buffer *bufp, const char *string, int size, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int startpos, int range, struct re_registers *regs ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RE_LISP_CONTEXT_ARGS_DECL) ~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ return re_search_2 (bufp, NULL, 0, string, size, startpos, range, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regs, size RE_LISP_CONTEXT_ARGS); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* Using the compiled pattern in BUFP->buffer, first tries to match the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ virtual concatenation of STRING1 and STRING2, starting first at index ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ STARTPOS, then at STARTPOS + 1, and so on. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ STRING1 and STRING2 have length SIZE1 and SIZE2, respectively. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RANGE is how far to scan while trying to match. RANGE = 0 means try ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ only at STARTPOS; in general, the last start tried is STARTPOS + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RANGE. ~~~~~~ All sizes and positions refer to bytes (not chars); under Mule, the code ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ knows about the format of the text and will only check at positions ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ where a character starts. ~~~~~~~~~~~~~~~~~~~~~~~~~ With MULE, RANGE is a byte position, not a char position. The last ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ start tried is the character starting <= STARTPOS + RANGE. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ In REGS, return the indices of the virtual concatenation of STRING1 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ and STRING2 that matched the entire BUFP->buffer and its contained ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ subexpressions. ~~~~~~~~~~~~~~~ Do not consider matching one past the index STOP in the virtual ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ concatenation of STRING1 and STRING2. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ We return either the position in the strings at which the match was ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ found, -1 if no match, or -2 if error (such as failure ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ stack overflow). */ ~~~~~~~~~~~~~~~~~~~~ int ~~~ re_search_2 (struct re_pattern_buffer *bufp, const char *str1, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int size1, const char *str2, int size2, int startpos, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int range, struct re_registers *regs, int stop ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RE_LISP_CONTEXT_ARGS_DECL) ~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ int val; ~~~~~~~~ re_char *string1 = (re_char *) str1; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_char *string2 = (re_char *) str2; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ REGISTER char *fastmap = bufp->fastmap; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ REGISTER RE_TRANSLATE_TYPE translate = bufp->translate; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int total_size = size1 + size2; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int endpos = startpos + range; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef REGEX_BEGLINE_CHECK ~~~~~~~~~~~~~~~~~~~~~~~~~~ int anchored_at_begline = 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ re_char *d; ~~~~~~~~~~~ #ifdef emacs ~~~~~~~~~~~~ Internal_Format fmt = buffer_or_other_internal_format (lispobj); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef REL_ALLOC ~~~~~~~~~~~~~~~~ const Ibyte *orig_buftext = ~~~~~~~~~~~~~~~~~~~~~~~~~~~ BUFFERP (lispobj) ? ~~~~~~~~~~~~~~~~~~~ BYTE_BUF_BYTE_ADDRESS (XBUFFER (lispobj), ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ BYTE_BUF_BEG (XBUFFER (lispobj))) : ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 0; ~~ #endif ~~~~~~ #ifdef ERROR_CHECK_MALLOC ~~~~~~~~~~~~~~~~~~~~~~~~~ int depth; ~~~~~~~~~~ #endif ~~~~~~ #endif /* emacs */ ~~~~~~~~~~~~~~~~~~ int forward_search_p; ~~~~~~~~~~~~~~~~~~~~~ /* Check for out-of-range STARTPOS. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (startpos < 0 || startpos > total_size) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return -1; ~~~~~~~~~~ /* Fix up RANGE if it might eventually take us outside ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the virtual concatenation of STRING1 and STRING2. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (endpos < 0) ~~~~~~~~~~~~~~~ range = 0 - startpos; ~~~~~~~~~~~~~~~~~~~~~ else if (endpos > total_size) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ range = total_size - startpos; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ forward_search_p = range > 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (void) (forward_search_p); /* This is only used with assertions, silence the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ compiler warning when they're turned off. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* If the search isn't to be a backwards one, don't waste time in a ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ search for a pattern that must be anchored. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (bufp->used > 0 && (re_opcode_t) bufp->buffer[0] == begbuf && range > 0) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ if (startpos > 0) ~~~~~~~~~~~~~~~~~ return -1; ~~~~~~~~~~ else ~~~~ { ~ d = ((const unsigned char *) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (startpos >= size1 ? string2 - size1 : string1) + startpos); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ range = itext_ichar_len_fmt (d, fmt); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ } ~ #ifdef emacs ~~~~~~~~~~~~ /* In a forward search for something that starts with \=. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ don't keep searching past point. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (bufp->used > 0 && (re_opcode_t) bufp->buffer[0] == at_dot && range > 0) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ if (!BUFFERP (lispobj)) ~~~~~~~~~~~~~~~~~~~~~~~ return -1; ~~~~~~~~~~ range = (BYTE_BUF_PT (XBUFFER (lispobj)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - BYTE_BUF_BEGV (XBUFFER (lispobj)) - startpos); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (range < 0) ~~~~~~~~~~~~~~ return -1; ~~~~~~~~~~ } ~ #endif /* emacs */ ~~~~~~~~~~~~~~~~~~ #ifdef ERROR_CHECK_MALLOC ~~~~~~~~~~~~~~~~~~~~~~~~~ /* Do this after the above return()s. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ depth = bind_regex_malloc_disallowed (1); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ /* Update the fastmap now if not correct already. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ BEGIN_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~~~ if (fastmap && !bufp->fastmap_accurate) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (re_compile_fastmap (bufp RE_LISP_SHORT_CONTEXT_ARGS) == -2) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ END_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~ UNBIND_REGEX_MALLOC_CHECK (); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return -2; ~~~~~~~~~~ } ~ END_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~ RE_SEARCH_RELOCATE_MOVEABLE_DATA_POINTERS (); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef REGEX_BEGLINE_CHECK ~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ long i = 0; ~~~~~~~~~~~ while (i < bufp->used) ~~~~~~~~~~~~~~~~~~~~~~ { ~ if (bufp->buffer[i] == start_memory || ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->buffer[i] == stop_memory) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ i += 4; ~~~~~~~ else ~~~~ break; ~~~~~~ } ~ anchored_at_begline = i < bufp->used && bufp->buffer[i] == begline; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ #endif ~~~~~~ #ifdef emacs ~~~~~~~~~~~~ BEGIN_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~~~ /* Update the mirror syntax table if it's used and dirty. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ SYNTAX_CODE (BUFFER_MIRROR_SYNTAX_TABLE (lispbuf), 'a'); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ scache = setup_syntax_cache (scache, lispobj, lispbuf, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ offset_to_bytexpos (lispobj, startpos), ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 1); ~~~ END_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~ RE_SEARCH_RELOCATE_MOVEABLE_DATA_POINTERS (); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ /* Loop through the string, looking for a place to start matching. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (;;) ~~~~~~~~ { ~ #ifdef REGEX_BEGLINE_CHECK ~~~~~~~~~~~~~~~~~~~~~~~~~~ /* If the regex is anchored at the beginning of a line (i.e. with a ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ^), then we can speed things up by skipping to the next ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ beginning-of-line. However, to determine "beginning of line" we ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ need to look at the previous char, so can't do this check if at ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ beginning of either string. (Well, we could if at the beginning of ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the second string, but it would require additional code, and this ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ is just an optimization.) */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (anchored_at_begline && startpos > 0 && startpos != size1) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ if (range > 0) ~~~~~~~~~~~~~~ { ~ /* whose stupid idea was it anyway to make this ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ function take two strings to match?? */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int lim = 0; ~~~~~~~~~~~~ re_char *orig_d; ~~~~~~~~~~~~~~~~ re_char *stop_d; ~~~~~~~~~~~~~~~~ /* Compute limit as below in fastmap code, so we are guaranteed ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ to remain within a single string. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (startpos < size1 && startpos + range >= size1) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ lim = range - (size1 - startpos); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ d = ((const unsigned char *) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (startpos >= size1 ? string2 - size1 : string1) + startpos); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ orig_d = d; ~~~~~~~~~~~ stop_d = d + range - lim; ~~~~~~~~~~~~~~~~~~~~~~~~~ /* We want to find the next location (including the current ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ one) where the previous char is a newline, so back up one ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ and search forward for a newline. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEC_IBYTEPTR_FMT (d, fmt); /* Ok, since startpos != size1. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Written out as an if-else to avoid testing `translate' ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ inside the loop. */ ~~~~~~~~~~~~~~~~~~~~ if (TRANSLATE_P (translate)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ while (d < stop_d && ~~~~~~~~~~~~~~~~~~~~ RE_TRANSLATE_1 (itext_ichar_fmt (d, fmt, lispobj)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ != '\n') ~~~~~~~~ INC_IBYTEPTR_FMT (d, fmt); ~~~~~~~~~~~~~~~~~~~~~~~~~~ else ~~~~ while (d < stop_d && ~~~~~~~~~~~~~~~~~~~~ itext_ichar_ascii_fmt (d, fmt, lispobj) != '\n') ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ INC_IBYTEPTR_FMT (d, fmt); ~~~~~~~~~~~~~~~~~~~~~~~~~~ /* If we were stopped by a newline, skip forward over it. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Otherwise we will get in an infloop when our start position ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ was at begline. */ ~~~~~~~~~~~~~~~~~~ if (d < stop_d) ~~~~~~~~~~~~~~~ INC_IBYTEPTR_FMT (d, fmt); ~~~~~~~~~~~~~~~~~~~~~~~~~~ range -= d - orig_d; ~~~~~~~~~~~~~~~~~~~~ startpos += d - orig_d; ~~~~~~~~~~~~~~~~~~~~~~~ assert (!forward_search_p || range >= 0); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ else if (range < 0) ~~~~~~~~~~~~~~~~~~~ { ~ /* We're lazy, like in the fastmap code below */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Ichar c; ~~~~~~~~ d = ((const unsigned char *) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (startpos >= size1 ? string2 - size1 : string1) + startpos); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEC_IBYTEPTR_FMT (d, fmt); ~~~~~~~~~~~~~~~~~~~~~~~~~~ c = itext_ichar_fmt (d, fmt, lispobj); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ c = RE_TRANSLATE (c); ~~~~~~~~~~~~~~~~~~~~~ if (c != '\n') ~~~~~~~~~~~~~~ goto advance; ~~~~~~~~~~~~~ } ~ } ~ #endif /* REGEX_BEGLINE_CHECK */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* If a fastmap is supplied, skip quickly over characters that ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ cannot be the start of a match. If the pattern can match the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ null string, however, we don't need to skip characters; we want ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the first null string. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~ if (fastmap && startpos < total_size && !bufp->can_be_null) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ /* For the moment, fastmap always works as if buffer ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ is in default format, so convert chars in the search strings ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ into default format as we go along, if necessary. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ &&#### fastmap needs rethinking for 8-bit-fixed so ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ it's faster. We need it to reflect the raw ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 8-bit-fixed values. That isn't so hard if we assume ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ that the top 96 bytes represent a single 1-byte ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ charset. For 16-bit/32-bit stuff it's probably not ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ worth it to make the fastmap represent the raw, due to ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ its nature -- we'd have to use the LSB for the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap, and that causes lots of problems with Mule ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ chars, where it essentially wipes out the usefulness ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ of the fastmap entirely. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (range > 0) /* Searching forwards. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ int lim = 0; ~~~~~~~~~~~~ int irange = range; ~~~~~~~~~~~~~~~~~~~ if (startpos < size1 && startpos + range >= size1) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ lim = range - (size1 - startpos); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ d = ((const unsigned char *) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (startpos >= size1 ? string2 - size1 : string1) + startpos); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Written out as an if-else to avoid testing `translate' ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ inside the loop. */ ~~~~~~~~~~~~~~~~~~~~ if (TRANSLATE_P (translate)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ while (range > lim) ~~~~~~~~~~~~~~~~~~~ { ~ re_char *old_d = d; ~~~~~~~~~~~~~~~~~~~ #ifdef MULE ~~~~~~~~~~~ Ibyte tempch[MAX_ICHAR_LEN]; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Ichar buf_ch = ~~~~~~~~~~~~~~ RE_TRANSLATE_1 (itext_ichar_fmt (d, fmt, lispobj)); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ set_itext_ichar (tempch, buf_ch); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (fastmap[*tempch]) ~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ #else ~~~~~ if (fastmap[(unsigned char) RE_TRANSLATE_1 (*d)]) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ #endif /* MULE */ ~~~~~~~~~~~~~~~~~ INC_IBYTEPTR_FMT (d, fmt); ~~~~~~~~~~~~~~~~~~~~~~~~~~ range -= (d - old_d); ~~~~~~~~~~~~~~~~~~~~~ assert (!forward_search_p || range >= 0); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ } ~ #ifdef MULE ~~~~~~~~~~~ else if (fmt != FORMAT_DEFAULT) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ while (range > lim) ~~~~~~~~~~~~~~~~~~~ { ~ re_char *old_d = d; ~~~~~~~~~~~~~~~~~~~ Ibyte tempch[MAX_ICHAR_LEN]; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Ichar buf_ch = itext_ichar_fmt (d, fmt, lispobj); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ set_itext_ichar (tempch, buf_ch); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (fastmap[*tempch]) ~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ INC_IBYTEPTR_FMT (d, fmt); ~~~~~~~~~~~~~~~~~~~~~~~~~~ range -= (d - old_d); ~~~~~~~~~~~~~~~~~~~~~ assert (!forward_search_p || range >= 0); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ } ~ #endif /* MULE */ ~~~~~~~~~~~~~~~~~ else ~~~~ { ~ while (range > lim && !fastmap[*d]) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ re_char *old_d = d; ~~~~~~~~~~~~~~~~~~~ INC_IBYTEPTR (d); ~~~~~~~~~~~~~~~~~ range -= (d - old_d); ~~~~~~~~~~~~~~~~~~~~~ assert (!forward_search_p || range >= 0); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ } ~ startpos += irange - range; ~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ else /* Searching backwards. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ /* #### It's not clear why we don't just write a loop, like ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for the moving-forward case. Perhaps the writer got lazy, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ since backward searches aren't so common. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ d = ((const unsigned char *) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (startpos >= size1 ? string2 - size1 : string1) + startpos); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef MULE ~~~~~~~~~~~ { ~ Ibyte tempch[MAX_ICHAR_LEN]; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Ichar buf_ch = ~~~~~~~~~~~~~~ RE_TRANSLATE (itext_ichar_fmt (d, fmt, lispobj)); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ set_itext_ichar (tempch, buf_ch); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!fastmap[*tempch]) ~~~~~~~~~~~~~~~~~~~~~~ goto advance; ~~~~~~~~~~~~~ } ~ #else ~~~~~ if (!fastmap[(unsigned char) RE_TRANSLATE (*d)]) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto advance; ~~~~~~~~~~~~~ #endif /* MULE */ ~~~~~~~~~~~~~~~~~ } ~ } ~ /* If can't match the null string, and that's all we have left, fail. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (range >= 0 && startpos == total_size && fastmap ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ && !bufp->can_be_null) ~~~~~~~~~~~~~~~~~~~~~~ { ~ UNBIND_REGEX_MALLOC_CHECK (); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return -1; ~~~~~~~~~~ } ~ #ifdef emacs /* XEmacs added, w/removal of immediate_quit */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!no_quit_in_re_search) ~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ BEGIN_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~~~ QUIT; ~~~~~ END_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~ RE_SEARCH_RELOCATE_MOVEABLE_DATA_POINTERS (); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ #endif ~~~~~~ BEGIN_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~~~ val = re_match_2_internal (bufp, string1, size1, string2, size2, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ startpos, regs, stop ~~~~~~~~~~~~~~~~~~~~ RE_LISP_CONTEXT_ARGS); ~~~~~~~~~~~~~~~~~~~~~~ #ifndef REGEX_MALLOC ~~~~~~~~~~~~~~~~~~~~ ALLOCA_GARBAGE_COLLECT (); ~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ END_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~ RE_SEARCH_RELOCATE_MOVEABLE_DATA_POINTERS (); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (val >= 0) ~~~~~~~~~~~~~ { ~ UNBIND_REGEX_MALLOC_CHECK (); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return startpos; ~~~~~~~~~~~~~~~~ } ~ if (val == -2) ~~~~~~~~~~~~~~ { ~ UNBIND_REGEX_MALLOC_CHECK (); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return -2; ~~~~~~~~~~ } ~ RE_SEARCH_RELOCATE_MOVEABLE_DATA_POINTERS (); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ advance: ~~~~~~~~ if (!range) ~~~~~~~~~~~ break; ~~~~~~ else if (range > 0) ~~~~~~~~~~~~~~~~~~~ { ~ Bytecount d_size; ~~~~~~~~~~~~~~~~~ d = ((const unsigned char *) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (startpos >= size1 ? string2 - size1 : string1) + startpos); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ d_size = itext_ichar_len_fmt (d, fmt); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ range -= d_size; ~~~~~~~~~~~~~~~~ assert (!forward_search_p || range >= 0); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ startpos += d_size; ~~~~~~~~~~~~~~~~~~~ } ~ else ~~~~ { ~ Bytecount d_size; ~~~~~~~~~~~~~~~~~ /* Note startpos > size1 not >=. If we are on the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ string1/string2 boundary, we want to backup into string1. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ d = ((const unsigned char *) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (startpos > size1 ? string2 - size1 : string1) + startpos); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEC_IBYTEPTR_FMT (d, fmt); ~~~~~~~~~~~~~~~~~~~~~~~~~~ d_size = itext_ichar_len_fmt (d, fmt); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ range += d_size; ~~~~~~~~~~~~~~~~ assert (!forward_search_p || range >= 0); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ startpos -= d_size; ~~~~~~~~~~~~~~~~~~~ } ~ } ~ UNBIND_REGEX_MALLOC_CHECK (); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return -1; ~~~~~~~~~~ } /* re_search_2 */ ~~~~~~~~~~~~~~~~~~~ ~ /* Declarations and macros for re_match_2. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* This converts PTR, a pointer into one of the search strings `string1' ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ and `string2' into an offset from the beginning of that string. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define POINTER_TO_OFFSET(ptr) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (FIRST_STRING_P (ptr) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~ ? ((regoff_t) ((ptr) - string1)) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ : ((regoff_t) ((ptr) - string2 + size1))) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Macros for dealing with the split strings in re_match_2. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define MATCHING_IN_FIRST_STRING (dend == end_match_1) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Call before fetching a character with *d. This switches over to ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ string2 if necessary. */ ~~~~~~~~~~~~~~~~~~~~~~~~~ #define REGEX_PREFETCH() \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ while (d == dend) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { \ ~~~~~~~~~~~ /* End of string2 => fail. */ \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (dend == end_match_2) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto fail; \ ~~~~~~~~~~~~~~~~~~ /* End of string1 => advance to string2. */ \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ d = string2; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~ dend = end_match_2; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* Test if at very beginning or at very end of the virtual concatenation ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ of `string1' and `string2'. If only one string, it's `string2'. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define AT_STRINGS_BEG(d) ((d) == (size1 ? string1 : string2) || !size2) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define AT_STRINGS_END(d) ((d) == end2) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* XEmacs change: ~~~~~~~~~~~~~~~~~ If the given position straddles the string gap, return the equivalent ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ position that is before or after the gap, respectively; otherwise, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return the same position. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define POS_BEFORE_GAP_UNSAFE(d) ((d) == string2 ? end1 : (d)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define POS_AFTER_GAP_UNSAFE(d) ((d) == end1 ? string2 : (d)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Test if CH is a word-constituent character. (XEmacs change) */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define WORDCHAR_P(ch) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (SYNTAX (BUFFER_MIRROR_SYNTAX_TABLE (lispbuf), ch) == Sword) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Free everything we malloc. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef MATCH_MAY_ALLOCATE ~~~~~~~~~~~~~~~~~~~~~~~~~ #define FREE_VAR(var,type) if (var) REGEX_FREE (var, type); var = NULL ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define FREE_VARIABLES() \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ do { \ ~~~~~~~~~~~~~~ UNBIND_REGEX_MALLOC_CHECK (); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ REGEX_FREE_STACK (fail_stack.stack); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ FREE_VAR (regstart, re_char **); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ FREE_VAR (regend, re_char **); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ FREE_VAR (old_regstart, re_char **); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ FREE_VAR (old_regend, re_char **); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ FREE_VAR (best_regstart, re_char **); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ FREE_VAR (best_regend, re_char **); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ FREE_VAR (reg_info, register_info_type *); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ FREE_VAR (reg_dummy, re_char **); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ FREE_VAR (reg_info_dummy, register_info_type *); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } while (0) ~~~~~~~~~~~ #else /* not MATCH_MAY_ALLOCATE */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define FREE_VARIABLES() \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ do { \ ~~~~~~~~~~~ UNBIND_REGEX_MALLOC_CHECK (); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } while (0) ~~~~~~~~~~~ #endif /* MATCH_MAY_ALLOCATE */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* These values must meet several constraints. They must not be valid ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ register values, which means we can use numbers larger than MAX_REGNUM. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ They must differ by 1, because of NUM_FAILURE_ITEMS above. And the value ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for the lowest register must be larger than the value for the highest ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ register, so we do not try to actually save any registers when none are ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ active. */ ~~~~~~~~~~~ #define NO_HIGHEST_ACTIVE_REG (MAX_REGNUM + 1) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define NO_LOWEST_ACTIVE_REG (NO_HIGHEST_ACTIVE_REG + 1) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ~ /* Matching routines. */ ~~~~~~~~~~~~~~~~~~~~~~~~~ #ifndef emacs /* XEmacs never uses this. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* re_match is like re_match_2 except it takes only a single string. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int ~~~ re_match (struct re_pattern_buffer *bufp, const char *string, int size, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int pos, struct re_registers *regs ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RE_LISP_CONTEXT_ARGS_DECL) ~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ int result = re_match_2_internal (bufp, NULL, 0, (re_char *) string, size, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ pos, regs, size ~~~~~~~~~~~~~~~ RE_LISP_CONTEXT_ARGS); ~~~~~~~~~~~~~~~~~~~~~~ ALLOCA_GARBAGE_COLLECT (); ~~~~~~~~~~~~~~~~~~~~~~~~~~ return result; ~~~~~~~~~~~~~~ } ~ #endif /* not emacs */ ~~~~~~~~~~~~~~~~~~~~~~ /* re_match_2 matches the compiled pattern in BUFP against the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (virtual) concatenation of STRING1 and STRING2 (of length SIZE1 and ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ SIZE2, respectively). We start matching at POS, and stop matching ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ at STOP. ~~~~~~~~ If REGS is non-null and the `no_sub' field of BUFP is nonzero, we ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ store offsets for the substring each group matched in REGS. See the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ documentation for exactly how many groups we fill. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ We return -1 if no match, -2 if an internal error (such as the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ failure stack overflowing). Otherwise, we return the length of the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ matched substring. */ ~~~~~~~~~~~~~~~~~~~~~~ int ~~~ re_match_2 (struct re_pattern_buffer *bufp, const char *string1, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int size1, const char *string2, int size2, int pos, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ struct re_registers *regs, int stop ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RE_LISP_CONTEXT_ARGS_DECL) ~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ int result; ~~~~~~~~~~~ #ifdef emacs ~~~~~~~~~~~~ /* Update the mirror syntax table if it's dirty now, this would otherwise ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ cause a malloc() in charset_mule in re_match_2_internal() when checking ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ characters' syntax. */ ~~~~~~~~~~~~~~~~~~~~~~ SYNTAX_CODE (BUFFER_MIRROR_SYNTAX_TABLE (lispbuf), 'a'); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ scache = setup_syntax_cache (scache, lispobj, lispbuf, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ offset_to_bytexpos (lispobj, pos), ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 1); ~~~ #endif ~~~~~~ result = re_match_2_internal (bufp, (re_char *) string1, size1, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (re_char *) string2, size2, ~~~~~~~~~~~~~~~~~~~~~~~~~~~ pos, regs, stop ~~~~~~~~~~~~~~~ RE_LISP_CONTEXT_ARGS); ~~~~~~~~~~~~~~~~~~~~~~ ALLOCA_GARBAGE_COLLECT (); ~~~~~~~~~~~~~~~~~~~~~~~~~~ return result; ~~~~~~~~~~~~~~ } ~ /* This is a separate function so that we can force an alloca cleanup ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ afterwards. */ ~~~~~~~~~~~~~~~ static int ~~~~~~~~~~ re_match_2_internal (struct re_pattern_buffer *bufp, re_char *string1, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int size1, re_char *string2, int size2, int pos, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ struct re_registers *regs, int stop ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RE_LISP_CONTEXT_ARGS_MULE_DECL) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ /* General temporaries. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~ int mcnt; ~~~~~~~~~ re_char *p1; ~~~~~~~~~~~~ int should_succeed; /* XEmacs change */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Just past the end of the corresponding string. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_char *end1, *end2; ~~~~~~~~~~~~~~~~~~~~~ /* Pointers into string1 and string2, just past the last characters in ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ each to consider matching. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_char *end_match_1, *end_match_2; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Where we are in the data, and the end of the current string. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_char *d, *dend; ~~~~~~~~~~~~~~~~~~ /* Where we are in the pattern, and the end of the pattern. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ unsigned char *p; ~~~~~~~~~~~~~~~~~ re_char *pstart; ~~~~~~~~~~~~~~~~ REGISTER re_char *pend; ~~~~~~~~~~~~~~~~~~~~~~~ /* Mark the opcode just after a start_memory, so we can test for an ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ empty subpattern when we get to the stop_memory. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_char *just_past_start_mem = 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* We use this to map every character in the string. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RE_TRANSLATE_TYPE translate = bufp->translate; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Failure point stack. Each place that can handle a failure further ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ down the line pushes a failure point on this stack. It consists of ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ restart, regend, and reg_info for all registers corresponding to ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the subexpressions we're currently inside, plus the number of such ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ registers, and, finally, two char *'s. The first char * is where ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ to resume scanning the pattern; the second one is where to resume ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ scanning the strings. If the latter is zero, the failure point is ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ a ``dummy''; if a failure happens and the failure point is a dummy, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ it gets discarded and the next one is tried. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef MATCH_MAY_ALLOCATE /* otherwise, this is global. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fail_stack_type fail_stack; ~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ #ifdef DEBUG ~~~~~~~~~~~~ static int failure_id; ~~~~~~~~~~~~~~~~~~~~~~ int nfailure_points_pushed = 0, nfailure_points_popped = 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ #ifdef REGEX_REL_ALLOC ~~~~~~~~~~~~~~~~~~~~~~ /* This holds the pointer to the failure stack, when ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ it is allocated relocatably. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fail_stack_elt_t *failure_stack_ptr; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ /* We fill all the registers internally, independent of what we ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return, for use in backreferences. The number here includes ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ an element for register zero. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regnum_t num_regs = bufp->re_ngroups + 1; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* The currently active registers. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regnum_t lowest_active_reg = NO_LOWEST_ACTIVE_REG; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regnum_t highest_active_reg = NO_HIGHEST_ACTIVE_REG; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Information on the contents of registers. These are pointers into ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the input strings; they record just what was matched (on this ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ attempt) by a subexpression part of the pattern, that is, the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regnum-th regstart pointer points to where in the pattern we began ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ matching and the regnum-th regend points to right after where we ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ stopped matching the regnum-th subexpression. (The zeroth register ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ keeps track of what the whole pattern matches.) */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_char **regstart, **regend; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ /* If a group that's operated upon by a repetition operator fails to ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ match anything, then the register for its start will need to be ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ restored because it will have been set to wherever in the string we ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ are when we last see its open-group operator. Similarly for a ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ register's end. */ ~~~~~~~~~~~~~~~~~~~ #ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_char **old_regstart, **old_regend; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ /* The is_active field of reg_info helps us keep track of which (possibly ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ nested) subexpressions we are currently in. The matched_something ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ field of reg_info[reg_num] helps us tell whether or not we have ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ matched any of the pattern so far this time through the reg_num-th ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ subexpression. These two fields get reset each time through any ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ loop their register is in. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef MATCH_MAY_ALLOCATE /* otherwise, this is global. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ register_info_type *reg_info; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ /* The following record the register info as found in the above ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ variables when we find a match better than any we've seen before. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ This happens as we backtrack through the failure points, which in ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ turn happens only if we have not yet matched the entire string. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int best_regs_set = false; ~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_char **best_regstart, **best_regend; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ /* Logically, this is `best_regend[0]'. But we don't want to have to ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ allocate space for that if we're not allocating space for anything ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ else (see below). Also, we never need info about register 0 for ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ any of the other register vectors, and it seems rather a kludge to ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ treat `best_regend' differently than the rest. So we keep track of ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the end of the best match so far in a separate variable. We ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ initialize this to NULL so that when we backtrack the first time ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ and need to test it, it's not garbage. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_char *match_end = NULL; ~~~~~~~~~~~~~~~~~~~~~~~~~~ /* This helps SET_REGS_MATCHED avoid doing redundant work. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int set_regs_matched_done = 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Used when we pop values we don't care about. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_char **reg_dummy; ~~~~~~~~~~~~~~~~~~~~ register_info_type *reg_info_dummy; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ #ifdef DEBUG ~~~~~~~~~~~~ /* Counts the total number of registers pushed. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int num_regs_pushed = 0; ~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ /* 1 if this match ends in the same string (string1 or string2) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ as the best previous match. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_bool same_str_p; ~~~~~~~~~~~~~~~~~~~ /* 1 if this match is the best seen so far. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_bool best_match_p; ~~~~~~~~~~~~~~~~~~~~~ #ifdef emacs ~~~~~~~~~~~~ Internal_Format fmt = buffer_or_other_internal_format (lispobj); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef REL_ALLOC ~~~~~~~~~~~~~~~~ const Ibyte *orig_buftext = ~~~~~~~~~~~~~~~~~~~~~~~~~~~ BUFFERP (lispobj) ? ~~~~~~~~~~~~~~~~~~~ BYTE_BUF_BYTE_ADDRESS (XBUFFER (lispobj), ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ BYTE_BUF_BEG (XBUFFER (lispobj))) : ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 0; ~~ #endif ~~~~~~ #ifdef ERROR_CHECK_MALLOC ~~~~~~~~~~~~~~~~~~~~~~~~~ int depth = bind_regex_malloc_disallowed (1); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ #endif /* emacs */ ~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT1 ("\n\nEntering re_match_2.\n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ BEGIN_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~~~ INIT_FAIL_STACK (); ~~~~~~~~~~~~~~~~~~~ p = (unsigned char *) ALLOCA (bufp->used); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ END_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~ /* re_match_2_internal() modifies the compiled pattern (see the succeed_n, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ jump_n, set_number_at opcodes), make it re-entrant by working on a ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ copy. This should also give better locality of reference. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ memcpy (p, bufp->buffer, bufp->used); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ pstart = (re_char *) p; ~~~~~~~~~~~~~~~~~~~~~~~ pend = pstart + bufp->used; ~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef MATCH_MAY_ALLOCATE ~~~~~~~~~~~~~~~~~~~~~~~~~ /* Do not bother to initialize all the register variables if there are ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ no groups in the pattern, as it takes a fair amount of time. If ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ there are groups, we include space for register 0 (the whole ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ pattern), even though we never use it, since it simplifies the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ array indexing. We should fix this. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (bufp->re_ngroups) ~~~~~~~~~~~~~~~~~~~~~ { ~ BEGIN_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~~~ regstart = REGEX_TALLOC (num_regs, re_char *); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regend = REGEX_TALLOC (num_regs, re_char *); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ old_regstart = REGEX_TALLOC (num_regs, re_char *); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ old_regend = REGEX_TALLOC (num_regs, re_char *); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ best_regstart = REGEX_TALLOC (num_regs, re_char *); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ best_regend = REGEX_TALLOC (num_regs, re_char *); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ reg_info = REGEX_TALLOC (num_regs, register_info_type); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ reg_dummy = REGEX_TALLOC (num_regs, re_char *); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ reg_info_dummy = REGEX_TALLOC (num_regs, register_info_type); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ END_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~ if (!(regstart && regend && old_regstart && old_regend && reg_info ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ && best_regstart && best_regend && reg_dummy && reg_info_dummy)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ FREE_VARIABLES (); ~~~~~~~~~~~~~~~~~~ return -2; ~~~~~~~~~~ } ~ } ~ else ~~~~ { ~ /* We must initialize all our variables to NULL, so that ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `FREE_VARIABLES' doesn't try to free them. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regstart = regend = old_regstart = old_regend = best_regstart ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ = best_regend = reg_dummy = NULL; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ reg_info = reg_info_dummy = (register_info_type *) NULL; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ #endif /* MATCH_MAY_ALLOCATE */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #if defined (emacs) && defined (REL_ALLOC) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ /* If the allocations above (or the call to setup_syntax_cache() in ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_match_2) caused a rel-alloc relocation, then fix up the data ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ pointers */ ~~~~~~~~~~~ Bytecount offset = offset_post_relocation (lispobj, orig_buftext); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (offset) ~~~~~~~~~~~ { ~ string1 += offset; ~~~~~~~~~~~~~~~~~~ string2 += offset; ~~~~~~~~~~~~~~~~~~ } ~ } ~ #endif /* defined (emacs) && defined (REL_ALLOC) */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* The starting position is bogus. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (pos < 0 || pos > size1 + size2) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ FREE_VARIABLES (); ~~~~~~~~~~~~~~~~~~ return -1; ~~~~~~~~~~ } ~ /* Initialize subexpression text positions to our sentinel to mark ones that ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ no start_memory/stop_memory has been seen for. Also initialize the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ register information struct. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (mcnt = 1; mcnt < num_regs; mcnt++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ regstart[mcnt] = regend[mcnt] = old_regstart[mcnt] = old_regend[mcnt] ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ = best_regstart[mcnt] = best_regend[mcnt] = REG_UNSET_VALUE; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ REG_MATCH_NULL_STRING_P (reg_info[mcnt]) = MATCH_NULL_UNSET_VALUE; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ IS_ACTIVE (reg_info[mcnt]) = 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ MATCHED_SOMETHING (reg_info[mcnt]) = 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ EVER_MATCHED_SOMETHING (reg_info[mcnt]) = 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* We move `string1' into `string2' if the latter's empty -- but not if ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `string1' is null. */ ~~~~~~~~~~~~~~~~~~~~~~ if (size2 == 0 && string1 != NULL) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ string2 = string1; ~~~~~~~~~~~~~~~~~~ size2 = size1; ~~~~~~~~~~~~~~ string1 = 0; ~~~~~~~~~~~~ size1 = 0; ~~~~~~~~~~ } ~ end1 = string1 + size1; ~~~~~~~~~~~~~~~~~~~~~~~ end2 = string2 + size2; ~~~~~~~~~~~~~~~~~~~~~~~ /* Compute where to stop matching, within the two strings. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (stop <= size1) ~~~~~~~~~~~~~~~~~~ { ~ end_match_1 = string1 + stop; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ end_match_2 = string2; ~~~~~~~~~~~~~~~~~~~~~~ } ~ else ~~~~ { ~ end_match_1 = end1; ~~~~~~~~~~~~~~~~~~~ end_match_2 = string2 + stop - size1; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* `p' scans through the pattern as `d' scans through the data. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `dend' is the end of the input string that `d' points within. `d' ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ is advanced into the following input string whenever necessary, but ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ this happens before fetching; therefore, at the beginning of the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ loop, `d' can be pointing at the end of a string, but it cannot ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ equal `string2'. */ ~~~~~~~~~~~~~~~~~~~~ if (size1 > 0 && pos <= size1) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ d = string1 + pos; ~~~~~~~~~~~~~~~~~~ dend = end_match_1; ~~~~~~~~~~~~~~~~~~~ } ~ else ~~~~ { ~ d = string2 + pos - size1; ~~~~~~~~~~~~~~~~~~~~~~~~~~ dend = end_match_2; ~~~~~~~~~~~~~~~~~~~ } ~ DEBUG_MATCH_PRINT1 ("The compiled pattern is: \n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT_COMPILED_PATTERN (bufp, p, pend); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT1 ("The string to match is: `"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT_DOUBLE_STRING (d, string1, size1, string2, size2); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT1 ("'\n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* This loops over pattern commands. It exits by returning from the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ function if the match is complete, or it drops through if the match ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fails at this starting point in the input data. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (;;) ~~~~~~~~ { ~ DEBUG_MATCH_PRINT2 ("\n0x%zx: ", (Bytecount) p); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef emacs /* XEmacs added, w/removal of immediate_quit */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!no_quit_in_re_search) ~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ BEGIN_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~~~ QUIT; ~~~~~ END_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~ RE_MATCH_RELOCATE_MOVEABLE_DATA_POINTERS (); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ #endif ~~~~~~ if (p == pend) ~~~~~~~~~~~~~~ { /* End of pattern means we might have succeeded. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT1 ("end of pattern ... "); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* If we haven't matched the entire string, and we want the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ longest match, try backtracking. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (d != end_match_2) ~~~~~~~~~~~~~~~~~~~~~ { ~ same_str_p = (FIRST_STRING_P (match_end) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ == MATCHING_IN_FIRST_STRING); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* AIX compiler got confused when this was combined ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ with the previous declaration. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (same_str_p) ~~~~~~~~~~~~~~~ best_match_p = d > match_end; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ else ~~~~ best_match_p = !MATCHING_IN_FIRST_STRING; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT1 ("backtracking.\n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!FAIL_STACK_EMPTY ()) ~~~~~~~~~~~~~~~~~~~~~~~~~ { /* More failure points to try. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* If exceeds best match so far, save it. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!best_regs_set || best_match_p) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ best_regs_set = true; ~~~~~~~~~~~~~~~~~~~~~ match_end = d; ~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT1 ("\nSAVING match as best so far.\n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (mcnt = 1; mcnt < num_regs; mcnt++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ best_regstart[mcnt] = regstart[mcnt]; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ best_regend[mcnt] = regend[mcnt]; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ } ~ goto fail; ~~~~~~~~~~ } ~ /* If no failure points, don't restore garbage. And if ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ last match is real best match, don't restore second ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ best one. */ ~~~~~~~~~~~~ else if (best_regs_set && !best_match_p) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ restore_best_regs: ~~~~~~~~~~~~~~~~~~ /* Restore best match. It may happen that `dend == ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ end_match_1' while the restored d is in string2. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ For example, the pattern `x.*y.*z' against the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ strings `x-' and `y-z-', if the two strings are ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ not consecutive in memory. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT1 ("Restoring best registers.\n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ d = match_end; ~~~~~~~~~~~~~~ dend = ((d >= string1 && d <= end1) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ? end_match_1 : end_match_2); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (mcnt = 1; mcnt < num_regs; mcnt++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ regstart[mcnt] = best_regstart[mcnt]; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regend[mcnt] = best_regend[mcnt]; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ } ~ } /* d != end_match_2 */ ~~~~~~~~~~~~~~~~~~~~~~~~ succeed_label: ~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT1 ("Accepting match.\n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* If caller wants register contents data back, do it. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ int num_nonshy_regs = bufp->re_nsub + 1; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (regs && !bufp->no_sub) ~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ /* Have the register data arrays been allocated? */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (bufp->regs_allocated == REGS_UNALLOCATED) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { /* No. So allocate them with malloc. We need one ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ extra element beyond `num_regs' for the `-1' marker ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ GNU code uses. */ ~~~~~~~~~~~~~~~~~~ regs->num_regs = MAX (RE_NREGS, num_nonshy_regs + 1); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ BEGIN_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~~~ regs->start = TALLOC (regs->num_regs, regoff_t); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regs->end = TALLOC (regs->num_regs, regoff_t); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ END_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~ RE_MATCH_RELOCATE_MOVEABLE_DATA_POINTERS (); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (regs->start == NULL || regs->end == NULL) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ FREE_VARIABLES (); ~~~~~~~~~~~~~~~~~~ return -2; ~~~~~~~~~~ } ~ bufp->regs_allocated = REGS_REALLOCATE; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ else if (bufp->regs_allocated == REGS_REALLOCATE) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { /* Yes. If we need more elements than were already ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ allocated, reallocate them. If we need fewer, just ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ leave it alone. */ ~~~~~~~~~~~~~~~~~~~ if (regs->num_regs < num_nonshy_regs + 1) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ regs->num_regs = num_nonshy_regs + 1; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ BEGIN_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~~~ RETALLOC (regs->start, regs->num_regs, regoff_t); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RETALLOC (regs->end, regs->num_regs, regoff_t); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ END_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~ RE_MATCH_RELOCATE_MOVEABLE_DATA_POINTERS (); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (regs->start == NULL || regs->end == NULL) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ FREE_VARIABLES (); ~~~~~~~~~~~~~~~~~~ return -2; ~~~~~~~~~~ } ~ } ~ } ~ else ~~~~ { ~ /* The braces fend off a "empty body in an else-statement" ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ warning under GCC when assert expands to nothing. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ assert (bufp->regs_allocated == REGS_FIXED); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* Convert the pointer data in `regstart' and `regend' to ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ indices. Register zero has to be set differently, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ since we haven't kept track of any info for it. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (regs->num_regs > 0) ~~~~~~~~~~~~~~~~~~~~~~~ { ~ regs->start[0] = pos; ~~~~~~~~~~~~~~~~~~~~~ regs->end[0] = (MATCHING_IN_FIRST_STRING ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ? ((regoff_t) (d - string1)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ : ((regoff_t) (d - string2 + size1))); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* Map over the NUM_NONSHY_REGS non-shy internal registers. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Copy each into the corresponding external register. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ MCNT indexes external registers. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (mcnt = 1; mcnt < MIN (num_nonshy_regs, regs->num_regs); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ mcnt++) ~~~~~~~ { ~ int internal_reg = bufp->external_to_internal_register[mcnt]; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if ((int)0xDEADBEEF == internal_reg ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || REG_UNSET (regstart[internal_reg]) || ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ REG_UNSET (regend[internal_reg])) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regs->start[mcnt] = regs->end[mcnt] = -1; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ else ~~~~ { ~ regs->start[mcnt] = ~~~~~~~~~~~~~~~~~~~ (regoff_t) POINTER_TO_OFFSET (regstart[internal_reg]); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regs->end[mcnt] = ~~~~~~~~~~~~~~~~~ (regoff_t) POINTER_TO_OFFSET (regend[internal_reg]); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ } ~ } /* regs && !bufp->no_sub */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* If we have regs and the regs structure has more elements than ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ were in the pattern, set the extra elements starting with ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ NUM_NONSHY_REGS to -1. If we (re)allocated the registers, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ this is the case, because we always allocate enough to have ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ at least one -1 at the end. ~~~~~~~~~~~~~~~~~~~~~~~~~~~ We do this even when no_sub is set because some applications ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (XEmacs) reuse register structures which may contain stale ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ information, and permit attempts to access those registers. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ It would be possible to require the caller to do this, but we'd ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ have to change the API for this function to reflect that, and ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ audit all callers. Note: as of 2003-04-17 callers in XEmacs ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ do clear the registers, but it's safer to leave this code in ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ because of reallocation. ~~~~~~~~~~~~~~~~~~~~~~~~ */ ~~ if (regs && regs->num_regs > 0) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (mcnt = num_nonshy_regs; mcnt < regs->num_regs; mcnt++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regs->start[mcnt] = regs->end[mcnt] = -1; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ DEBUG_MATCH_PRINT4 ("%u failure points pushed, %u popped (%u remain).\n", ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ nfailure_points_pushed, nfailure_points_popped, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ nfailure_points_pushed - nfailure_points_popped); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT2 ("%u registers pushed.\n", num_regs_pushed); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ mcnt = d - pos - (MATCHING_IN_FIRST_STRING ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ? string1 ~~~~~~~~~ : string2 - size1); ~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT2 ("Returning %d from re_match_2.\n", mcnt); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ FREE_VARIABLES (); ~~~~~~~~~~~~~~~~~~ return mcnt; ~~~~~~~~~~~~ } ~ /* Otherwise match next pattern command. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ switch ((re_opcode_t) *p++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ /* Ignore these. Used to ignore the n of succeed_n's which ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ currently have n == 0. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~ case no_op: ~~~~~~~~~~~ DEBUG_MATCH_PRINT1 ("EXECUTING no_op.\n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case succeed: ~~~~~~~~~~~~~ DEBUG_MATCH_PRINT1 ("EXECUTING succeed.\n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto succeed_label; ~~~~~~~~~~~~~~~~~~~ /* Match exactly a string of length n in the pattern. The ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ following byte in the pattern defines n, and the n bytes after ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ that make up the string to match. (Under Mule, this will be in ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the default internal format.) */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case exactn: ~~~~~~~~~~~~ mcnt = *p++; ~~~~~~~~~~~~ DEBUG_MATCH_PRINT2 ("EXECUTING exactn %d.\n", mcnt); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* This is written out as an if-else so we don't waste time ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ testing `translate' inside the loop. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (TRANSLATE_P (translate)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ do ~~ { ~ #ifdef MULE ~~~~~~~~~~~ Bytecount pat_len; ~~~~~~~~~~~~~~~~~~ REGEX_PREFETCH (); ~~~~~~~~~~~~~~~~~~ if (RE_TRANSLATE_1 (itext_ichar_fmt (d, fmt, lispobj)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ != itext_ichar (p)) ~~~~~~~~~~~~~~~~~~~ goto fail; ~~~~~~~~~~ pat_len = itext_ichar_len (p); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ p += pat_len; ~~~~~~~~~~~~~ INC_IBYTEPTR_FMT (d, fmt); ~~~~~~~~~~~~~~~~~~~~~~~~~~ mcnt -= pat_len; ~~~~~~~~~~~~~~~~ #else /* not MULE */ ~~~~~~~~~~~~~~~~~~~~ REGEX_PREFETCH (); ~~~~~~~~~~~~~~~~~~ if ((unsigned char) RE_TRANSLATE_1 (*d++) != *p++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto fail; ~~~~~~~~~~ mcnt--; ~~~~~~~ #endif ~~~~~~ } ~ while (mcnt > 0); ~~~~~~~~~~~~~~~~~ } ~ else ~~~~ { ~ #ifdef MULE ~~~~~~~~~~~ /* If buffer format is default, then we can shortcut and just ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ compare the text directly, byte by byte. Otherwise, we ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ need to go character by character. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (fmt != FORMAT_DEFAULT) ~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ do ~~ { ~ Bytecount pat_len; ~~~~~~~~~~~~~~~~~~ REGEX_PREFETCH (); ~~~~~~~~~~~~~~~~~~ if (itext_ichar_fmt (d, fmt, lispobj) != ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ itext_ichar (p)) ~~~~~~~~~~~~~~~~ goto fail; ~~~~~~~~~~ pat_len = itext_ichar_len (p); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ p += pat_len; ~~~~~~~~~~~~~ INC_IBYTEPTR_FMT (d, fmt); ~~~~~~~~~~~~~~~~~~~~~~~~~~ mcnt -= pat_len; ~~~~~~~~~~~~~~~~ } ~ while (mcnt > 0); ~~~~~~~~~~~~~~~~~ } ~ else ~~~~ #endif ~~~~~~ { ~ do ~~ { ~ REGEX_PREFETCH (); ~~~~~~~~~~~~~~~~~~ if (*d++ != *p++) goto fail; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ mcnt--; ~~~~~~~ } ~ while (mcnt > 0); ~~~~~~~~~~~~~~~~~ } ~ } ~ SET_REGS_MATCHED (); ~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ /* Match any character except possibly a newline or a null. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case anychar: ~~~~~~~~~~~~~ DEBUG_MATCH_PRINT1 ("EXECUTING anychar.\n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ REGEX_PREFETCH (); ~~~~~~~~~~~~~~~~~~ if ((!(bufp->syntax & RE_DOT_NEWLINE) && ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RE_TRANSLATE (itext_ichar_fmt (d, fmt, lispobj)) == '\n') ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || (bufp->syntax & RE_DOT_NOT_NULL && ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RE_TRANSLATE (itext_ichar_fmt (d, fmt, lispobj)) == ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ '\000')) ~~~~~~~~ goto fail; ~~~~~~~~~~ SET_REGS_MATCHED (); ~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT2 (" Matched `%c'.\n", *d); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ INC_IBYTEPTR_FMT (d, fmt); /* XEmacs change */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case charset: ~~~~~~~~~~~~~ case charset_not: ~~~~~~~~~~~~~~~~~ { ~ REGISTER Ichar c; ~~~~~~~~~~~~~~~~~ re_bool not_p = (re_opcode_t) *(p - 1) == charset_not; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT2 ("EXECUTING charset%s.\n", not_p ? "_not" : ""); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ REGEX_PREFETCH (); ~~~~~~~~~~~~~~~~~~ c = itext_ichar_fmt (d, fmt, lispobj); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ c = RE_TRANSLATE (c); /* The character to match. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Cast to `unsigned int' instead of `unsigned char' in case the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bit list is a full 32 bytes long. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if ((unsigned int)c < (unsigned int) (*p * BYTEWIDTH) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ && p[1 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH))) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ not_p = !not_p; ~~~~~~~~~~~~~~~ p += 1 + *p; ~~~~~~~~~~~~ if (!not_p) goto fail; ~~~~~~~~~~~~~~~~~~~~~~ SET_REGS_MATCHED (); ~~~~~~~~~~~~~~~~~~~~ INC_IBYTEPTR_FMT (d, fmt); /* XEmacs change */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ } ~ #ifdef MULE ~~~~~~~~~~~ case charset_mule: ~~~~~~~~~~~~~~~~~~ case charset_mule_not: ~~~~~~~~~~~~~~~~~~~~~~ { ~ REGISTER Ichar c; ~~~~~~~~~~~~~~~~~ re_bool not_p = (re_opcode_t) *(p - 1) == charset_mule_not; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Bitbyte class_bits = *p++; ~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT2 ("EXECUTING charset_mule%s.\n", ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ not_p ? "_not" : ""); ~~~~~~~~~~~~~~~~~~~~~ REGEX_PREFETCH (); ~~~~~~~~~~~~~~~~~~ c = itext_ichar_fmt (d, fmt, lispobj); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ c = RE_TRANSLATE (c); /* The character to match. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if ((class_bits && ~~~~~~~~~~~~~~~~~~ ((class_bits & BIT_WORD && ISWORD (c)) /* = ALNUM */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || (class_bits & BIT_ALPHA && ISALPHA (c)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || (class_bits & BIT_SPACE && ISSPACE (c)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || (class_bits & BIT_PUNCT && ISPUNCT (c)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || (TRANSLATE_P (translate) ? ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (class_bits & (BIT_UPPER | BIT_LOWER) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ && !NOCASEP (lispbuf, c)) ~~~~~~~~~~~~~~~~~~~~~~~~~ : ((class_bits & BIT_UPPER && ISUPPER (c)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || (class_bits & BIT_LOWER && ISLOWER (c)))))) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || EQ (Qt, unified_range_table_lookup ((void *) p, c, Qnil))) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ not_p = !not_p; ~~~~~~~~~~~~~~~ } ~ p += unified_range_table_bytes_used ((void *) p); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!not_p) goto fail; ~~~~~~~~~~~~~~~~~~~~~~ SET_REGS_MATCHED (); ~~~~~~~~~~~~~~~~~~~~ INC_IBYTEPTR_FMT (d, fmt); ~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ } ~ #endif /* MULE */ ~~~~~~~~~~~~~~~~~ /* The beginning of a group is represented by start_memory. The ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ arguments are the register number in the next two bytes, and the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ number of groups inner to this one in the two bytes thereafter. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ The text matched within the group is recorded (in the internal ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ registers data structure) under the register number. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case start_memory: ~~~~~~~~~~~~~~~~~~ { ~ regnum_t regno; ~~~~~~~~~~~~~~~ /* Find out if this group can match the empty string. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ p1 = p; /* To send to group_match_null_string_p. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ EXTRACT_NONNEGATIVE_AND_INCR (regno, p); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT3 ("EXECUTING start_memory %d (%d):\n", ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regno, extract_number (p)); ~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (REG_MATCH_NULL_STRING_P (reg_info[regno]) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ == MATCH_NULL_UNSET_VALUE) ~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ REG_MATCH_NULL_STRING_P (reg_info[regno]) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ = group_match_null_string_p (&p1, pend, reg_info); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ DEBUG_MATCH_PRINT2 (" group CAN%s match null string\n", ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ REG_MATCH_NULL_STRING_P (reg_info[regno]) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ? "NOT" : ""); ~~~~~~~~~~~~~~ /* Save the position in the string where we were the last time ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ we were at this open-group operator in case the group is ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ operated upon by a repetition operator, e.g., with `(a*)*b' ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ against `ab'; then we want to ignore where we are now in the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ string in case this attempt to match fails. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ old_regstart[regno] = REG_MATCH_NULL_STRING_P (reg_info[regno]) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ? REG_UNSET (regstart[regno]) ? d : regstart[regno] ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ : regstart[regno]; ~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT2 (" old_regstart: %zd\n", ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ POINTER_TO_OFFSET (old_regstart[regno])); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regstart[regno] = d; ~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT2 (" regstart: %zd\n", ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ POINTER_TO_OFFSET (regstart[regno])); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ IS_ACTIVE (reg_info[regno]) = 1; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ MATCHED_SOMETHING (reg_info[regno]) = 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Clear this whenever we change the register activity status. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ set_regs_matched_done = 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~ /* This is the new highest active register. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ highest_active_reg = regno; ~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* If nothing was active before, this is the new lowest active ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ register. */ ~~~~~~~~~~~~~ if (lowest_active_reg == NO_LOWEST_ACTIVE_REG) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ lowest_active_reg = regno; ~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Move past the inner group count. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ p += 2; ~~~~~~~ just_past_start_mem = p; ~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ } ~ /* The stop_memory opcode represents the end of a group. Its ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ arguments are the same as start_memory's: the register ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ number, and the number of inner groups. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case stop_memory: ~~~~~~~~~~~~~~~~~ { ~ regnum_t regno, inner_groups; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ EXTRACT_NONNEGATIVE_AND_INCR (regno, p); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ EXTRACT_NONNEGATIVE_AND_INCR (inner_groups, p); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT3 ("EXECUTING stop_memory %d (%d):\n", ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regno, inner_groups); ~~~~~~~~~~~~~~~~~~~~~ /* We need to save the string position the last time we were at ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ this close-group operator in case the group is operated ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ upon by a repetition operator, e.g., with `((a*)*(b*)*)*' ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ against `aba'; then we want to ignore where we are now in ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the string in case this attempt to match fails. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ old_regend[regno] = REG_MATCH_NULL_STRING_P (reg_info[regno]) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ? REG_UNSET (regend[regno]) ? d : regend[regno] ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ : regend[regno]; ~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT2 (" old_regend: %zd\n", ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ POINTER_TO_OFFSET (old_regend[regno])); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regend[regno] = d; ~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT2 (" regend: %zd\n", ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ POINTER_TO_OFFSET (regend[regno])); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* This register isn't active anymore. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ IS_ACTIVE (reg_info[regno]) = 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Clear this whenever we change the register activity status. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ set_regs_matched_done = 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~ /* If this was the only register active, nothing is active ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ anymore. */ ~~~~~~~~~~~~ if (lowest_active_reg == highest_active_reg) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ lowest_active_reg = NO_LOWEST_ACTIVE_REG; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ highest_active_reg = NO_HIGHEST_ACTIVE_REG; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ else ~~~~ { ~ /* We must scan for the new highest active register, since it ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ isn't necessarily one less than now: consider ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (a(b)c(d(e)f)g). When group 3 ends, after the f), the new ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ highest active register is 1. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regnum_t r = regno - 1; ~~~~~~~~~~~~~~~~~~~~~~~ while (r > 0 && !IS_ACTIVE (reg_info[r])) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ r--; ~~~~ /* If we end up at register zero, that means that we saved ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the registers as the result of an `on_failure_jump', not ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ a `start_memory', and we jumped to past the innermost ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `stop_memory'. For example, in ((.)*) we save registers ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 1 and 2 as a result of the *, but when we pop back to the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ second ), we are at the stop_memory 1. Thus, nothing is ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ active. */ ~~~~~~~~~~~ if (r == 0) ~~~~~~~~~~~ { ~ lowest_active_reg = NO_LOWEST_ACTIVE_REG; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ highest_active_reg = NO_HIGHEST_ACTIVE_REG; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ else ~~~~ { ~ highest_active_reg = r; ~~~~~~~~~~~~~~~~~~~~~~~ /* 98/9/21 jhod: We've also gotta set lowest_active_reg, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ don't we? */ ~~~~~~~~~~~~ r = 1; ~~~~~~ while (r < highest_active_reg && !IS_ACTIVE(reg_info[r])) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ r++; ~~~~ lowest_active_reg = r; ~~~~~~~~~~~~~~~~~~~~~~ } ~ } ~ /* If just failed to match something this time around with a ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ group that's operated on by a repetition operator, try to ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ force exit from the ``loop'', and restore the register ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ information for this group that we had before trying this ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ last match. */ ~~~~~~~~~~~~~~~ if ((!MATCHED_SOMETHING (reg_info[regno]) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || just_past_start_mem == p - 4) && p < pend) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ re_bool is_a_jump_n = false; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ p1 = p; ~~~~~~~ mcnt = 0; ~~~~~~~~~ switch ((re_opcode_t) *p1++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ case jump_n: ~~~~~~~~~~~~ is_a_jump_n = true; ~~~~~~~~~~~~~~~~~~~ case pop_failure_jump: ~~~~~~~~~~~~~~~~~~~~~~ case maybe_pop_jump: ~~~~~~~~~~~~~~~~~~~~ case jump: ~~~~~~~~~~ case dummy_failure_jump: ~~~~~~~~~~~~~~~~~~~~~~~~ EXTRACT_NUMBER_AND_INCR (mcnt, p1); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (is_a_jump_n) ~~~~~~~~~~~~~~~~ p1 += 2; ~~~~~~~~ break; ~~~~~~ default: ~~~~~~~~ /* do nothing */ ; ~~~~~~~~~~~~~~~~~~ } ~ p1 += mcnt; ~~~~~~~~~~~ /* If the next operation is a jump backwards in the pattern ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ to an on_failure_jump right before the start_memory ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ corresponding to this stop_memory, exit from the loop ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ by forcing a failure after pushing on the stack the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ on_failure_jump's jump in the pattern, and d. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (mcnt < 0 && (re_opcode_t) *p1 == on_failure_jump ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ && (re_opcode_t) p1[3] == start_memory && ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regno == extract_nonnegative (p1 + 4)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ /* If this group ever matched anything, then restore ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ what its registers were before trying this last ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ failed match, e.g., with `(a*)*b' against `ab' for ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regstart[1], and, e.g., with `((a*)*(b*)*)*' against ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `aba' for regend[3]. ~~~~~~~~~~~~~~~~~~~~ Also restore the registers for inner groups for, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ e.g., `((a*)(b*))*' against `aba' (register 3 would ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ otherwise get trashed). */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (EVER_MATCHED_SOMETHING (reg_info[regno])) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ int r; ~~~~~~ EVER_MATCHED_SOMETHING (reg_info[regno]) = 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Restore this and inner groups' (if any) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ registers. */ ~~~~~~~~~~~~~~ for (r = regno; r < regno + inner_groups; r++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ regstart[r] = old_regstart[r]; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* xx why this test? */ ~~~~~~~~~~~~~~~~~~~~~~~~ if (old_regend[r] >= regstart[r]) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regend[r] = old_regend[r]; ~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ } ~ p1++; ~~~~~ EXTRACT_NUMBER_AND_INCR (mcnt, p1); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PUSH_FAILURE_POINT (p1 + mcnt, d, -2); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto fail; ~~~~~~~~~~ } ~ } ~ /* We used to move past the register number and inner group count ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ here, when registers were just one byte; that's no longer ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ necessary with EXTRACT_NUMBER_AND_INCR(), above. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ } ~ /* \ has been turned into a `duplicate' command which is ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ followed by the numeric value of as the register number. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (Already passed through external-to-internal-register mapping, so ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ it refers to the actual group number, not the non-shy-only ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ numbering used in the external world.) */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case duplicate: ~~~~~~~~~~~~~~~ { ~ REGISTER re_char *d2, *dend2; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Get which register to match against. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regnum_t regno; ~~~~~~~~~~~~~~~ EXTRACT_NONNEGATIVE_AND_INCR (regno, p); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT2 ("EXECUTING duplicate %d.\n", regno); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Can't back reference a group which we've never matched. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (REG_UNSET (regstart[regno]) || REG_UNSET (regend[regno])) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto fail; ~~~~~~~~~~ /* Where in input to try to start matching. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ d2 = regstart[regno]; ~~~~~~~~~~~~~~~~~~~~~ /* Where to stop matching; if both the place to start and ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the place to stop matching are in the same string, then ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ set to the place to stop, otherwise, for now have to use ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the end of the first string. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ dend2 = ((FIRST_STRING_P (regstart[regno]) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ == FIRST_STRING_P (regend[regno])) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ? regend[regno] : end_match_1); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (;;) ~~~~~~~~ { ~ /* If necessary, advance to next segment in register ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ contents. */ ~~~~~~~~~~~~~ while (d2 == dend2) ~~~~~~~~~~~~~~~~~~~ { ~ if (dend2 == end_match_2) break; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (dend2 == regend[regno]) break; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* End of string1 => advance to string2. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ d2 = string2; ~~~~~~~~~~~~~ dend2 = regend[regno]; ~~~~~~~~~~~~~~~~~~~~~~ } ~ /* At end of register contents => success */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (d2 == dend2) break; ~~~~~~~~~~~~~~~~~~~~~~~ /* If necessary, advance to next segment in data. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ REGEX_PREFETCH (); ~~~~~~~~~~~~~~~~~~ /* How many characters left in this segment to match. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ mcnt = dend - d; ~~~~~~~~~~~~~~~~ /* Want how many consecutive characters we can match in ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ one shot, so, if necessary, adjust the count. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (mcnt > dend2 - d2) ~~~~~~~~~~~~~~~~~~~~~~ mcnt = dend2 - d2; ~~~~~~~~~~~~~~~~~~ /* Compare that many; failure if mismatch, else move ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ past them. */ ~~~~~~~~~~~~~~ if (TRANSLATE_P (translate) ~~~~~~~~~~~~~~~~~~~~~~~~~~~ ? bcmp_translate (d, d2, mcnt, translate ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef emacs ~~~~~~~~~~~~ , fmt, lispobj ~~~~~~~~~~~~~~ #endif ~~~~~~ ) ~ : memcmp (d, d2, mcnt)) ~~~~~~~~~~~~~~~~~~~~~~~ goto fail; ~~~~~~~~~~ d += mcnt, d2 += mcnt; ~~~~~~~~~~~~~~~~~~~~~~ /* Do this because we've match some characters. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ SET_REGS_MATCHED (); ~~~~~~~~~~~~~~~~~~~~ } ~ } ~ break; ~~~~~~ /* begline matches the empty string at the beginning of the string ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (unless `not_bol' is set in `bufp'), and, if ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `newline_anchor' is set, after newlines. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case begline: ~~~~~~~~~~~~~ DEBUG_MATCH_PRINT1 ("EXECUTING begline.\n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (AT_STRINGS_BEG (d)) ~~~~~~~~~~~~~~~~~~~~~~~ { ~ if (!bufp->not_bol) break; ~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ else ~~~~ { ~ re_char *d2 = d; ~~~~~~~~~~~~~~~~ DEC_IBYTEPTR (d2); ~~~~~~~~~~~~~~~~~~ if (itext_ichar_ascii_fmt (d2, fmt, lispobj) == '\n' && ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->newline_anchor) ~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ } ~ /* In all other cases, we fail. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto fail; ~~~~~~~~~~ /* endline is the dual of begline. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case endline: ~~~~~~~~~~~~~ DEBUG_MATCH_PRINT1 ("EXECUTING endline.\n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (AT_STRINGS_END (d)) ~~~~~~~~~~~~~~~~~~~~~~~ { ~ if (!bufp->not_eol) break; ~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* We have to ``prefetch'' the next character. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ else if ((d == end1 ? ~~~~~~~~~~~~~~~~~~~~~ itext_ichar_ascii_fmt (string2, fmt, lispobj) : ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ itext_ichar_ascii_fmt (d, fmt, lispobj)) == '\n' ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ && bufp->newline_anchor) ~~~~~~~~~~~~~~~~~~~~~~~~ { ~ break; ~~~~~~ } ~ goto fail; ~~~~~~~~~~ /* Match at the very beginning of the data. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case begbuf: ~~~~~~~~~~~~ DEBUG_MATCH_PRINT1 ("EXECUTING begbuf.\n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (AT_STRINGS_BEG (d)) ~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ goto fail; ~~~~~~~~~~ /* Match at the very end of the data. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case endbuf: ~~~~~~~~~~~~ DEBUG_MATCH_PRINT1 ("EXECUTING endbuf.\n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (AT_STRINGS_END (d)) ~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ goto fail; ~~~~~~~~~~ /* on_failure_keep_string_jump is used to optimize `.*\n'. It ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ pushes NULL as the value for the string on the stack. Then ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `pop_failure_point' will keep the current value for the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ string, instead of restoring it. To see why, consider ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ matching `foo\nbar' against `.*\n'. The .* matches the foo; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ then the . fails against the \n. But the next thing we want ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ to do is match the \n against the \n; if we restored the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ string value, we would be back at the foo. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Because this is used only in specific cases, we don't need to ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ check all the things that `on_failure_jump' does, to make ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ sure the right things get saved on the stack. Hence we don't ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ share its code. The only reason to push anything on the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ stack at all is that otherwise we would have to change ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `anychar's code to do something besides goto fail in this ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case; that seems worse than this. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case on_failure_keep_string_jump: ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT1 ("EXECUTING on_failure_keep_string_jump"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ EXTRACT_NUMBER_AND_INCR (mcnt, p); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT3 (" %d (to 0x%zx):\n", mcnt, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (Bytecount) (p + mcnt)); ~~~~~~~~~~~~~~~~~~~~~~~~ PUSH_FAILURE_POINT (p + mcnt, (unsigned char *) 0, -2); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ /* Uses of on_failure_jump: ~~~~~~~~~~~~~~~~~~~~~~~~~~~ Each alternative starts with an on_failure_jump that points ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ to the beginning of the next alternative. Each alternative ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ except the last ends with a jump that in effect jumps past ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the rest of the alternatives. (They really jump to the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ending jump of the following alternative, because tensioning ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ these jumps is a hassle.) ~~~~~~~~~~~~~~~~~~~~~~~~~ Repeats start with an on_failure_jump that points past both ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the repetition text and either the following jump or ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ pop_failure_jump back to this on_failure_jump. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case on_failure_jump: ~~~~~~~~~~~~~~~~~~~~~ on_failure: ~~~~~~~~~~~ DEBUG_MATCH_PRINT1 ("EXECUTING on_failure_jump"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ EXTRACT_NUMBER_AND_INCR (mcnt, p); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT3 (" %d (to 0x%zx)", mcnt, (Bytecount) (p + mcnt)); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* If this on_failure_jump comes right before a group (i.e., ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the original * applied to a group), save the information ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for that group and all inner ones, so that if we fail back ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ to this point, the group's information will be correct. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ For example, in \(a*\)*\1, we need the preceding group, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ and in \(\(a*\)b*\)\2, we need the inner group. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* We can't use `p' to check ahead because we push ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ a failure point to `p + mcnt' after we do this. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ p1 = p; ~~~~~~~ /* We need to skip no_op's before we look for the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ start_memory in case this on_failure_jump is happening as ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the result of a completed succeed_n, as in \(a\)\{1,3\}b\1 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ against aba. */ ~~~~~~~~~~~~~~~~ while (p1 < pend && (re_opcode_t) *p1 == no_op) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ p1++; ~~~~~ if (p1 < pend && (re_opcode_t) *p1 == start_memory) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ /* We have a new highest active register now. This will ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ get reset at the start_memory we are about to get to, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ but we will have saved all the registers relevant to ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ this repetition op, as described above. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ highest_active_reg = *(p1 + 1) + *(p1 + 2); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (lowest_active_reg == NO_LOWEST_ACTIVE_REG) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ lowest_active_reg = *(p1 + 1); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ DEBUG_MATCH_PRINT1 (":\n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~ PUSH_FAILURE_POINT (p + mcnt, d, -2); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ /* A smart repeat ends with `maybe_pop_jump'. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ We change it to either `pop_failure_jump' or `jump'. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case maybe_pop_jump: ~~~~~~~~~~~~~~~~~~~~ EXTRACT_NUMBER_AND_INCR (mcnt, p); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT2 ("EXECUTING maybe_pop_jump %d.\n", mcnt); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ REGISTER const unsigned char *p2 = p; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Compare the beginning of the repeat with what in the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ pattern follows its end. If we can establish that there ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ is nothing that they would both match, i.e., that we ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ would have to backtrack because of (as in, e.g., `a*a') ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ then we can change to pop_failure_jump, because we'll ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ never have to backtrack. ~~~~~~~~~~~~~~~~~~~~~~~~ This is not true in the case of alternatives: in ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `(a|ab)*' we do need to backtrack to the `ab' alternative ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (e.g., if the string was `ab'). But instead of trying to ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ detect that here, the alternative has put on a dummy ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ failure point which is what we will end up popping. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Skip over open/close-group commands. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ If what follows this loop is a ...+ construct, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ look at what begins its body, since we will have to ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ match at least one of that. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ while (1) ~~~~~~~~~ { ~ if (p2 + 2 < pend ~~~~~~~~~~~~~~~~~ && ((re_opcode_t) *p2 == stop_memory ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || (re_opcode_t) *p2 == start_memory)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ p2 += 3; ~~~~~~~~ else if (p2 + 6 < pend ~~~~~~~~~~~~~~~~~~~~~~ && (re_opcode_t) *p2 == dummy_failure_jump) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ p2 += 6; ~~~~~~~~ else ~~~~ break; ~~~~~~ } ~ p1 = p + mcnt; ~~~~~~~~~~~~~~ /* p1[0] ... p1[2] are the `on_failure_jump' corresponding ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ to the `maybe_finalize_jump' of this case. Examine what ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ follows. */ ~~~~~~~~~~~~ /* If we're at the end of the pattern, we can change. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (p2 == pend) ~~~~~~~~~~~~~~~ { ~ /* Consider what happens when matching ":\(.*\)" ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ against ":/". I don't really understand this code ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ yet. */ ~~~~~~~~ ((unsigned char *)p)[-3] = (re_char) pop_failure_jump; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT1 ~~~~~~~~~~~~~~~~~~ (" End of pattern: change to `pop_failure_jump'.\n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ else if ((re_opcode_t) *p2 == exactn ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || (bufp->newline_anchor && (re_opcode_t) *p2 == endline)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ REGISTER unsigned char c ~~~~~~~~~~~~~~~~~~~~~~~~ = *p2 == (unsigned char) endline ? '\n' : p2[2]; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if ((re_opcode_t) p1[3] == exactn && p1[5] != c) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ ((unsigned char *)p)[-3] ~~~~~~~~~~~~~~~~~~~~~~~~ = (unsigned char) pop_failure_jump; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT3 (" %c != %c => pop_failure_jump.\n", ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ c, p1[5]); ~~~~~~~~~~ } ~ else if ((re_opcode_t) p1[3] == charset ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || (re_opcode_t) p1[3] == charset_not) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ int not_p = (re_opcode_t) p1[3] == charset_not; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (c < (unsigned char) (p1[4] * BYTEWIDTH) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ && p1[5 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH))) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ not_p = !not_p; ~~~~~~~~~~~~~~~ /* `not_p' is equal to 1 if c would match, which means ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ that we can't change to pop_failure_jump. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!not_p) ~~~~~~~~~~~ { ~ ((unsigned char *)p)[-3] ~~~~~~~~~~~~~~~~~~~~~~~~ = (unsigned char) pop_failure_jump; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT1 (" No match => pop_failure_jump.\n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ } ~ } ~ else if ((re_opcode_t) *p2 == charset) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ #ifdef DEBUG ~~~~~~~~~~~~ REGISTER unsigned char c ~~~~~~~~~~~~~~~~~~~~~~~~ = *p2 == (unsigned char) endline ? '\n' : p2[2]; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ if ((re_opcode_t) p1[3] == exactn ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ && ! ((int) p2[1] * BYTEWIDTH > (int) p1[5] ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ && (p2[2 + p1[5] / BYTEWIDTH] ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ & (1 << (p1[5] % BYTEWIDTH))))) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ unsigned char *p3 = (unsigned char *)p; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ p3[-3] = (unsigned char) pop_failure_jump; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT3 (" %c != %c => pop_failure_jump.\n", ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ c, p1[5]); ~~~~~~~~~~ } ~ else if ((re_opcode_t) p1[3] == charset_not) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ int idx; ~~~~~~~~ /* We win if the charset_not inside the loop ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ lists every character listed in the charset after. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (idx = 0; idx < (int) p2[1]; idx++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (! (p2[2 + idx] == 0 ~~~~~~~~~~~~~~~~~~~~~~~ || (idx < (int) p1[4] ~~~~~~~~~~~~~~~~~~~~~ && ((p2[2 + idx] & ~ p1[5 + idx]) == 0)))) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ if (idx == p2[1]) ~~~~~~~~~~~~~~~~~ { ~ unsigned char *p3 = (unsigned char *) p; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ p3[-3] = (unsigned char) pop_failure_jump; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT1 (" No match => pop_failure_jump.\n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ } ~ else if ((re_opcode_t) p1[3] == charset) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ int idx; ~~~~~~~~ /* We win if the charset inside the loop ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ has no overlap with the one after the loop. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (idx = 0; ~~~~~~~~~~~~~ idx < (int) p2[1] && idx < (int) p1[4]; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ idx++) ~~~~~~ if ((p2[2 + idx] & p1[5 + idx]) != 0) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ if (idx == p2[1] || idx == p1[4]) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ unsigned char *p3 = (unsigned char *)p; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ p3[-3] = (unsigned char) pop_failure_jump; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT1 (" No match => pop_failure_jump.\n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ } ~ } ~ } ~ p -= 2; /* Point at relative address again. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if ((re_opcode_t) p[-1] != pop_failure_jump) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ p[-1] = (unsigned char) jump; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT1 (" Match => jump.\n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto unconditional_jump; ~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* Note fall through. */ ~~~~~~~~~~~~~~~~~~~~~~~~~ /* The end of a simple repeat has a pop_failure_jump back to ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ its matching on_failure_jump, where the latter will push a ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ failure point. The pop_failure_jump takes off failure ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ points put on by this pop_failure_jump's matching ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ on_failure_jump; we got through the pattern to here from the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ matching on_failure_jump, so didn't fail. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case pop_failure_jump: ~~~~~~~~~~~~~~~~~~~~~~ { ~ /* We need to pass separate storage for the lowest and ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ highest registers, even though we don't care about the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ actual values. Otherwise, we will restore only one ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ register from the stack, since lowest will == highest in ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `pop_failure_point'. */ ~~~~~~~~~~~~~~~~~~~~~~~~ int dummy_low_reg, dummy_high_reg; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ unsigned char *pdummy; ~~~~~~~~~~~~~~~~~~~~~~ re_char *sdummy = NULL; ~~~~~~~~~~~~~~~~~~~~~~~ USED (sdummy); /* Silence warning. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT1 ("EXECUTING pop_failure_jump.\n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ POP_FAILURE_POINT (sdummy, pdummy, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ dummy_low_reg, dummy_high_reg, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ reg_dummy, reg_dummy, reg_info_dummy); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ USED (pdummy); ~~~~~~~~~~~~~~ } ~ /* Note fall through. */ ~~~~~~~~~~~~~~~~~~~~~~~~~ /* Unconditionally jump (without popping any failure points). */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case jump: ~~~~~~~~~~ unconditional_jump: ~~~~~~~~~~~~~~~~~~~ EXTRACT_NUMBER_AND_INCR (mcnt, p); /* Get the amount to jump. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT2 ("EXECUTING jump %d ", mcnt); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ p += mcnt; /* Do the jump. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT2 ("(to 0x%zx).\n", (Bytecount) p); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ /* We need this opcode so we can detect where alternatives end ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ in `group_match_null_string_p' et al. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case jump_past_alt: ~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT1 ("EXECUTING jump_past_alt.\n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto unconditional_jump; ~~~~~~~~~~~~~~~~~~~~~~~~ /* Normally, the on_failure_jump pushes a failure point, which ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ then gets popped at pop_failure_jump. We will end up at ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ pop_failure_jump, also, and with a pattern of, say, `a+', we ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ are skipping over the on_failure_jump, so we have to push ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ something meaningless for pop_failure_jump to pop. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case dummy_failure_jump: ~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT1 ("EXECUTING dummy_failure_jump.\n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* It doesn't matter what we push for the string here. What ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the code at `fail' tests is the value for the pattern. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PUSH_FAILURE_POINT ((unsigned char *) 0, (unsigned char *) 0, -2); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto unconditional_jump; ~~~~~~~~~~~~~~~~~~~~~~~~ /* At the end of an alternative, we need to push a dummy failure ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ point in case we are followed by a `pop_failure_jump', because ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ we don't want the failure point for the alternative to be ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ popped. For example, matching `(a|ab)*' against `aab' ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ requires that we match the `ab' alternative. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case push_dummy_failure: ~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT1 ("EXECUTING push_dummy_failure.\n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* See comments just above at `dummy_failure_jump' about the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ two zeroes. */ ~~~~~~~~~~~~~~~ PUSH_FAILURE_POINT ((re_char *) 0, (re_char *) 0, -2); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ /* Have to succeed matching what follows at least n times. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ After that, handle like `on_failure_jump'. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case succeed_n: ~~~~~~~~~~~~~~~ EXTRACT_NONNEGATIVE (mcnt, p + 2); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT2 ("EXECUTING succeed_n %d.\n", mcnt); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Originally, this is how many times we HAVE to succeed. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (mcnt) ~~~~~~~~~ { ~ mcnt--; ~~~~~~~ p += 2; ~~~~~~~ DEBUG_MATCH_PRINT3 (" Setting 0x%zx to %d.\n", (Bytecount) p, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ mcnt); ~~~~~~ STORE_MATCH_NUMBER_AND_INCR (p, mcnt); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ else ~~~~ { ~ DEBUG_MATCH_PRINT2 (" Setting two bytes from 0x%zx to no_op.\n", ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (Bytecount) (p+2)); ~~~~~~~~~~~~~~~~~~~ STORE_MATCH_NUMBER (p + 2, no_op); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto on_failure; ~~~~~~~~~~~~~~~~ } ~ break; ~~~~~~ case jump_n: ~~~~~~~~~~~~ EXTRACT_NONNEGATIVE (mcnt, p + 2); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT2 ("EXECUTING jump_n %d.\n", mcnt); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Originally, this is how many times we CAN jump. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (mcnt) ~~~~~~~~~ { ~ mcnt--; ~~~~~~~ STORE_MATCH_NUMBER (p + 2, mcnt); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto unconditional_jump; ~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* If don't have to jump any more, skip over the rest of command. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ else ~~~~ p += 4; ~~~~~~~ break; ~~~~~~ case set_number_at: ~~~~~~~~~~~~~~~~~~~ { ~ unsigned char *p2; /* Location of the counter. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT1 ("EXECUTING set_number_at.\n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ EXTRACT_NUMBER_AND_INCR (mcnt, p); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Discard 'const', making re_match_2_internal() ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ non-reentrant. */ ~~~~~~~~~~~~~~~~~~ p2 = (unsigned char *) p + mcnt; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ EXTRACT_NONNEGATIVE_AND_INCR (mcnt, p); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT3 (" Setting 0x%zx to %d.\n", (Bytecount) p2, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ mcnt); ~~~~~~ STORE_MATCH_NUMBER (p2, mcnt); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ } ~ case wordbound: ~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT1 ("EXECUTING wordbound.\n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ should_succeed = 1; ~~~~~~~~~~~~~~~~~~~ matchwordbound: ~~~~~~~~~~~~~~~ { ~ /* XEmacs change */ ~~~~~~~~~~~~~~~~~~~ /* Straightforward and (I hope) correct implementation. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* emch1 is the character before d, syn1 is the syntax of ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ emch1, emch2 is the character at d, and syn2 is the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ syntax of emch2. */ ~~~~~~~~~~~~~~~~~~~ Ichar emch1, emch2; ~~~~~~~~~~~~~~~~~~~ int syn1 = 0, ~~~~~~~~~~~~~ syn2 = 0; ~~~~~~~~~ re_char *d_before, *d_after; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int result, ~~~~~~~~~~~ at_beg = AT_STRINGS_BEG (d), ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ at_end = AT_STRINGS_END (d); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (at_beg && at_end) ~~~~~~~~~~~~~~~~~~~~~ { ~ result = 0; ~~~~~~~~~~~ } ~ else ~~~~ { ~ if (!at_beg) ~~~~~~~~~~~~ { ~ d_before = POS_BEFORE_GAP_UNSAFE (d); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEC_IBYTEPTR_FMT (d_before, fmt); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ emch1 = itext_ichar_fmt (d_before, fmt, lispobj); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef emacs ~~~~~~~~~~~~ BEGIN_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~~~ UPDATE_SYNTAX_CACHE (scache, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ offset_to_bytexpos ~~~~~~~~~~~~~~~~~~ (lispobj, PTR_TO_OFFSET (d_before))); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ syn1 = SYNTAX_FROM_CACHE (scache, emch1); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ END_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~ } ~ if (!at_end) ~~~~~~~~~~~~ { ~ d_after = POS_AFTER_GAP_UNSAFE (d); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ emch2 = itext_ichar_fmt (d_after, fmt, lispobj); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef emacs ~~~~~~~~~~~~ BEGIN_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~~~ UPDATE_SYNTAX_CACHE_FORWARD (scache, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ offset_to_bytexpos ~~~~~~~~~~~~~~~~~~ (lispobj, PTR_TO_OFFSET (d))); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ syn2 = SYNTAX_FROM_CACHE (scache, emch2); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ END_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~ } ~ RE_MATCH_RELOCATE_MOVEABLE_DATA_POINTERS (); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (at_beg) ~~~~~~~~~~~ result = (syn2 == Sword); ~~~~~~~~~~~~~~~~~~~~~~~~~ else if (at_end) ~~~~~~~~~~~~~~~~ result = (syn1 == Sword); ~~~~~~~~~~~~~~~~~~~~~~~~~ else ~~~~ result = ((syn1 == Sword) != (syn2 == Sword)); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ if (result == should_succeed) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ goto fail; ~~~~~~~~~~ } ~ case notwordbound: ~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT1 ("EXECUTING notwordbound.\n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ should_succeed = 0; ~~~~~~~~~~~~~~~~~~~ goto matchwordbound; ~~~~~~~~~~~~~~~~~~~~ case wordbeg: ~~~~~~~~~~~~~ DEBUG_MATCH_PRINT1 ("EXECUTING wordbeg.\n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (AT_STRINGS_END (d)) ~~~~~~~~~~~~~~~~~~~~~~~ goto fail; ~~~~~~~~~~ { ~ /* XEmacs: this originally read: ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (WORDCHAR_P (d) && (AT_STRINGS_BEG (d) || !WORDCHAR_P (d - 1))) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ */ ~~ re_char *dtmp = POS_AFTER_GAP_UNSAFE (d); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Ichar emch = itext_ichar_fmt (dtmp, fmt, lispobj); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int tempres; ~~~~~~~~~~~~ BEGIN_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef emacs ~~~~~~~~~~~~ UPDATE_SYNTAX_CACHE ~~~~~~~~~~~~~~~~~~~ (scache, ~~~~~~~~ offset_to_bytexpos (lispobj, PTR_TO_OFFSET (d))); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ tempres = (SYNTAX_FROM_CACHE (scache, emch) != Sword); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ END_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~ RE_MATCH_RELOCATE_MOVEABLE_DATA_POINTERS (); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (tempres) ~~~~~~~~~~~~ goto fail; ~~~~~~~~~~ if (AT_STRINGS_BEG (d)) ~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ dtmp = POS_BEFORE_GAP_UNSAFE (d); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEC_IBYTEPTR_FMT (dtmp, fmt); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ emch = itext_ichar_fmt (dtmp, fmt, lispobj); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ BEGIN_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef emacs ~~~~~~~~~~~~ UPDATE_SYNTAX_CACHE_BACKWARD ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (scache, ~~~~~~~~ offset_to_bytexpos (lispobj, PTR_TO_OFFSET (dtmp))); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ tempres = (SYNTAX_FROM_CACHE (scache, emch) != Sword); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ END_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~ RE_MATCH_RELOCATE_MOVEABLE_DATA_POINTERS (); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (tempres) ~~~~~~~~~~~~ break; ~~~~~~ goto fail; ~~~~~~~~~~ } ~ case wordend: ~~~~~~~~~~~~~ DEBUG_MATCH_PRINT1 ("EXECUTING wordend.\n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (AT_STRINGS_BEG (d)) ~~~~~~~~~~~~~~~~~~~~~~~ goto fail; ~~~~~~~~~~ { ~ /* XEmacs: this originally read: ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!AT_STRINGS_BEG (d) && WORDCHAR_P (d - 1) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ && (!WORDCHAR_P (d) || AT_STRINGS_END (d))) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ The or condition is incorrect (reversed). ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ ~~ re_char *dtmp; ~~~~~~~~~~~~~~ Ichar emch; ~~~~~~~~~~~ int tempres; ~~~~~~~~~~~~ #ifdef emacs ~~~~~~~~~~~~ BEGIN_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~~~ UPDATE_SYNTAX_CACHE ~~~~~~~~~~~~~~~~~~~ (scache, ~~~~~~~~ offset_to_bytexpos (lispobj, PTR_TO_OFFSET (d))); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ END_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~ RE_MATCH_RELOCATE_MOVEABLE_DATA_POINTERS (); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ dtmp = POS_BEFORE_GAP_UNSAFE (d); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEC_IBYTEPTR_FMT (dtmp, fmt); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ emch = itext_ichar_fmt (dtmp, fmt, lispobj); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ BEGIN_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~~~ tempres = (SYNTAX_FROM_CACHE (scache, emch) != Sword); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ END_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~ RE_MATCH_RELOCATE_MOVEABLE_DATA_POINTERS (); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (tempres) ~~~~~~~~~~~~ goto fail; ~~~~~~~~~~ if (AT_STRINGS_END (d)) ~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ dtmp = POS_AFTER_GAP_UNSAFE (d); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ emch = itext_ichar_fmt (dtmp, fmt, lispobj); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ BEGIN_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef emacs ~~~~~~~~~~~~ { ~ re_char *next = d; ~~~~~~~~~~~~~~~~~~ INC_IBYTEPTR_FMT (next, fmt); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ UPDATE_SYNTAX_CACHE_FORWARD ~~~~~~~~~~~~~~~~~~~~~~~~~~~ (scache, ~~~~~~~~ offset_to_bytexpos (lispobj, PTR_TO_OFFSET (next))); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ #endif ~~~~~~ tempres = (SYNTAX_FROM_CACHE (scache, emch) != Sword); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ END_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~ RE_MATCH_RELOCATE_MOVEABLE_DATA_POINTERS (); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (tempres) ~~~~~~~~~~~~ break; ~~~~~~ goto fail; ~~~~~~~~~~ } ~ #ifdef emacs ~~~~~~~~~~~~ case before_dot: ~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT1 ("EXECUTING before_dot.\n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!BUFFERP (lispobj) ~~~~~~~~~~~~~~~~~~~~~~ || (BUF_PTR_BYTE_POS (XBUFFER (lispobj), (unsigned char *) d) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ >= BUF_PT (XBUFFER (lispobj)))) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto fail; ~~~~~~~~~~ break; ~~~~~~ case at_dot: ~~~~~~~~~~~~ DEBUG_MATCH_PRINT1 ("EXECUTING at_dot.\n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!BUFFERP (lispobj) ~~~~~~~~~~~~~~~~~~~~~~ || (BUF_PTR_BYTE_POS (XBUFFER (lispobj), (unsigned char *) d) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ != BUF_PT (XBUFFER (lispobj)))) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto fail; ~~~~~~~~~~ break; ~~~~~~ case after_dot: ~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT1 ("EXECUTING after_dot.\n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!BUFFERP (lispobj) ~~~~~~~~~~~~~~~~~~~~~~ || (BUF_PTR_BYTE_POS (XBUFFER (lispobj), (unsigned char *) d) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ <= BUF_PT (XBUFFER (lispobj)))) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto fail; ~~~~~~~~~~ break; ~~~~~~ case syntaxspec: ~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT2 ("EXECUTING syntaxspec %d.\n", mcnt); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ mcnt = *p++; ~~~~~~~~~~~~ goto matchsyntax; ~~~~~~~~~~~~~~~~~ case wordchar: ~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT1 ("EXECUTING Emacs wordchar.\n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ mcnt = (int) Sword; ~~~~~~~~~~~~~~~~~~~ matchsyntax: ~~~~~~~~~~~~ should_succeed = 1; ~~~~~~~~~~~~~~~~~~~ matchornotsyntax: ~~~~~~~~~~~~~~~~~ { ~ int matches; ~~~~~~~~~~~~ Ichar emch; ~~~~~~~~~~~ REGEX_PREFETCH (); ~~~~~~~~~~~~~~~~~~ BEGIN_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~~~ UPDATE_SYNTAX_CACHE ~~~~~~~~~~~~~~~~~~~ (scache, ~~~~~~~~ offset_to_bytexpos (lispobj, PTR_TO_OFFSET (d))); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ END_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~ RE_MATCH_RELOCATE_MOVEABLE_DATA_POINTERS (); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ emch = itext_ichar_fmt (d, fmt, lispobj); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ BEGIN_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~~~ matches = (SYNTAX_FROM_CACHE (scache, emch) == ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (enum syntaxcode) mcnt); ~~~~~~~~~~~~~~~~~~~~~~~~ END_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~ RE_MATCH_RELOCATE_MOVEABLE_DATA_POINTERS (); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ INC_IBYTEPTR_FMT (d, fmt); ~~~~~~~~~~~~~~~~~~~~~~~~~~ if (matches != should_succeed) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto fail; ~~~~~~~~~~ SET_REGS_MATCHED (); ~~~~~~~~~~~~~~~~~~~~ } ~ break; ~~~~~~ case notsyntaxspec: ~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT2 ("EXECUTING notsyntaxspec %d.\n", mcnt); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ mcnt = *p++; ~~~~~~~~~~~~ goto matchnotsyntax; ~~~~~~~~~~~~~~~~~~~~ case notwordchar: ~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT1 ("EXECUTING Emacs notwordchar.\n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ mcnt = (int) Sword; ~~~~~~~~~~~~~~~~~~~ matchnotsyntax: ~~~~~~~~~~~~~~~ should_succeed = 0; ~~~~~~~~~~~~~~~~~~~ goto matchornotsyntax; ~~~~~~~~~~~~~~~~~~~~~~ #ifdef MULE ~~~~~~~~~~~ /* 97/2/17 jhod Mule category code patch */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case categoryspec: ~~~~~~~~~~~~~~~~~~ should_succeed = 1; ~~~~~~~~~~~~~~~~~~~ matchornotcategory: ~~~~~~~~~~~~~~~~~~~ { ~ Ichar emch; ~~~~~~~~~~~ mcnt = *p++; ~~~~~~~~~~~~ REGEX_PREFETCH (); ~~~~~~~~~~~~~~~~~~ emch = itext_ichar_fmt (d, fmt, lispobj); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ INC_IBYTEPTR_FMT (d, fmt); ~~~~~~~~~~~~~~~~~~~~~~~~~~ if (check_char_in_category (emch, BUFFER_CATEGORY_TABLE (lispbuf), ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ mcnt, should_succeed)) ~~~~~~~~~~~~~~~~~~~~~~ goto fail; ~~~~~~~~~~ SET_REGS_MATCHED (); ~~~~~~~~~~~~~~~~~~~~ } ~ break; ~~~~~~ case notcategoryspec: ~~~~~~~~~~~~~~~~~~~~~ should_succeed = 0; ~~~~~~~~~~~~~~~~~~~ goto matchornotcategory; ~~~~~~~~~~~~~~~~~~~~~~~~ /* end of category patch */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif /* MULE */ ~~~~~~~~~~~~~~~~~ #else /* not emacs */ ~~~~~~~~~~~~~~~~~~~~~ case wordchar: ~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT1 ("EXECUTING non-Emacs wordchar.\n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ REGEX_PREFETCH (); ~~~~~~~~~~~~~~~~~~ if (!WORDCHAR_P ((int) (*d))) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto fail; ~~~~~~~~~~ SET_REGS_MATCHED (); ~~~~~~~~~~~~~~~~~~~~ d++; ~~~~ break; ~~~~~~ case notwordchar: ~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT1 ("EXECUTING non-Emacs notwordchar.\n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ REGEX_PREFETCH (); ~~~~~~~~~~~~~~~~~~ if (!WORDCHAR_P ((int) (*d))) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto fail; ~~~~~~~~~~ SET_REGS_MATCHED (); ~~~~~~~~~~~~~~~~~~~~ d++; ~~~~ break; ~~~~~~ #endif /* emacs */ ~~~~~~~~~~~~~~~~~~ default: ~~~~~~~~ ABORT (); ~~~~~~~~~ } ~ continue; /* Successfully executed one pattern command; keep going. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* We goto here if a matching operation fails. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fail: ~~~~~ if (!FAIL_STACK_EMPTY ()) ~~~~~~~~~~~~~~~~~~~~~~~~~ { /* A restart point is known. Restore to that state. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT1 ("\nFAIL:\n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ POP_FAILURE_POINT (d, p, ~~~~~~~~~~~~~~~~~~~~ regex.c:774:55: note: in definition of macro 'DEBUG_FAIL_PRINT2' if (debug_regexps & RE_DEBUG_FAILURE_POINT) printf (x1, x2) ^~ regex.c:7173:11: note: in expansion of macro 'POP_FAILURE_POINT' POP_FAILURE_POINT (d, p, ^~~~~~~~~~~~~~~~~ regex.c:1905:26: warning: format '%zx' expects argument of type 'size_t', but argument 2 has type 'long int' [-Wformat=] DEBUG_FAIL_PRINT2 (" Popping pattern 0x%zx: ", (Bytecount) pat); \ ^ ~~~~~~~~~~~~~~~~~~~ DEBUG_FAIL_PRINT_COMPILED_PATTERN (bufp, pat, pend); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_FAIL_PRINT2 (" Popping high active reg: %d\n", high_reg); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_FAIL_PRINT2 (" Popping low active reg: %d\n", low_reg); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } \ ~~~~~~~~~~~ \ ~ for (this_reg = high_reg; this_reg >= low_reg; this_reg--) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { \ ~~~~~~~~~~~ reg_info[this_reg].word = POP_FAILURE_ELT (); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regend[this_reg] = POP_FAILURE_RELOCATABLE (); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regstart[this_reg] = POP_FAILURE_RELOCATABLE (); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ \ ~ if (DEBUG_RUNTIME_FLAGS & RE_DEBUG_FAILURE_POINT) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { \ ~~~~~~~~~~ DEBUG_FAIL_PRINT2 (" Popping reg: %d\n", this_reg); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_FAIL_PRINT2 (" info: 0x%zx\n", \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ * (Bytecount *) ®_info[this_reg]); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_FAIL_PRINT2 (" end: 0x%zx\n", \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (Bytecount) regend[this_reg]); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_FAIL_PRINT2 (" start: 0x%zx\n", \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (Bytecount) regstart[this_reg]); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } \ ~~~~~~~~~~ } \ ~~~~~~~~~~~ \ ~ set_regs_matched_done = 0; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_STATEMENT (nfailure_points_popped++); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } while (0) /* POP_FAILURE_POINT */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ~ /* Structure for per-register (a.k.a. per-group) information. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Other register information, such as the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ starting and ending positions (which are addresses), and the list of ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ inner groups (which is a bits list) are maintained in separate ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ variables. ~~~~~~~~~~ We are making a (strictly speaking) nonportable assumption here: that ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the compiler will pack our bit fields into something that fits into ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the type of `word', i.e., is something that fits into one item on the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ failure stack. */ ~~~~~~~~~~~~~~~~~~ typedef union ~~~~~~~~~~~~~ { ~ fail_stack_elt_t word; ~~~~~~~~~~~~~~~~~~~~~~ struct ~~~~~~ { ~ /* This field is one if this group can match the empty string, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ zero if not. If not yet determined, `MATCH_NULL_UNSET_VALUE'. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define MATCH_NULL_UNSET_VALUE 3 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ unsigned int match_null_string_p : 2; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ unsigned int is_active : 1; ~~~~~~~~~~~~~~~~~~~~~~~~~~~ unsigned int matched_something : 1; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ unsigned int ever_matched_something : 1; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } bits; ~~~~~~~ } register_info_type; ~~~~~~~~~~~~~~~~~~~~~ #define REG_MATCH_NULL_STRING_P(R) ((R).bits.match_null_string_p) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define IS_ACTIVE(R) ((R).bits.is_active) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define MATCHED_SOMETHING(R) ((R).bits.matched_something) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define EVER_MATCHED_SOMETHING(R) ((R).bits.ever_matched_something) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Call this when have matched a real character; it sets `matched' flags ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for the subexpressions which we are currently inside. Also records ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ that those subexprs have matched. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define SET_REGS_MATCHED() \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ do \ ~~~~~~~~~~~~ { \ ~~~~~~~~~~~ if (!set_regs_matched_done) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { \ ~~~~~~~~~~ int r; \ ~~~~~~~~~~~~~~ set_regs_matched_done = 1; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (r = lowest_active_reg; r <= highest_active_reg; r++) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { \ ~~~~~~~~~~ MATCHED_SOMETHING (reg_info[r]) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ = EVER_MATCHED_SOMETHING (reg_info[r]) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ = 1; \ ~~~~~~~~~~~~ } \ ~~~~~~~~~~ } \ ~~~~~~~~~~ } \ ~~~~~~~~~~~ while (0) ~~~~~~~~~ ~ /* Subroutine declarations and macros for regex_compile. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Fetch the next character in the uncompiled pattern---translating it ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if necessary. */ ~~~~~~~~~~~~~~~~~ #define PATFETCH(c) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~ do { \ ~~~~~~~~~~~~~~ PATFETCH_RAW (c); \ ~~~~~~~~~~~~~~~~~~~~~~~~~ c = RE_TRANSLATE (c); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } while (0) ~~~~~~~~~~~ /* Fetch the next character in the uncompiled pattern, with no ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ translation. */ ~~~~~~~~~~~~~~~~ #define PATFETCH_RAW(c) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ do {if (p == pend) return REG_EEND; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ assert (p < pend); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~ c = itext_ichar (p); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ INC_IBYTEPTR (p); \ ~~~~~~~~~~~~~~~~~~~~~~~~~ } while (0) ~~~~~~~~~~~ /* Go backwards one character in the pattern. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define PATUNFETCH DEC_IBYTEPTR (p) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* If `translate' is non-null, return translate[D], else just D. We ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ cast the subscript to translate because some data is declared as ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `char *', to avoid warnings when a string constant is passed. But ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ when we use a character as a subscript we must make it unsigned. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define RE_TRANSLATE(d) \ ~~~~~~~~~~~~~~~~~~~~~~~~~ (TRANSLATE_P (translate) ? RE_TRANSLATE_1 (d) : (d)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Macros for outputting the compiled pattern into `buffer'. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* If the buffer isn't allocated when it comes in, use this. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define INIT_BUF_SIZE 32 ~~~~~~~~~~~~~~~~~~~~~~~~~ /* Make sure we have at least N more bytes of space in buffer. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define GET_BUFFER_SPACE(n) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ while (buf_end - bufp->buffer + (n) > (ptrdiff_t) bufp->allocated) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ EXTEND_BUFFER () ~~~~~~~~~~~~~~~~ /* Make sure we have one more byte of buffer space and then add C to it. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define BUF_PUSH(c) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~ do { \ ~~~~~~~~~~~~~~ GET_BUFFER_SPACE (1); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ *buf_end++ = (unsigned char) (c); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } while (0) ~~~~~~~~~~~ /* Ensure we have two more bytes of buffer space and then append C1 and C2. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define BUF_PUSH_2(c1, c2) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ do { \ ~~~~~~~~~~~~~~ GET_BUFFER_SPACE (2); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ *buf_end++ = (unsigned char) (c1); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ *buf_end++ = (unsigned char) (c2); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } while (0) ~~~~~~~~~~~ /* As with BUF_PUSH_2, except for three bytes. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define BUF_PUSH_3(c1, c2, c3) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ do { \ ~~~~~~~~~~~~~~ GET_BUFFER_SPACE (3); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ *buf_end++ = (unsigned char) (c1); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ *buf_end++ = (unsigned char) (c2); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ *buf_end++ = (unsigned char) (c3); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } while (0) ~~~~~~~~~~~ /* Store a jump with opcode OP at LOC to location TO. We store a ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ relative address offset by the three bytes the jump itself occupies. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define STORE_JUMP(op, loc, to) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ store_op1 (op, loc, (to) - (loc) - 3) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Likewise, for a two-argument jump. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define STORE_JUMP2(op, loc, to, arg) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ store_op2 (op, loc, (to) - (loc) - 3, arg) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Like `STORE_JUMP', but for inserting. Assume `buf_end' is the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ buffer end. */ ~~~~~~~~~~~~~~~ #define INSERT_JUMP(op, loc, to) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ insert_op1 (op, loc, (to) - (loc) - 3, buf_end) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Like `STORE_JUMP2', but for inserting. Assume `buf_end' is the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ buffer end. */ ~~~~~~~~~~~~~~~ #define INSERT_JUMP2(op, loc, to, arg) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ insert_op2 (op, loc, (to) - (loc) - 3, arg, buf_end) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Extend the buffer by twice its current size via realloc and ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ reset the pointers that pointed into the old block to point to the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ correct places in the new one. If extending the buffer results in it ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ being larger than RE_MAX_BUF_SIZE, then flag memory exhausted. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define EXTEND_BUFFER() \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ do { \ ~~~~~~~~~~~~~~~ re_char *old_buffer = bufp->buffer; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (bufp->allocated == RE_MAX_BUF_SIZE) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return REG_ESIZE; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->allocated <<= 1; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (bufp->allocated > RE_MAX_BUF_SIZE) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->allocated = RE_MAX_BUF_SIZE; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->buffer = \ ~~~~~~~~~~~~~~~~~~~~~~~ (unsigned char *) xrealloc (bufp->buffer, bufp->allocated); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (bufp->buffer == NULL) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return REG_ESPACE; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~ /* If the buffer moved, move all the pointers into it. */ \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (old_buffer != bufp->buffer) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { \ ~~~~~~~~~~~~ buf_end = (buf_end - old_buffer) + bufp->buffer; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ begalt = (begalt - old_buffer) + bufp->buffer; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (fixup_alt_jump) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~ fixup_alt_jump = (fixup_alt_jump - old_buffer) + bufp->buffer; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (laststart) \ ~~~~~~~~~~~~~~~~~~~~~~~ laststart = (laststart - old_buffer) + bufp->buffer; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (pending_exact) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~ pending_exact = (pending_exact - old_buffer) + bufp->buffer; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } \ ~~~~~~~~~~~~ } while (0) ~~~~~~~~~~~ #define INIT_REG_TRANSLATE_SIZE 5 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Macros for the compile stack. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Since offsets can go either forwards or backwards, this type needs to be ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ able to hold values from -(RE_MAX_BUF_SIZE - 1) to RE_MAX_BUF_SIZE - 1. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ typedef int pattern_offset_t; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ typedef struct ~~~~~~~~~~~~~~ { ~ pattern_offset_t begalt_offset; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ pattern_offset_t fixup_alt_jump; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ pattern_offset_t inner_group_offset; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ pattern_offset_t laststart_offset; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regnum_t regnum; ~~~~~~~~~~~~~~~~ } compile_stack_elt_t; ~~~~~~~~~~~~~~~~~~~~~~ typedef struct ~~~~~~~~~~~~~~ { ~ compile_stack_elt_t *stack; ~~~~~~~~~~~~~~~~~~~~~~~~~~~ int size; ~~~~~~~~~ int avail; /* Offset of next open position. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } compile_stack_type; ~~~~~~~~~~~~~~~~~~~~~ #define INIT_COMPILE_STACK_SIZE 32 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define COMPILE_STACK_EMPTY (compile_stack.avail == 0) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define COMPILE_STACK_FULL (compile_stack.avail == compile_stack.size) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* The next available element. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define COMPILE_STACK_TOP (compile_stack.stack[compile_stack.avail]) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Set the bit for character C in a bit vector. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define SET_LIST_BIT(c) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (buf_end[((unsigned char) (c)) / BYTEWIDTH] \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ |= 1 << (((unsigned char) c) % BYTEWIDTH)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef MULE ~~~~~~~~~~~ /* Set the "bit" for character C in a range table. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define SET_RANGETAB_BIT(c) put_range_table (rtab, c, c, Qt) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ #ifdef emacs ~~~~~~~~~~~~ /* Parse the longest number we can, but don't produce a bignum, that can't ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ correspond to anything we're interested in and would needlessly complicate ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ code. Also avoid the silent overflow issues of the non-emacs code below. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ If the string at P is not exhausted, leave P pointing at the next ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (probable-)non-digit byte encountered. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define GET_UNSIGNED_NUMBER(num) do \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Ibyte *_gus_numend = NULL; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Lisp_Object _gus_numno; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* most-positive-fixnum on 32 bit XEmacs is 10 decimal digits, \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ nine will keep us in fixnum territory no matter our \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ architecture */ \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Bytecount limit = min (pend - p, 9); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ \ ~ /* Require that any digits are ASCII. We already require that \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the user type ASCII in order to type {,(,|, etc, and there is \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the potential for security holes in the future if we allow \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ non-ASCII digits to specify groups in regexps and other \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ code that parses regexps is not aware of this. */ \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ _gus_numno = parse_integer (p, &_gus_numend, limit, 10, 1, \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Vdigit_fixnum_ascii); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (FIXNUMP (_gus_numno) && XREALFIXNUM (_gus_numno) >= 0) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ num = XREALFIXNUM (_gus_numno); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ p = _gus_numend; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } while (0) ~~~~~~~~~~~ #else ~~~~~ /* Get the next unsigned number in the uncompiled pattern. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define GET_UNSIGNED_NUMBER(num) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { if (p != pend) \ ~~~~~~~~~~~~~~~~~~~~~~~~ { \ ~~~~~~~~~~~ int _gun_do_unfetch = 1; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PATFETCH (c); \ ~~~~~~~~~~~~~~~~~~~~~~ while (ISDIGIT (c)) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~ { \ ~~~~~~~~~~~ if (num < 0) \ ~~~~~~~~~~~~~~~~~~~~ num = 0; \ ~~~~~~~~~~~~~~~~ num = num * 10 + c - '0'; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (p == pend) \ ~~~~~~~~~~~~~~~~~~~~~~ { \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ _gun_do_unfetch = 0; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; \ ~~~~~~~~~~~~~~~ } \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PATFETCH (c); \ ~~~~~~~~~~~~~~~~~~~~ } \ ~~~~~~~~~~~ if (_gun_do_unfetch) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Make sure P points to the next non-digit character. */ \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PATUNFETCH; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ #endif ~~~~~~ /* Map a string to the char class it names (if any). BEG points to the string ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ to be parsed and LIMIT is the length, in bytes, of that string. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ XEmacs; this only handles the NAME part of the [:NAME:] specification of a ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ character class name. The GNU emacs version of this function attempts to ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ handle the string from [: onwards, and is called re_wctype_parse. Our ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ approach means the function doesn't need to be called with every character ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ class encountered. ~~~~~~~~~~~~~~~~~~ LENGTH would be a Bytecount if this function didn't need to be compiled ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ also for executables that don't include lisp.h ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Return RECC_ERROR if STRP doesn't match a known character class. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_wctype_t ~~~~~~~~~~~ re_wctype (const unsigned char *beg, int limit) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ /* Sort tests in the length=five case by frequency the classes to minimize ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ number of times we fail the comparison. The frequencies of character class ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ names used in Emacs sources as of 2016-07-27: ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ $ find \( -name \*.c -o -name \*.el \) -exec grep -h '\[:[a-z]*:]' {} + | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ sed 's/]/]\n/g' |grep -o '\[:[a-z]*:]' |sort |uniq -c |sort -nr ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 213 [:alnum:] ~~~~~~~~~~~~~ 104 [:alpha:] ~~~~~~~~~~~~~ 62 [:space:] ~~~~~~~~~~~~ 39 [:digit:] ~~~~~~~~~~~~ 36 [:blank:] ~~~~~~~~~~~~ 26 [:word:] ~~~~~~~~~~~ 26 [:upper:] ~~~~~~~~~~~~ 21 [:lower:] ~~~~~~~~~~~~ 10 [:xdigit:] ~~~~~~~~~~~~~ 10 [:punct:] ~~~~~~~~~~~~ 10 [:ascii:] ~~~~~~~~~~~~ 4 [:nonascii:] ~~~~~~~~~~~~~~ 4 [:graph:] ~~~~~~~~~~~ 2 [:print:] ~~~~~~~~~~~ 2 [:cntrl:] ~~~~~~~~~~~ 1 [:ff:] ~~~~~~~~ If you update this list, consider also updating chain of or'ed conditions ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ in execute_charset function. XEmacs; our equivalent is the condition ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ checking class_bits in the charset_mule and charset_mule_not opcodes. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ ~~ switch (limit) { ~~~~~~~~~~~~~~~~ case 4: ~~~~~~~ if (!memcmp (beg, "word", 4)) return RECC_WORD; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case 5: ~~~~~~~ if (!memcmp (beg, "alnum", 5)) return RECC_ALNUM; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!memcmp (beg, "alpha", 5)) return RECC_ALPHA; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!memcmp (beg, "space", 5)) return RECC_SPACE; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!memcmp (beg, "digit", 5)) return RECC_DIGIT; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!memcmp (beg, "blank", 5)) return RECC_BLANK; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!memcmp (beg, "upper", 5)) return RECC_UPPER; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!memcmp (beg, "lower", 5)) return RECC_LOWER; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!memcmp (beg, "punct", 5)) return RECC_PUNCT; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!memcmp (beg, "ascii", 5)) return RECC_ASCII; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!memcmp (beg, "graph", 5)) return RECC_GRAPH; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!memcmp (beg, "print", 5)) return RECC_PRINT; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!memcmp (beg, "cntrl", 5)) return RECC_CNTRL; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case 6: ~~~~~~~ if (!memcmp (beg, "xdigit", 6)) return RECC_XDIGIT; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case 7: ~~~~~~~ if (!memcmp (beg, "unibyte", 7)) return RECC_UNIBYTE; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case 8: ~~~~~~~ if (!memcmp (beg, "nonascii", 8)) return RECC_NONASCII; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case 9: ~~~~~~~ if (!memcmp (beg, "multibyte", 9)) return RECC_MULTIBYTE; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ } ~ return RECC_ERROR; ~~~~~~~~~~~~~~~~~~ } ~ /* True if CH is in the char class CC. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int ~~~ re_iswctype (int ch, re_wctype_t cc ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RE_ISWCTYPE_ARG_DECL) ~~~~~~~~~~~~~~~~~~~~~ { ~ switch (cc) ~~~~~~~~~~~ { ~ case RECC_ALNUM: return ISALNUM (ch) != 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_ALPHA: return ISALPHA (ch) != 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_BLANK: return ISBLANK (ch) != 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_CNTRL: return ISCNTRL (ch) != 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_DIGIT: return ISDIGIT (ch) != 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_GRAPH: return ISGRAPH (ch) != 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_PRINT: return ISPRINT (ch) != 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_PUNCT: return ISPUNCT (ch) != 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_SPACE: return ISSPACE (ch) != 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef emacs ~~~~~~~~~~~~ case RECC_UPPER: ~~~~~~~~~~~~~~~~ return NILP (lispbuf->case_fold_search) ? ISUPPER (ch) != 0 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ : !NOCASEP (lispbuf, ch); ~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_LOWER: ~~~~~~~~~~~~~~~~ return NILP (lispbuf->case_fold_search) ? ISLOWER (ch) != 0 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ : !NOCASEP (lispbuf, ch); ~~~~~~~~~~~~~~~~~~~~~~~~~ #else ~~~~~ case RECC_UPPER: return ISUPPER (ch) != 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_LOWER: return ISLOWER (ch) != 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ case RECC_XDIGIT: return ISXDIGIT (ch) != 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_ASCII: return ISASCII (ch) != 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_NONASCII: case RECC_MULTIBYTE: return !ISASCII (ch); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_UNIBYTE: return ISUNIBYTE (ch) != 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_WORD: return ISWORD (ch) != 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_ERROR: return false; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ assert (0); ~~~~~~~~~~~ return -1; ~~~~~~~~~~ } ~ #ifdef MULE ~~~~~~~~~~~ static re_bool ~~~~~~~~~~~~~~ re_wctype_can_match_non_ascii (re_wctype_t cc) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ switch (cc) ~~~~~~~~~~~ { ~ case RECC_ASCII: ~~~~~~~~~~~~~~~~ case RECC_UNIBYTE: ~~~~~~~~~~~~~~~~~~ case RECC_CNTRL: ~~~~~~~~~~~~~~~~ case RECC_DIGIT: ~~~~~~~~~~~~~~~~ case RECC_XDIGIT: ~~~~~~~~~~~~~~~~~ case RECC_BLANK: ~~~~~~~~~~~~~~~~ return false; ~~~~~~~~~~~~~ default: ~~~~~~~~ return true; ~~~~~~~~~~~~ } ~ } ~ #endif /* MULE */ ~~~~~~~~~~~~~~~~~ #ifdef emacs ~~~~~~~~~~~~ /* Return a bit-pattern to use in the range-table bits to match multibyte ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ chars of class CC. */ ~~~~~~~~~~~~~~~~~~~~~~ static unsigned char ~~~~~~~~~~~~~~~~~~~~ re_wctype_to_bit (re_wctype_t cc) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ switch (cc) ~~~~~~~~~~~ { ~ case RECC_PRINT: case RECC_GRAPH: ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_ALPHA: return BIT_ALPHA; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_ALNUM: case RECC_WORD: return BIT_WORD; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_LOWER: return BIT_LOWER; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_UPPER: return BIT_UPPER; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_PUNCT: return BIT_PUNCT; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_SPACE: return BIT_SPACE; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_MULTIBYTE: case RECC_NONASCII: ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_ASCII: case RECC_DIGIT: case RECC_XDIGIT: case RECC_CNTRL: ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_BLANK: case RECC_UNIBYTE: case RECC_ERROR: return 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ default: ~~~~~~~~ ABORT (); ~~~~~~~~~ return 0; ~~~~~~~~~ } ~ } ~ #endif /* emacs */ ~~~~~~~~~~~~~~~~~~ ~ static void store_op1 (re_opcode_t op, unsigned char *loc, int arg); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ static void store_op2 (re_opcode_t op, unsigned char *loc, int arg1, int arg2); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ static void insert_op1 (re_opcode_t op, unsigned char *loc, int arg, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ unsigned char *end); ~~~~~~~~~~~~~~~~~~~~ static void insert_op2 (re_opcode_t op, unsigned char *loc, int arg1, int arg2, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ unsigned char *end); ~~~~~~~~~~~~~~~~~~~~ static re_bool at_begline_loc_p (re_char *pattern, re_char *p, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ reg_syntax_t syntax); ~~~~~~~~~~~~~~~~~~~~~ static re_bool at_endline_loc_p (re_char *p, re_char *pend, int syntax); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ static re_bool group_in_compile_stack (compile_stack_type compile_stack, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regnum_t regnum); ~~~~~~~~~~~~~~~~~ static reg_errcode_t compile_range (re_char **p_ptr, re_char *pend, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RE_TRANSLATE_TYPE translate, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ reg_syntax_t syntax, ~~~~~~~~~~~~~~~~~~~~ unsigned char *b); ~~~~~~~~~~~~~~~~~~ #ifdef MULE ~~~~~~~~~~~ static reg_errcode_t compile_extended_range (re_char **p_ptr, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_char *pend, ~~~~~~~~~~~~~~ RE_TRANSLATE_TYPE translate, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ reg_syntax_t syntax, ~~~~~~~~~~~~~~~~~~~~ Lisp_Object rtab); ~~~~~~~~~~~~~~~~~~ #endif /* MULE */ ~~~~~~~~~~~~~~~~~ #ifdef emacs ~~~~~~~~~~~~ reg_errcode_t compile_char_class (re_wctype_t cc, Lisp_Object rtab, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Bitbyte *flags_out); ~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ static re_bool group_match_null_string_p (re_char **p, re_char *end, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ register_info_type *reg_info); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ static re_bool alt_match_null_string_p (re_char *p, re_char *end, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ register_info_type *reg_info); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ static re_bool common_op_match_null_string_p (re_char **p, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_char *end, ~~~~~~~~~~~~~ register_info_type *reg_info); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ static int bcmp_translate (re_char *s1, re_char *s2, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ REGISTER int len, RE_TRANSLATE_TYPE translate ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef emacs ~~~~~~~~~~~~ , Internal_Format fmt, Lisp_Object lispobj ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ ); ~~ static int re_match_2_internal (struct re_pattern_buffer *bufp, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_char *string1, int size1, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_char *string2, int size2, int pos, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ struct re_registers *regs, int stop ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RE_LISP_CONTEXT_ARGS_DECL); ~~~~~~~~~~~~~~~~~~~~~~~~~~~ ~ #ifndef MATCH_MAY_ALLOCATE ~~~~~~~~~~~~~~~~~~~~~~~~~~ /* If we cannot allocate large objects within re_match_2_internal, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ we make the fail stack and register vectors global. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ The fail stack, we grow to the maximum size when a regexp ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ is compiled. ~~~~~~~~~~~~ The register vectors, we adjust in size each time we ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ compile a regexp, according to the number of registers it needs. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ static fail_stack_type fail_stack; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Size with which the following vectors are currently allocated. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ That is so we can make them bigger as needed, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ but never make them smaller. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ static int regs_allocated_size; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ static re_char ** regstart, ** regend; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ static re_char ** old_regstart, ** old_regend; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ static re_char **best_regstart, **best_regend; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ static register_info_type *reg_info; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ static re_char **reg_dummy; ~~~~~~~~~~~~~~~~~~~~~~~~~~~ static register_info_type *reg_info_dummy; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Make the register vectors big enough for NUM_REGS registers, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ but don't make them smaller. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ static ~~~~~~ regex_grow_registers (int num_regs) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ if (num_regs > regs_allocated_size) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ RETALLOC (regstart, num_regs, re_char *); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RETALLOC (regend, num_regs, re_char *); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RETALLOC (old_regstart, num_regs, re_char *); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RETALLOC (old_regend, num_regs, re_char *); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RETALLOC (best_regstart, num_regs, re_char *); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RETALLOC (best_regend, num_regs, re_char *); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RETALLOC (reg_info, num_regs, register_info_type); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RETALLOC (reg_dummy, num_regs, re_char *); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RETALLOC (reg_info_dummy, num_regs, register_info_type); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regs_allocated_size = num_regs; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ } ~ #endif /* not MATCH_MAY_ALLOCATE */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ~ /* `regex_compile' compiles PATTERN (of length SIZE) according to SYNTAX. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Returns one of error codes defined in `regex.h', or zero for success. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Assumes the `allocated' (and perhaps `buffer') and `translate' ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fields are set in BUFP on entry. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ If it succeeds, results are put in BUFP (if it returns an error, the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ contents of BUFP are undefined): ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `buffer' is the compiled pattern; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `syntax' is set to SYNTAX; ~~~~~~~~~~~~~~~~~~~~~~~~~~ `used' is set to the length of the compiled pattern; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `fastmap_accurate' is zero; ~~~~~~~~~~~~~~~~~~~~~~~~~~~ `re_ngroups' is the number of groups/subexpressions (including shy ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ groups) in PATTERN; ~~~~~~~~~~~~~~~~~~~ `re_nsub' is the number of non-shy groups in PATTERN; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `not_bol' and `not_eol' are zero; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ The `fastmap' and `newline_anchor' fields are neither ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ examined nor set. */ ~~~~~~~~~~~~~~~~~~~~~ /* Return, freeing storage we allocated. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define FREE_STACK_RETURN(value) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ do \ ~~~~~~~~~~ { \ ~~~~~~~~~ xfree (compile_stack.stack); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return value; \ ~~~~~~~~~~~~~~~~~~~~ } while (0) ~~~~~~~~~~~ static reg_errcode_t ~~~~~~~~~~~~~~~~~~~~ regex_compile (re_char *pattern, int size, reg_syntax_t syntax, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ struct re_pattern_buffer *bufp) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ /* We fetch characters from PATTERN here. We declare these as int ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (or possibly long) so that chars above 127 can be used as ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ array indices. The macros that fetch a character from the pattern ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ make sure to coerce to unsigned char before assigning, so we won't ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ get bitten by negative numbers here. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* XEmacs change: used to be unsigned char. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ REGISTER EMACS_INT c, c1; ~~~~~~~~~~~~~~~~~~~~~~~~~ /* A random temporary spot in PATTERN. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_char *p1; ~~~~~~~~~~~~ /* Points to the end of the buffer, where we should append. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ REGISTER unsigned char *buf_end; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Keeps track of unclosed groups. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ compile_stack_type compile_stack; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Points to the current (ending) position in the pattern. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_char *p = pattern; ~~~~~~~~~~~~~~~~~~~~~ re_char *pend = pattern + size; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* How to translate the characters in the pattern. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RE_TRANSLATE_TYPE translate = bufp->translate; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Address of the count-byte of the most recently inserted `exactn' ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ command. This makes it possible to tell if a new exact-match ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ character can be added to that command or if the character requires ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ a new `exactn' command. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~ unsigned char *pending_exact = 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Address of start of the most recently finished expression. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ This tells, e.g., postfix * where to find the start of its ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ operand. Reset at the beginning of groups and alternatives. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ unsigned char *laststart = 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Address of beginning of regexp, or inside of last group. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ unsigned char *begalt; ~~~~~~~~~~~~~~~~~~~~~~ /* Place in the uncompiled pattern (i.e., the {) to ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ which to go back if the interval is invalid. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_char *beg_interval; ~~~~~~~~~~~~~~~~~~~~~~ /* Address of the place where a forward jump should go to the end of ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the containing expression. Each alternative of an `or' -- except the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ last -- ends with a forward jump of this sort. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ unsigned char *fixup_alt_jump = 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Counts open-groups as they are encountered. Remembered for the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ matching close-group on the compile stack, so the same register ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ number is put in the stop_memory as the start_memory. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regnum_t regnum = 0; ~~~~~~~~~~~~~~~~~~~~ #ifdef DEBUG ~~~~~~~~~~~~ if (debug_regexps & RE_DEBUG_COMPILATION) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ int debug_count; ~~~~~~~~~~~~~~~~ DEBUG_PRINT1 ("\nCompiling pattern: "); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (debug_count = 0; debug_count < size; debug_count++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ putchar (pattern[debug_count]); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ putchar ('\n'); ~~~~~~~~~~~~~~~ } ~ #endif /* DEBUG */ ~~~~~~~~~~~~~~~~~~ /* Initialize the compile stack. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ compile_stack.stack = TALLOC (INIT_COMPILE_STACK_SIZE, compile_stack_elt_t); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (compile_stack.stack == NULL) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return REG_ESPACE; ~~~~~~~~~~~~~~~~~~ compile_stack.size = INIT_COMPILE_STACK_SIZE; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ compile_stack.avail = 0; ~~~~~~~~~~~~~~~~~~~~~~~~ /* Initialize the pattern buffer. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->syntax = syntax; ~~~~~~~~~~~~~~~~~~~~~~ bufp->fastmap_accurate = 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->not_bol = bufp->not_eol = 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Set `used' to zero, so that if we return an error, the pattern ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ printer (for debugging) will think there's no pattern. We reset it ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ at the end. */ ~~~~~~~~~~~~~~~ bufp->used = 0; ~~~~~~~~~~~~~~~ /* Always count groups, whether or not bufp->no_sub is set. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->re_nsub = 0; ~~~~~~~~~~~~~~~~~~ bufp->re_ngroups = 0; ~~~~~~~~~~~~~~~~~~~~~ bufp->warned_about_incompatible_back_references = 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (bufp->external_to_internal_register == 0) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ bufp->external_to_internal_register_size = INIT_REG_TRANSLATE_SIZE; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RETALLOC (bufp->external_to_internal_register, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->external_to_internal_register_size, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int); ~~~~~ } ~ { ~ int i; ~~~~~~ bufp->external_to_internal_register[0] = 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (i = 1; i < bufp->external_to_internal_register_size; i++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->external_to_internal_register[i] = (int) 0xDEADBEEF; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ #if !defined (emacs) && !defined (SYNTAX_TABLE) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Initialize the syntax table. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ init_syntax_once (); ~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ if (bufp->allocated == 0) ~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ if (bufp->buffer) ~~~~~~~~~~~~~~~~~ { /* If zero allocated, but buffer is non-null, try to realloc ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ enough space. This loses if buffer's address is bogus, but ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ that is the user's responsibility. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RETALLOC (bufp->buffer, INIT_BUF_SIZE, unsigned char); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ else ~~~~ { /* Caller did not allocate a buffer. Do it for them. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->buffer = TALLOC (INIT_BUF_SIZE, unsigned char); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ if (!bufp->buffer) FREE_STACK_RETURN (REG_ESPACE); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->allocated = INIT_BUF_SIZE; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ begalt = buf_end = bufp->buffer; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Loop through the uncompiled pattern until we're at the end. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ while (p != pend) ~~~~~~~~~~~~~~~~~ { ~ PATFETCH (c); ~~~~~~~~~~~~~ switch (c) ~~~~~~~~~~ { ~ case '^': ~~~~~~~~~ { ~ if ( /* If at start of pattern, it's an operator. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ p == pattern + 1 ~~~~~~~~~~~~~~~~ /* If context independent, it's an operator. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || syntax & RE_CONTEXT_INDEP_ANCHORS ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Otherwise, depends on what's come before. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || at_begline_loc_p (pattern, p, syntax)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ BUF_PUSH (begline); ~~~~~~~~~~~~~~~~~~~ else ~~~~ goto normal_char; ~~~~~~~~~~~~~~~~~ } ~ break; ~~~~~~ case '$': ~~~~~~~~~ { ~ if ( /* If at end of pattern, it's an operator. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ p == pend ~~~~~~~~~ /* If context independent, it's an operator. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || syntax & RE_CONTEXT_INDEP_ANCHORS ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Otherwise, depends on what's next. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || at_endline_loc_p (p, pend, syntax)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ BUF_PUSH (endline); ~~~~~~~~~~~~~~~~~~~ else ~~~~ goto normal_char; ~~~~~~~~~~~~~~~~~ } ~ break; ~~~~~~ case '+': ~~~~~~~~~ case '?': ~~~~~~~~~ if ((syntax & RE_BK_PLUS_QM) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || (syntax & RE_LIMITED_OPS)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto normal_char; ~~~~~~~~~~~~~~~~~ handle_plus: ~~~~~~~~~~~~ case '*': ~~~~~~~~~ /* If there is no previous pattern... */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!laststart) ~~~~~~~~~~~~~~~ { ~ if (syntax & RE_CONTEXT_INVALID_OPS) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ FREE_STACK_RETURN (REG_BADRPT); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ else if (!(syntax & RE_CONTEXT_INDEP_OPS)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto normal_char; ~~~~~~~~~~~~~~~~~ } ~ { ~ /* true means zero/many matches are allowed. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_bool zero_times_ok = c != '+'; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_bool many_times_ok = c != '?'; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* true means match shortest string possible. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_bool minimal = false; ~~~~~~~~~~~~~~~~~~~~~~~~ /* If there is a sequence of repetition chars, collapse it ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ down to just one (the right one). We can't combine ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ interval operators with these because of, e.g., `a{2}*', ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ which should only match an even number of `a's. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ while (p != pend) ~~~~~~~~~~~~~~~~~ { ~ PATFETCH (c); ~~~~~~~~~~~~~ if (c == '*' || (!(syntax & RE_BK_PLUS_QM) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ && (c == '+' || c == '?'))) ~~~~~~~~~~~~~~~~~~~~~~~~~~~ ; ~ else if (syntax & RE_BK_PLUS_QM && c == '\\') ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ if (p == pend) FREE_STACK_RETURN (REG_EESCAPE); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PATFETCH (c1); ~~~~~~~~~~~~~~ if (!(c1 == '+' || c1 == '?')) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ PATUNFETCH; ~~~~~~~~~~~ PATUNFETCH; ~~~~~~~~~~~ break; ~~~~~~ } ~ c = c1; ~~~~~~~ } ~ else ~~~~ { ~ PATUNFETCH; ~~~~~~~~~~~ break; ~~~~~~ } ~ /* If we get here, we found another repeat character. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!(syntax & RE_NO_MINIMAL_MATCHING)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ /* "*?" and "+?" and "??" are okay (and mean match ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ minimally), but other sequences (such as "*??" and ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ "+++") are rejected (reserved for future use). */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (minimal || c != '?') ~~~~~~~~~~~~~~~~~~~~~~~~ FREE_STACK_RETURN (REG_BADRPT); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ minimal = true; ~~~~~~~~~~~~~~~ } ~ else ~~~~ { ~ zero_times_ok |= c != '+'; ~~~~~~~~~~~~~~~~~~~~~~~~~~ many_times_ok |= c != '?'; ~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ } ~ /* Star, etc. applied to an empty pattern is equivalent ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ to an empty pattern. */ ~~~~~~~~~~~~~~~~~~~~~~~~ if (!laststart) ~~~~~~~~~~~~~~~ break; ~~~~~~ /* Now we know whether zero matches is allowed ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ and whether two or more matches is allowed ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ and whether we want minimal or maximal matching. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (minimal) ~~~~~~~~~~~~ { ~ if (!many_times_ok) ~~~~~~~~~~~~~~~~~~~ { ~ /* "a??" becomes: ~~~~~~~~~~~~~~~~~ 0: /on_failure_jump to 6 ~~~~~~~~~~~~~~~~~~~~~~~~ 3: /jump to 9 ~~~~~~~~~~~~~ 6: /exactn/1/A ~~~~~~~~~~~~~~ 9: end of pattern. ~~~~~~~~~~~~~~~~~~ */ ~~ GET_BUFFER_SPACE (6); ~~~~~~~~~~~~~~~~~~~~~ INSERT_JUMP (jump, laststart, buf_end + 3); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ buf_end += 3; ~~~~~~~~~~~~~ INSERT_JUMP (on_failure_jump, laststart, laststart + 6); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ buf_end += 3; ~~~~~~~~~~~~~ } ~ else if (zero_times_ok) ~~~~~~~~~~~~~~~~~~~~~~~ { ~ /* "a*?" becomes: ~~~~~~~~~~~~~~~~~ 0: /jump to 6 ~~~~~~~~~~~~~ 3: /exactn/1/A ~~~~~~~~~~~~~~ 6: /on_failure_jump to 3 ~~~~~~~~~~~~~~~~~~~~~~~~ 9: end of pattern. ~~~~~~~~~~~~~~~~~~ */ ~~ GET_BUFFER_SPACE (6); ~~~~~~~~~~~~~~~~~~~~~ INSERT_JUMP (jump, laststart, buf_end + 3); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ buf_end += 3; ~~~~~~~~~~~~~ STORE_JUMP (on_failure_jump, buf_end, laststart + 3); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ buf_end += 3; ~~~~~~~~~~~~~ } ~ else ~~~~ { ~ /* "a+?" becomes: ~~~~~~~~~~~~~~~~~ 0: /exactn/1/A ~~~~~~~~~~~~~~ 3: /on_failure_jump to 0 ~~~~~~~~~~~~~~~~~~~~~~~~ 6: end of pattern. ~~~~~~~~~~~~~~~~~~ */ ~~ GET_BUFFER_SPACE (3); ~~~~~~~~~~~~~~~~~~~~~ STORE_JUMP (on_failure_jump, buf_end, laststart); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ buf_end += 3; ~~~~~~~~~~~~~ } ~ } ~ else ~~~~ { ~ /* Are we optimizing this jump? */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_bool keep_string_p = false; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (many_times_ok) ~~~~~~~~~~~~~~~~~~ { /* More than one repetition is allowed, so put in ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ at the end a backward relative jump from ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `buf_end' to before the next jump we're going ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ to put in below (which jumps from laststart to ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ after this jump). ~~~~~~~~~~~~~~~~~ But if we are at the `*' in the exact sequence `.*\n', ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ insert an unconditional jump backwards to the ., ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ instead of the beginning of the loop. This way we only ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ push a failure point once, instead of every time ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ through the loop. */ ~~~~~~~~~~~~~~~~~~~~~ assert (p - 1 > pattern); ~~~~~~~~~~~~~~~~~~~~~~~~~ /* Allocate the space for the jump. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ GET_BUFFER_SPACE (3); ~~~~~~~~~~~~~~~~~~~~~ /* We know we are not at the first character of the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ pattern, because laststart was nonzero. And we've ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ already incremented `p', by the way, to be the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ character after the `*'. Do we have to do something ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ analogous here for null bytes, because of ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RE_DOT_NOT_NULL? */ ~~~~~~~~~~~~~~~~~~~ if (*(p - 2) == '.' ~~~~~~~~~~~~~~~~~~~ && zero_times_ok ~~~~~~~~~~~~~~~~ && p < pend && *p == '\n' ~~~~~~~~~~~~~~~~~~~~~~~~~ && !(syntax & RE_DOT_NEWLINE)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { /* We have .*\n. */ ~~~~~~~~~~~~~~~~~~~~~~ STORE_JUMP (jump, buf_end, laststart); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ keep_string_p = true; ~~~~~~~~~~~~~~~~~~~~~ } ~ else ~~~~ /* Anything else. */ ~~~~~~~~~~~~~~~~~~~~~ STORE_JUMP (maybe_pop_jump, buf_end, laststart - 3); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* We've added more stuff to the buffer. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ buf_end += 3; ~~~~~~~~~~~~~ } ~ /* On failure, jump from laststart to buf_end + 3, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ which will be the end of the buffer after this jump ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ is inserted. */ ~~~~~~~~~~~~~~~~ GET_BUFFER_SPACE (3); ~~~~~~~~~~~~~~~~~~~~~ INSERT_JUMP (keep_string_p ? on_failure_keep_string_jump ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ : on_failure_jump, ~~~~~~~~~~~~~~~~~~ laststart, buf_end + 3); ~~~~~~~~~~~~~~~~~~~~~~~~ buf_end += 3; ~~~~~~~~~~~~~ if (!zero_times_ok) ~~~~~~~~~~~~~~~~~~~ { ~ /* At least one repetition is required, so insert a ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `dummy_failure_jump' before the initial ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `on_failure_jump' instruction of the loop. This ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ effects a skip over that instruction the first time ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ we hit that loop. */ ~~~~~~~~~~~~~~~~~~~~~ GET_BUFFER_SPACE (3); ~~~~~~~~~~~~~~~~~~~~~ INSERT_JUMP (dummy_failure_jump, laststart, laststart + 6); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ buf_end += 3; ~~~~~~~~~~~~~ } ~ } ~ pending_exact = 0; ~~~~~~~~~~~~~~~~~~ } ~ break; ~~~~~~ case '.': ~~~~~~~~~ laststart = buf_end; ~~~~~~~~~~~~~~~~~~~~ BUF_PUSH (anychar); ~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ #ifdef MULE ~~~~~~~~~~~ #define MAYBE_START_OVER_WITH_EXTENDED(ch) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (ch >= 0x80) do \ ~~~~~~~~~~~~~~~~~~~~~~ { \ ~~~~~~~ goto start_over_with_extended; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } while (0) ~~~~~~~~~~~ #else ~~~~~ #define MAYBE_START_OVER_WITH_EXTENDED(ch) (void)(ch) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ case '[': ~~~~~~~~~ { ~ /* XEmacs change: this whole section */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_bool had_char_class = false; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (p == pend) FREE_STACK_RETURN (REG_EBRACK); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Ensure that we have enough space to push a charset: the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ opcode, the length count, and the bitset; 34 bytes in all. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ GET_BUFFER_SPACE (34); ~~~~~~~~~~~~~~~~~~~~~~ laststart = buf_end; ~~~~~~~~~~~~~~~~~~~~ /* We test `*p == '^' twice, instead of using an if ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ statement, so we only need one BUF_PUSH. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ BUF_PUSH (*p == '^' ? charset_not : charset); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (*p == '^') ~~~~~~~~~~~~~~ p++; ~~~~ /* Remember the first position in the bracket expression. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ p1 = p; ~~~~~~~ /* Push the number of bytes in the bitmap. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ BUF_PUSH ((1 << BYTEWIDTH) / BYTEWIDTH); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Clear the whole map. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~ memset (buf_end, 0, (1 << BYTEWIDTH) / BYTEWIDTH); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* charset_not matches newline according to a syntax bit. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if ((re_opcode_t) buf_end[-2] == charset_not ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ && (syntax & RE_HAT_LISTS_NOT_NEWLINE)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ SET_LIST_BIT ('\n'); ~~~~~~~~~~~~~~~~~~~~ /* Read in characters and ranges, setting map bits. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (;;) ~~~~~~~~ { ~ if (p == pend) FREE_STACK_RETURN (REG_EBRACK); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PATFETCH (c); ~~~~~~~~~~~~~ /* Frumble-bumble, we may have found some extended chars. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Need to start over, process everything using the general ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ extended-char mechanism, and need to use charset_mule and ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ charset_mule_not instead of charset and charset_not. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ MAYBE_START_OVER_WITH_EXTENDED (c); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* \ might escape characters inside [...] and [^...]. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if ((syntax & RE_BACKSLASH_ESCAPE_IN_LISTS) && c == '\\') ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ if (p == pend) FREE_STACK_RETURN (REG_EESCAPE); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PATFETCH (c1); ~~~~~~~~~~~~~~ MAYBE_START_OVER_WITH_EXTENDED (c1); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ SET_LIST_BIT (c1); ~~~~~~~~~~~~~~~~~~ continue; ~~~~~~~~~ } ~ /* Could be the end of the bracket expression. If it's ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ not (i.e., when the bracket expression is `[]' so ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ far), the ']' character bit gets set way below. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (c == ']' && p != p1 + 1) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ /* Look ahead to see if it's a range when the last thing ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ was a character class. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~ if (had_char_class && c == '-' && *p != ']') ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ FREE_STACK_RETURN (REG_ERANGE); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Look ahead to see if it's a range when the last thing ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ was a character: if this is a hyphen not at the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ beginning or the end of a list, then it's the range ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ operator. */ ~~~~~~~~~~~~~ if (c == '-' ~~~~~~~~~~~~ && !(p - 2 >= pattern && p[-2] == '[') ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ && !(p - 3 >= pattern && p[-3] == '[' && p[-2] == '^') ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ && *p != ']') ~~~~~~~~~~~~~ { ~ reg_errcode_t ret; ~~~~~~~~~~~~~~~~~~ MAYBE_START_OVER_WITH_EXTENDED (*(unsigned char *)p); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ret = compile_range (&p, pend, translate, syntax, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ buf_end); ~~~~~~~~~ if (ret != REG_NOERROR) FREE_STACK_RETURN (ret); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ else if (p[0] == '-' && p[1] != ']') ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { /* This handles ranges made up of characters only. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ reg_errcode_t ret; ~~~~~~~~~~~~~~~~~~ /* Move past the `-'. */ ~~~~~~~~~~~~~~~~~~~~~~~~~ PATFETCH (c1); ~~~~~~~~~~~~~~ MAYBE_START_OVER_WITH_EXTENDED (*(unsigned char *)p); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ret = compile_range (&p, pend, translate, syntax, buf_end); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (ret != REG_NOERROR) FREE_STACK_RETURN (ret); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* See if we're at the beginning of a possible character ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ class. */ ~~~~~~~~~~ else if (syntax & RE_CHAR_CLASSES && c == '[' && *p == ':') ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ re_char *str = p + 1; ~~~~~~~~~~~~~~~~~~~~~ PATFETCH (c); ~~~~~~~~~~~~~ c1 = 0; ~~~~~~~ /* If pattern is `[[:'. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (p == pend) FREE_STACK_RETURN (REG_EBRACK); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (;;) ~~~~~~~~ { ~ PATFETCH (c); ~~~~~~~~~~~~~ if ((c == ':' && *p == ']') || p == pend) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ break; ~~~~~~ } ~ c1++; ~~~~~ } ~ /* If isn't a word bracketed by `[:' and `:]': ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ undo the ending character, the letters, and leave ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the leading `:' and `[' (but set bits for them). */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (c == ':' && *p == ']') ~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ re_wctype_t cc = re_wctype (str, c1); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int ch; ~~~~~~~ if (cc == RECC_ERROR) ~~~~~~~~~~~~~~~~~~~~~ FREE_STACK_RETURN (REG_ECTYPE); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Throw away the ] at the end of the character ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ class. */ ~~~~~~~~~~ PATFETCH (c); ~~~~~~~~~~~~~ if (p == pend) FREE_STACK_RETURN (REG_EBRACK); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef MULE ~~~~~~~~~~~ if (re_wctype_can_match_non_ascii (cc)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ goto start_over_with_extended; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ #endif /* MULE */ ~~~~~~~~~~~~~~~~~ for (ch = 0; ch < (1 << BYTEWIDTH); ++ch) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ if (re_iswctype (ch, cc ~~~~~~~~~~~~~~~~~~~~~~~ RE_ISWCTYPE_ARG (current_buffer))) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ SET_LIST_BIT (ch); ~~~~~~~~~~~~~~~~~~ } ~ } ~ had_char_class = true; ~~~~~~~~~~~~~~~~~~~~~~ } ~ else ~~~~ { ~ c1++; ~~~~~ while (c1--) ~~~~~~~~~~~~ PATUNFETCH; ~~~~~~~~~~~ SET_LIST_BIT ('['); ~~~~~~~~~~~~~~~~~~~ SET_LIST_BIT (':'); ~~~~~~~~~~~~~~~~~~~ had_char_class = false; ~~~~~~~~~~~~~~~~~~~~~~~ } ~ } ~ else ~~~~ { ~ had_char_class = false; ~~~~~~~~~~~~~~~~~~~~~~~ SET_LIST_BIT (c); ~~~~~~~~~~~~~~~~~ } ~ } ~ /* Discard any (non)matching list bytes that are all 0 at the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ end of the map. Decrease the map-length byte too. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ while ((int) buf_end[-1] > 0 && buf_end[buf_end[-1] - 1] == 0) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ buf_end[-1]--; ~~~~~~~~~~~~~~ buf_end += buf_end[-1]; ~~~~~~~~~~~~~~~~~~~~~~~ } ~ break; ~~~~~~ #ifdef MULE ~~~~~~~~~~~ start_over_with_extended: ~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ REGISTER Lisp_Object rtab = Qnil; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Bitbyte flags = 0; ~~~~~~~~~~~~~~~~~~ int bytes_needed = sizeof (flags); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_bool had_char_class = false; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* There are extended chars here, which means we need to use the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ unified range-table format. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (buf_end[-2] == charset) ~~~~~~~~~~~~~~~~~~~~~~~~~~~ buf_end[-2] = charset_mule; ~~~~~~~~~~~~~~~~~~~~~~~~~~~ else ~~~~ buf_end[-2] = charset_mule_not; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ buf_end--; ~~~~~~~~~~ p = p1; /* go back to the beginning of the charset, after ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ a possible ^. */ ~~~~~~~~~~~~~~~~ rtab = Vthe_lisp_rangetab; ~~~~~~~~~~~~~~~~~~~~~~~~~~ Fclear_range_table (rtab); ~~~~~~~~~~~~~~~~~~~~~~~~~~ /* charset_not matches newline according to a syntax bit. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if ((re_opcode_t) buf_end[-1] == charset_mule_not ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ && (syntax & RE_HAT_LISTS_NOT_NEWLINE)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ SET_RANGETAB_BIT ('\n'); ~~~~~~~~~~~~~~~~~~~~~~~~ /* Read in characters and ranges, setting map bits. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (;;) ~~~~~~~~ { ~ if (p == pend) FREE_STACK_RETURN (REG_EBRACK); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PATFETCH (c); ~~~~~~~~~~~~~ /* \ might escape characters inside [...] and [^...]. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if ((syntax & RE_BACKSLASH_ESCAPE_IN_LISTS) && c == '\\') ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ if (p == pend) FREE_STACK_RETURN (REG_EESCAPE); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PATFETCH (c1); ~~~~~~~~~~~~~~ SET_RANGETAB_BIT (c1); ~~~~~~~~~~~~~~~~~~~~~~ continue; ~~~~~~~~~ } ~ /* Could be the end of the bracket expression. If it's ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ not (i.e., when the bracket expression is `[]' so ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ far), the ']' character bit gets set way below. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (c == ']' && p != p1 + 1) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ /* Look ahead to see if it's a range when the last thing ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ was a character class. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~ if (had_char_class && c == '-' && *p != ']') ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ FREE_STACK_RETURN (REG_ERANGE); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Look ahead to see if it's a range when the last thing ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ was a character: if this is a hyphen not at the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ beginning or the end of a list, then it's the range ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ operator. */ ~~~~~~~~~~~~~ if (c == '-' ~~~~~~~~~~~~ && !(p - 2 >= pattern && p[-2] == '[') ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ && !(p - 3 >= pattern && p[-3] == '[' && p[-2] == '^') ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ && *p != ']') ~~~~~~~~~~~~~ { ~ reg_errcode_t ret; ~~~~~~~~~~~~~~~~~~ ret = compile_extended_range (&p, pend, translate, syntax, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ rtab); ~~~~~~ if (ret != REG_NOERROR) FREE_STACK_RETURN (ret); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ else if (p[0] == '-' && p[1] != ']') ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { /* This handles ranges made up of characters only. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ reg_errcode_t ret; ~~~~~~~~~~~~~~~~~~ /* Move past the `-'. */ ~~~~~~~~~~~~~~~~~~~~~~~~~ PATFETCH (c1); ~~~~~~~~~~~~~~ ret = compile_extended_range (&p, pend, translate, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ syntax, rtab); ~~~~~~~~~~~~~~ if (ret != REG_NOERROR) FREE_STACK_RETURN (ret); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* See if we're at the beginning of a possible character ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ class. */ ~~~~~~~~~~ else if (syntax & RE_CHAR_CLASSES && c == '[' && *p == ':') ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ re_char *str = p + 1; ~~~~~~~~~~~~~~~~~~~~~ PATFETCH (c); ~~~~~~~~~~~~~ c1 = 0; ~~~~~~~ /* If pattern is `[[:'. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (p == pend) FREE_STACK_RETURN (REG_EBRACK); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (;;) ~~~~~~~~ { ~ PATFETCH (c); ~~~~~~~~~~~~~ if ((c == ':' && *p == ']') || p == pend) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ break; ~~~~~~ } ~ c1++; ~~~~~ } ~ /* If isn't a word bracketed by `[:' and `:]': ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ undo the ending character, the letters, and leave ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the leading `:' and `[' (but set bits for them). */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (c == ':' && *p == ']') ~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ re_wctype_t cc = re_wctype (str, c1); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ reg_errcode_t ret = REG_NOERROR; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (cc == RECC_ERROR) ~~~~~~~~~~~~~~~~~~~~~ FREE_STACK_RETURN (REG_ECTYPE); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Throw away the ] at the end of the character ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ class. */ ~~~~~~~~~~ PATFETCH (c); ~~~~~~~~~~~~~ if (p == pend) FREE_STACK_RETURN (REG_EBRACK); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ret = compile_char_class (cc, rtab, &flags); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (ret != REG_NOERROR) FREE_STACK_RETURN (ret); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ had_char_class = true; ~~~~~~~~~~~~~~~~~~~~~~ } ~ else ~~~~ { ~ c1++; ~~~~~ while (c1--) ~~~~~~~~~~~~ PATUNFETCH; ~~~~~~~~~~~ SET_RANGETAB_BIT ('['); ~~~~~~~~~~~~~~~~~~~~~~~ SET_RANGETAB_BIT (':'); ~~~~~~~~~~~~~~~~~~~~~~~ had_char_class = false; ~~~~~~~~~~~~~~~~~~~~~~~ } ~ } ~ else ~~~~ { ~ had_char_class = false; ~~~~~~~~~~~~~~~~~~~~~~~ SET_RANGETAB_BIT (c); ~~~~~~~~~~~~~~~~~~~~~ } ~ } ~ bytes_needed += unified_range_table_bytes_needed (rtab); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ GET_BUFFER_SPACE (bytes_needed); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ *buf_end++ = flags; ~~~~~~~~~~~~~~~~~~~ unified_range_table_copy_data (rtab, buf_end); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ buf_end += unified_range_table_bytes_used (buf_end); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ } ~ #endif /* MULE */ ~~~~~~~~~~~~~~~~~ case '(': ~~~~~~~~~ if (syntax & RE_NO_BK_PARENS) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto handle_open; ~~~~~~~~~~~~~~~~~ else ~~~~ goto normal_char; ~~~~~~~~~~~~~~~~~ case ')': ~~~~~~~~~ if (syntax & RE_NO_BK_PARENS) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto handle_close; ~~~~~~~~~~~~~~~~~~ else ~~~~ goto normal_char; ~~~~~~~~~~~~~~~~~ case '\n': ~~~~~~~~~~ if (syntax & RE_NEWLINE_ALT) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto handle_alt; ~~~~~~~~~~~~~~~~ else ~~~~ goto normal_char; ~~~~~~~~~~~~~~~~~ case '|': ~~~~~~~~~ if (syntax & RE_NO_BK_VBAR) ~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto handle_alt; ~~~~~~~~~~~~~~~~ else ~~~~ goto normal_char; ~~~~~~~~~~~~~~~~~ case '{': ~~~~~~~~~ if (syntax & RE_INTERVALS && syntax & RE_NO_BK_BRACES) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto handle_interval; ~~~~~~~~~~~~~~~~~~~~~ else ~~~~ goto normal_char; ~~~~~~~~~~~~~~~~~ case '\\': ~~~~~~~~~~ if (p == pend) FREE_STACK_RETURN (REG_EESCAPE); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Do not translate the character after the \, so that we can ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ distinguish, e.g., \B from \b, even if we normally would ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ translate, e.g., B to b. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PATFETCH_RAW (c); ~~~~~~~~~~~~~~~~~ switch (c) ~~~~~~~~~~ { ~ case '(': ~~~~~~~~~ if (syntax & RE_NO_BK_PARENS) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto normal_backslash; ~~~~~~~~~~~~~~~~~~~~~~ handle_open: ~~~~~~~~~~~~ { ~ regnum_t r = 0; ~~~~~~~~~~~~~~~ re_bool shy = 0, named_nonshy = 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!(syntax & RE_NO_SHY_GROUPS) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ && p != pend && itext_ichar_eql (p, '?')) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ INC_IBYTEPTR (p); /* Gobble up the '?'. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PATFETCH (c); /* Fetch the next character, which may be a ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ digit. */ ~~~~~~~~~ switch (c) ~~~~~~~~~~ { ~ case ':': /* shy groups */ ~~~~~~~~~~~~~~~~~~~~~~~~~~ shy = 1; ~~~~~~~~ break; ~~~~~~ case '1': case '2': case '3': case '4': ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case '5': case '6': case '7': case '8': case '9': ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PATUNFETCH; ~~~~~~~~~~~ GET_UNSIGNED_NUMBER (r); ~~~~~~~~~~~~~~~~~~~~~~~~ if (itext_ichar_eql (p, ':')) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ named_nonshy = 1; ~~~~~~~~~~~~~~~~~ INC_IBYTEPTR (p); /* Gobble up the ':'. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ /* Otherwise, fall through and error. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* An explicitly specified regnum must start with ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ non-0. */ ~~~~~~~~~ case '0': ~~~~~~~~~ default: ~~~~~~~~ FREE_STACK_RETURN (REG_BADPAT); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ } ~ ++regnum; ~~~~~~~~~ bufp->re_ngroups++; ~~~~~~~~~~~~~~~~~~~ if (bufp->re_ngroups > MAX_REGNUM) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ FREE_STACK_RETURN (REG_ESUBREG); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ if (!shy) ~~~~~~~~~ { ~ if (named_nonshy) ~~~~~~~~~~~~~~~~~ { ~ if (r < bufp->external_to_internal_register_size) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ if (group_in_compile_stack ~~~~~~~~~~~~~~~~~~~~~~~~~~ (compile_stack, ~~~~~~~~~~~~~~~ bufp->external_to_internal_register[r])) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ /* GNU errors in this context, which is ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ inconsistent; it otherwise has no problem ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ with named non-shy groups overriding ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ previously-assigned group numbers. I choose ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ to error here for consistency with GNU for ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ those writing code that should target ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ both. */ ~~~~~~~~ FREE_STACK_RETURN (REG_ESUBREG); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ } ~ if (r > bufp->re_nsub) ~~~~~~~~~~~~~~~~~~~~~~ { ~ bufp->re_nsub = r; ~~~~~~~~~~~~~~~~~~ } ~ } ~ else ~~~~ { ~ r = ++(bufp->re_nsub); ~~~~~~~~~~~~~~~~~~~~~~ } ~ while (bufp->external_to_internal_register_size <= ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->re_nsub) ~~~~~~~~~~~~~~ { ~ int i; ~~~~~~ int old_size = ~~~~~~~~~~~~~~ bufp->external_to_internal_register_size; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->external_to_internal_register_size ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ += max (old_size + 5, bufp->re_nsub + 5); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RETALLOC (bufp->external_to_internal_register, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->external_to_internal_register_size, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int); ~~~~~ for (i = old_size; ~~~~~~~~~~~~~~~~~~ i < bufp->external_to_internal_register_size; i++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->external_to_internal_register[i] = ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (int) 0xDEADBEEF; ~~~~~~~~~~~~~~~~~ } ~ /* This is explicitly [r] rather than [bufp->re_nsub] for ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the case that the named nonshy group references an ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ unused register number less than bufp->re_nsub. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->external_to_internal_register[r] = ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->re_ngroups; ~~~~~~~~~~~~~~~~~ } ~ if (COMPILE_STACK_FULL) ~~~~~~~~~~~~~~~~~~~~~~~ { ~ RETALLOC (compile_stack.stack, compile_stack.size << 1, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ compile_stack_elt_t); ~~~~~~~~~~~~~~~~~~~~~ if (compile_stack.stack == NULL) return REG_ESPACE; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ compile_stack.size <<= 1; ~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* These are the values to restore when we hit end of this ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ group. They are all relative offsets, so that if the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ whole pattern moves because of realloc, they will still ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ be valid. */ ~~~~~~~~~~~~~ COMPILE_STACK_TOP.begalt_offset = begalt - bufp->buffer; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ COMPILE_STACK_TOP.fixup_alt_jump ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ = fixup_alt_jump ? fixup_alt_jump - bufp->buffer + 1 : 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ COMPILE_STACK_TOP.laststart_offset = buf_end - bufp->buffer; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ COMPILE_STACK_TOP.regnum = bufp->re_ngroups; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ COMPILE_STACK_TOP.inner_group_offset ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ = buf_end - bufp->buffer + 3; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* We will eventually replace the 0 with the number of ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ groups inner to this one, using inner_group_offset, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ above. */ ~~~~~~~~~ GET_BUFFER_SPACE (5); ~~~~~~~~~~~~~~~~~~~~~ store_op2 (start_memory, buf_end, bufp->re_ngroups, 0); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ buf_end += 5; ~~~~~~~~~~~~~ compile_stack.avail++; ~~~~~~~~~~~~~~~~~~~~~~ fixup_alt_jump = 0; ~~~~~~~~~~~~~~~~~~~ laststart = 0; ~~~~~~~~~~~~~~ begalt = buf_end; ~~~~~~~~~~~~~~~~~ pending_exact = 0; ~~~~~~~~~~~~~~~~~~ } ~ break; ~~~~~~ case ')': ~~~~~~~~~ if (syntax & RE_NO_BK_PARENS) goto normal_backslash; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (COMPILE_STACK_EMPTY) ~~~~~~~~~~~~~~~~~~~~~~~~ { ~ if (syntax & RE_UNMATCHED_RIGHT_PAREN_ORD) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto normal_backslash; ~~~~~~~~~~~~~~~~~~~~~~ else ~~~~ FREE_STACK_RETURN (REG_ERPAREN); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ handle_close: ~~~~~~~~~~~~~ if (fixup_alt_jump) ~~~~~~~~~~~~~~~~~~~ { /* Push a dummy failure point at the end of the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ alternative for a possible future ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `pop_failure_jump' to pop. See comments at ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `push_dummy_failure' in `re_match_2'. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ BUF_PUSH (push_dummy_failure); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* We allocated space for this jump when we assigned ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ to `fixup_alt_jump', in the `handle_alt' case below. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ STORE_JUMP (jump_past_alt, fixup_alt_jump, buf_end - 1); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* See similar code for backslashed left paren above. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (COMPILE_STACK_EMPTY) ~~~~~~~~~~~~~~~~~~~~~~~~ { ~ if (syntax & RE_UNMATCHED_RIGHT_PAREN_ORD) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto normal_char; ~~~~~~~~~~~~~~~~~ else ~~~~ FREE_STACK_RETURN (REG_ERPAREN); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* Since we just checked for an empty stack above, this ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ``can't happen''. */ ~~~~~~~~~~~~~~~~~~~~~ assert (compile_stack.avail != 0); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ /* We don't just want to restore into `regnum', because ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ later groups should continue to be numbered higher, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ as in `(ab)c(de)' -- the second group is #2. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regnum_t this_group_regnum; ~~~~~~~~~~~~~~~~~~~~~~~~~~~ unsigned char *inner_group_loc; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ compile_stack.avail--; ~~~~~~~~~~~~~~~~~~~~~~ begalt = bufp->buffer + COMPILE_STACK_TOP.begalt_offset; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fixup_alt_jump ~~~~~~~~~~~~~~ = COMPILE_STACK_TOP.fixup_alt_jump ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ? bufp->buffer + COMPILE_STACK_TOP.fixup_alt_jump - 1 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ : 0; ~~~~ laststart = bufp->buffer + COMPILE_STACK_TOP.laststart_offset; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ this_group_regnum = COMPILE_STACK_TOP.regnum; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ pending_exact = 0; ~~~~~~~~~~~~~~~~~~ /* We're at the end of the group, so now we know how many ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ groups were inside this one. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ inner_group_loc ~~~~~~~~~~~~~~~ = bufp->buffer + COMPILE_STACK_TOP.inner_group_offset; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ STORE_NUMBER (inner_group_loc, regnum - this_group_regnum); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ GET_BUFFER_SPACE (5); ~~~~~~~~~~~~~~~~~~~~~ store_op2 (stop_memory, buf_end, this_group_regnum, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regnum - this_group_regnum); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ buf_end += 5; ~~~~~~~~~~~~~ } ~ break; ~~~~~~ case '|': /* `\|'. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~ if (syntax & RE_LIMITED_OPS || syntax & RE_NO_BK_VBAR) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto normal_backslash; ~~~~~~~~~~~~~~~~~~~~~~ handle_alt: ~~~~~~~~~~~ if (syntax & RE_LIMITED_OPS) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto normal_char; ~~~~~~~~~~~~~~~~~ /* Insert before the previous alternative a jump which ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ jumps to this alternative if the former fails. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ GET_BUFFER_SPACE (3); ~~~~~~~~~~~~~~~~~~~~~ INSERT_JUMP (on_failure_jump, begalt, buf_end + 6); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ pending_exact = 0; ~~~~~~~~~~~~~~~~~~ buf_end += 3; ~~~~~~~~~~~~~ /* The alternative before this one has a jump after it ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ which gets executed if it gets matched. Adjust that ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ jump so it will jump to this alternative's analogous ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ jump (put in below, which in turn will jump to the next ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (if any) alternative's such jump, etc.). The last such ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ jump jumps to the correct final destination. A picture: ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ _____ _____ ~~~~~~~~~~~ | | | | ~~~~~~~~~~~ | v | v ~~~~~~~~~~~ a | b | c ~~~~~~~~~~~ If we are at `b', then fixup_alt_jump right now points to a ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ three-byte space after `a'. We'll put in the jump, set ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fixup_alt_jump to right after `b', and leave behind three ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bytes which we'll fill in when we get to after `c'. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (fixup_alt_jump) ~~~~~~~~~~~~~~~~~~~ STORE_JUMP (jump_past_alt, fixup_alt_jump, buf_end); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Mark and leave space for a jump after this alternative, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ to be filled in later either by next alternative or ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ when know we're at the end of a series of alternatives. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fixup_alt_jump = buf_end; ~~~~~~~~~~~~~~~~~~~~~~~~~ GET_BUFFER_SPACE (3); ~~~~~~~~~~~~~~~~~~~~~ buf_end += 3; ~~~~~~~~~~~~~ laststart = 0; ~~~~~~~~~~~~~~ begalt = buf_end; ~~~~~~~~~~~~~~~~~ break; ~~~~~~ case '{': ~~~~~~~~~ /* If \{ is a literal. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!(syntax & RE_INTERVALS) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* If we're at `\{' and it's not the open-interval ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ operator. */ ~~~~~~~~~~~~~ || ((syntax & RE_INTERVALS) && (syntax & RE_NO_BK_BRACES)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || (p - 2 == pattern && p == pend)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto normal_backslash; ~~~~~~~~~~~~~~~~~~~~~~ #define BAD_INTERVAL(errnum) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ do \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (syntax & RE_NO_BK_BRACES) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto unfetch_interval; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ else \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ FREE_STACK_RETURN (errnum); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } while (0) ~~~~~~~~~~~ handle_interval: ~~~~~~~~~~~~~~~~ { ~ /* If got here, then the syntax allows intervals. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* At least (most) this many matches must be made. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int lower_bound = 0, upper_bound = -1; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ beg_interval = p - 1; ~~~~~~~~~~~~~~~~~~~~~ if (p == pend || itext_ichar_eql (p, '+')) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ BAD_INTERVAL (REG_EBRACE); ~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ GET_UNSIGNED_NUMBER (lower_bound); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (p == pend) BAD_INTERVAL (REG_EBRACE); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PATFETCH (c); ~~~~~~~~~~~~~ if (c == ',') ~~~~~~~~~~~~~ { ~ if (p == pend || itext_ichar_eql (p, '+')) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ BAD_INTERVAL (REG_EBRACE); ~~~~~~~~~~~~~~~~~~~~~~~~~~ GET_UNSIGNED_NUMBER (upper_bound); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (upper_bound < 0) upper_bound = RE_DUP_MAX; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (p == pend) BAD_INTERVAL (REG_EBRACE); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PATFETCH (c); ~~~~~~~~~~~~~ } ~ else ~~~~ { ~ /* Interval such as `{1}' => match exactly once. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ upper_bound = lower_bound; ~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ if (lower_bound > upper_bound) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ BAD_INTERVAL (REG_EBRACE); ~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ if (upper_bound > RE_DUP_MAX) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ BAD_INTERVAL (REG_ESIZEBR); ~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ if (!(syntax & RE_NO_BK_BRACES)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ if (c != '\\') ~~~~~~~~~~~~~~ FREE_STACK_RETURN (REG_BADBR); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (p == pend) ~~~~~~~~~~~~~~ FREE_STACK_RETURN (REG_EESCAPE); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PATFETCH (c); ~~~~~~~~~~~~~ } ~ if (c != '}') ~~~~~~~~~~~~~ { ~ BAD_INTERVAL (REG_EBRACE); ~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* We just parsed a valid interval. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* It's invalid to have no preceding RE. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!laststart) ~~~~~~~~~~~~~~~ { ~ if (syntax & RE_CONTEXT_INVALID_OPS) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ FREE_STACK_RETURN (REG_BADRPT); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ else if (syntax & RE_CONTEXT_INDEP_OPS) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ laststart = buf_end; ~~~~~~~~~~~~~~~~~~~~ else ~~~~ goto unfetch_interval; ~~~~~~~~~~~~~~~~~~~~~~ } ~ /* If the upper bound is zero, don't want to succeed at ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ all; jump from `laststart' to `b + 3', which will be ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the end of the buffer after we insert the jump. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (upper_bound == 0) ~~~~~~~~~~~~~~~~~~~~~ { ~ GET_BUFFER_SPACE (3); ~~~~~~~~~~~~~~~~~~~~~ INSERT_JUMP (jump, laststart, buf_end + 3); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ buf_end += 3; ~~~~~~~~~~~~~ } ~ /* Otherwise, we have a nontrivial interval. When ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ we're all done, the pattern will look like: ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ set_number_at ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ set_number_at ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ succeed_n ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ~~~~~~~~~~~~~~ jump_n ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (The upper bound and `jump_n' are omitted if ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `upper_bound' is 1, though.) */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ else ~~~~ { /* If the upper bound is > 1, we need to insert ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ more at the end of the loop. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int nbytes = 10 + (upper_bound > 1) * 10; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ GET_BUFFER_SPACE (nbytes); ~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Initialize lower bound of the `succeed_n', even ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ though it will be set during matching by its ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ attendant `set_number_at' (inserted next), ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ because `re_compile_fastmap' needs to know. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Jump to the `jump_n' we might insert below. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ INSERT_JUMP2 (succeed_n, laststart, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ buf_end + 5 + (upper_bound > 1) * 5, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ lower_bound); ~~~~~~~~~~~~~ buf_end += 5; ~~~~~~~~~~~~~ /* Code to initialize the lower bound. Insert ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ before the `succeed_n'. The `5' is the last two ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bytes of this `set_number_at', plus 3 bytes of ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the following `succeed_n'. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ insert_op2 (set_number_at, laststart, 5, lower_bound, buf_end); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ buf_end += 5; ~~~~~~~~~~~~~ if (upper_bound > 1) ~~~~~~~~~~~~~~~~~~~~ { /* More than one repetition is allowed, so ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ append a backward jump to the `succeed_n' ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ that starts this interval. ~~~~~~~~~~~~~~~~~~~~~~~~~~ When we've reached this during matching, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ we'll have matched the interval once, so ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ jump back only `upper_bound - 1' times. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ STORE_JUMP2 (jump_n, buf_end, laststart + 5, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ upper_bound - 1); ~~~~~~~~~~~~~~~~~ buf_end += 5; ~~~~~~~~~~~~~ /* The location we want to set is the second ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ parameter of the `jump_n'; that is `b-2' as ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ an absolute address. `laststart' will be ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the `set_number_at' we're about to insert; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `laststart+3' the number to set, the source ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for the relative address. But we are ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ inserting into the middle of the pattern -- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ so everything is getting moved up by 5. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Conclusion: (b - 2) - (laststart + 3) + 5, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ i.e., b - laststart. ~~~~~~~~~~~~~~~~~~~~ We insert this at the beginning of the loop ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ so that if we fail during matching, we'll ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ reinitialize the bounds. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ insert_op2 (set_number_at, laststart, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ buf_end - laststart, ~~~~~~~~~~~~~~~~~~~~ upper_bound - 1, buf_end); ~~~~~~~~~~~~~~~~~~~~~~~~~~ buf_end += 5; ~~~~~~~~~~~~~ } ~ } ~ pending_exact = 0; ~~~~~~~~~~~~~~~~~~ beg_interval = NULL; ~~~~~~~~~~~~~~~~~~~~ } ~ break; ~~~~~~ #undef BAD_INTERVAL ~~~~~~~~~~~~~~~~~~~ unfetch_interval: ~~~~~~~~~~~~~~~~~ /* If an invalid interval, match the characters as literals. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ assert (beg_interval); ~~~~~~~~~~~~~~~~~~~~~~ p = beg_interval; ~~~~~~~~~~~~~~~~~ beg_interval = NULL; ~~~~~~~~~~~~~~~~~~~~ /* normal_char and normal_backslash need `c'. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PATFETCH (c); ~~~~~~~~~~~~~ if (!(syntax & RE_NO_BK_BRACES)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ if (p > pattern && p[-1] == '\\') ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto normal_backslash; ~~~~~~~~~~~~~~~~~~~~~~ } ~ goto normal_char; ~~~~~~~~~~~~~~~~~ #ifdef emacs ~~~~~~~~~~~~ /* There is no way to specify the before_dot and after_dot ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ operators. rms says this is ok. --karl */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case '=': ~~~~~~~~~ BUF_PUSH (at_dot); ~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case 's': ~~~~~~~~~ laststart = buf_end; ~~~~~~~~~~~~~~~~~~~~ PATFETCH (c); ~~~~~~~~~~~~~ /* XEmacs addition */ ~~~~~~~~~~~~~~~~~~~~~ if (c >= 0x80 || syntax_spec_code[c] == 0377) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ FREE_STACK_RETURN (REG_ESYNTAX); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ BUF_PUSH_2 (syntaxspec, syntax_spec_code[c]); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case 'S': ~~~~~~~~~ laststart = buf_end; ~~~~~~~~~~~~~~~~~~~~ PATFETCH (c); ~~~~~~~~~~~~~ /* XEmacs addition */ ~~~~~~~~~~~~~~~~~~~~~ if (c >= 0x80 || syntax_spec_code[c] == 0377) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ FREE_STACK_RETURN (REG_ESYNTAX); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ BUF_PUSH_2 (notsyntaxspec, syntax_spec_code[c]); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ #ifdef MULE ~~~~~~~~~~~ /* 97.2.17 jhod merged in to XEmacs from mule-2.3 */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case 'c': ~~~~~~~~~ laststart = buf_end; ~~~~~~~~~~~~~~~~~~~~ PATFETCH_RAW (c); ~~~~~~~~~~~~~~~~~ if (c < 32 || c > 127) ~~~~~~~~~~~~~~~~~~~~~~ FREE_STACK_RETURN (REG_ECATEGORY); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ BUF_PUSH_2 (categoryspec, c); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case 'C': ~~~~~~~~~ laststart = buf_end; ~~~~~~~~~~~~~~~~~~~~ PATFETCH_RAW (c); ~~~~~~~~~~~~~~~~~ if (c < 32 || c > 127) ~~~~~~~~~~~~~~~~~~~~~~ FREE_STACK_RETURN (REG_ECATEGORY); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ BUF_PUSH_2 (notcategoryspec, c); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ /* end of category patch */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif /* MULE */ ~~~~~~~~~~~~~~~~~ #endif /* emacs */ ~~~~~~~~~~~~~~~~~~ case 'w': ~~~~~~~~~ laststart = buf_end; ~~~~~~~~~~~~~~~~~~~~ BUF_PUSH (wordchar); ~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case 'W': ~~~~~~~~~ laststart = buf_end; ~~~~~~~~~~~~~~~~~~~~ BUF_PUSH (notwordchar); ~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case '<': ~~~~~~~~~ BUF_PUSH (wordbeg); ~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case '>': ~~~~~~~~~ BUF_PUSH (wordend); ~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case 'b': ~~~~~~~~~ BUF_PUSH (wordbound); ~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case 'B': ~~~~~~~~~ BUF_PUSH (notwordbound); ~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case '`': ~~~~~~~~~ BUF_PUSH (begbuf); ~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case '\'': ~~~~~~~~~~ BUF_PUSH (endbuf); ~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case '1': case '2': case '3': case '4': case '5': ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case '6': case '7': case '8': case '9': ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ regnum_t reg = -1, regint; ~~~~~~~~~~~~~~~~~~~~~~~~~~ if (syntax & RE_NO_BK_REFS) ~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto normal_char; ~~~~~~~~~~~~~~~~~ PATUNFETCH; ~~~~~~~~~~~ GET_UNSIGNED_NUMBER (reg); ~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Progressively divide down the backreference until we find ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ one that corresponds to an existing register. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ while (reg > 10 && ~~~~~~~~~~~~~~~~~~ (syntax & RE_NO_MULTI_DIGIT_BK_REFS ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || reg > bufp->re_nsub ~~~~~~~~~~~~~~~~~~~~~~ || (bufp->external_to_internal_register[reg] ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ == (int) 0xDEADBEEF))) ~~~~~~~~~~~~~~~~~~~~~~ { ~ PATUNFETCH; ~~~~~~~~~~~ reg /= 10; ~~~~~~~~~~ } ~ if (reg > bufp->re_nsub ~~~~~~~~~~~~~~~~~~~~~~~ || (bufp->external_to_internal_register[reg] ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ == (int) 0xDEADBEEF)) ~~~~~~~~~~~~~~~~~~~~~ { ~ /* \N with one digit with a non-existing group has always ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ been a syntax error. ~~~~~~~~~~~~~~~~~~~~ GNU as of Fr 27 Mär 2020 16:24:07 GMT do not accept ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ multidigit backreferences; if they did there would be ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ an argument for this not being an error for those ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ backreferences that are less than some known named ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ backreference. As it is currently we should error, this ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ will give those writing code for XEmacs better ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ feedback. */ ~~~~~~~~~~~~ FREE_STACK_RETURN (REG_ESUBREG); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ regint = bufp->external_to_internal_register[reg]; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Can't back reference to a subexpression if inside of it. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (group_in_compile_stack (compile_stack, regint)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ /* Check REG, not REGINT. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ while (reg > 10) ~~~~~~~~~~~~~~~~ { ~ PATUNFETCH; ~~~~~~~~~~~ reg = reg / 10; ~~~~~~~~~~~~~~~ } ~ goto normal_char; ~~~~~~~~~~~~~~~~~ } ~ #ifdef emacs ~~~~~~~~~~~~ if (reg > 9 && ~~~~~~~~~~~~~~ bufp->warned_about_incompatible_back_references == 0) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ bufp->warned_about_incompatible_back_references = 1; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ warn_when_safe (intern ("regex"), Qinfo, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ "Back reference \\%d now has new " ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ "semantics in %s", reg, pattern); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ #endif ~~~~~~ laststart = buf_end; ~~~~~~~~~~~~~~~~~~~~ GET_BUFFER_SPACE (3); ~~~~~~~~~~~~~~~~~~~~~ store_op1 (duplicate, buf_end, regint); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ buf_end += 3; ~~~~~~~~~~~~~ } ~ break; ~~~~~~ case '+': ~~~~~~~~~ case '?': ~~~~~~~~~ if (syntax & RE_BK_PLUS_QM) ~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto handle_plus; ~~~~~~~~~~~~~~~~~ else ~~~~ goto normal_backslash; ~~~~~~~~~~~~~~~~~~~~~~ default: ~~~~~~~~ normal_backslash: ~~~~~~~~~~~~~~~~~ /* You might think it would be useful for \ to mean ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ not to translate; but if we don't translate it, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ it will never match anything. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ c = RE_TRANSLATE (c); ~~~~~~~~~~~~~~~~~~~~~ goto normal_char; ~~~~~~~~~~~~~~~~~ } ~ break; ~~~~~~ default: ~~~~~~~~ /* Expects the character in `c'. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* `p' points to the location after where `c' came from. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ normal_char: ~~~~~~~~~~~~ { ~ /* The following conditional synced to GNU Emacs 22.1. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* If no exactn currently being built. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!pending_exact ~~~~~~~~~~~~~~~~~~ /* If last exactn not at current position. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || pending_exact + *pending_exact + 1 != buf_end ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* We have only one byte following the exactn for the count. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || *pending_exact >= (1 << BYTEWIDTH) - MAX_ICHAR_LEN ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* If followed by a repetition operator. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ If the lookahead fails because of end of pattern, any ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ trailing backslash will get caught later. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || (p != pend && (*p == '*' || *p == '^')) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || ((syntax & RE_BK_PLUS_QM) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ? p + 1 < pend && *p == '\\' && (p[1] == '+' || p[1] == '?') ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ : p != pend && (*p == '+' || *p == '?')) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || ((syntax & RE_INTERVALS) ~~~~~~~~~~~~~~~~~~~~~~~~~~~ && ((syntax & RE_NO_BK_BRACES) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ? p != pend && *p == '{' ~~~~~~~~~~~~~~~~~~~~~~~~ : p + 1 < pend && (p[0] == '\\' && p[1] == '{')))) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ /* Start building a new exactn. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ laststart = buf_end; ~~~~~~~~~~~~~~~~~~~~ BUF_PUSH_2 (exactn, 0); ~~~~~~~~~~~~~~~~~~~~~~~ pending_exact = buf_end - 1; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ #ifndef MULE ~~~~~~~~~~~~ BUF_PUSH (c); ~~~~~~~~~~~~~ (*pending_exact)++; ~~~~~~~~~~~~~~~~~~~ #else ~~~~~ { ~ Bytecount bt_count; ~~~~~~~~~~~~~~~~~~~ Ibyte tmp_buf[MAX_ICHAR_LEN]; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int i; ~~~~~~ bt_count = set_itext_ichar (tmp_buf, c); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (i = 0; i < bt_count; i++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ BUF_PUSH (tmp_buf[i]); ~~~~~~~~~~~~~~~~~~~~~~ (*pending_exact)++; ~~~~~~~~~~~~~~~~~~~ } ~ } ~ #endif ~~~~~~ break; ~~~~~~ } ~ } /* switch (c) */ ~~~~~~~~~~~~~~~~~~ } /* while p != pend */ ~~~~~~~~~~~~~~~~~~~~~~~ /* Through the pattern now. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (fixup_alt_jump) ~~~~~~~~~~~~~~~~~~~ STORE_JUMP (jump_past_alt, fixup_alt_jump, buf_end); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!COMPILE_STACK_EMPTY) ~~~~~~~~~~~~~~~~~~~~~~~~~ FREE_STACK_RETURN (REG_EPAREN); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* If we don't want backtracking, force success ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the first time we reach the end of the compiled pattern. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (syntax & RE_NO_POSIX_BACKTRACKING) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ BUF_PUSH (succeed); ~~~~~~~~~~~~~~~~~~~ xfree (compile_stack.stack); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* We have succeeded; set the length of the buffer. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->used = buf_end - bufp->buffer; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef DEBUG ~~~~~~~~~~~~ if (debug_regexps & RE_DEBUG_COMPILATION) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ DEBUG_PRINT1 ("\nCompiled pattern: \n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ print_compiled_pattern (bufp); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ #endif /* DEBUG */ ~~~~~~~~~~~~~~~~~~ #ifndef MATCH_MAY_ALLOCATE ~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Initialize the failure stack to the largest possible stack. This ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ isn't necessary unless we're trying to avoid calling alloca in ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the search and match routines. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ int num_regs = bufp->re_ngroups + 1; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Since DOUBLE_FAIL_STACK refuses to double only if the current size ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ is strictly greater than re_max_failures, the largest possible stack ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ is 2 * re_max_failures failure points. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (fail_stack.size < (2 * re_max_failures * MAX_FAILURE_ITEMS)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ fail_stack.size = (2 * re_max_failures * MAX_FAILURE_ITEMS); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (! fail_stack.stack) ~~~~~~~~~~~~~~~~~~~~~~~ fail_stack.stack ~~~~~~~~~~~~~~~~ = (fail_stack_elt_t *) xmalloc (fail_stack.size ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ * sizeof (fail_stack_elt_t)); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ else ~~~~ fail_stack.stack ~~~~~~~~~~~~~~~~ = (fail_stack_elt_t *) xrealloc (fail_stack.stack, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (fail_stack.size ~~~~~~~~~~~~~~~~ * sizeof (fail_stack_elt_t))); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ regex_grow_registers (num_regs); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ #endif /* not MATCH_MAY_ALLOCATE */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return REG_NOERROR; ~~~~~~~~~~~~~~~~~~~ } /* regex_compile */ ~~~~~~~~~~~~~~~~~~~~~ ~ /* Subroutines for `regex_compile'. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Store OP at LOC followed by two-byte integer parameter ARG. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ static void ~~~~~~~~~~~ store_op1 (re_opcode_t op, unsigned char *loc, int arg) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ *loc = (unsigned char) op; ~~~~~~~~~~~~~~~~~~~~~~~~~~ STORE_NUMBER (loc + 1, arg); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* Like `store_op1', but for two two-byte parameters ARG1 and ARG2. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ static void ~~~~~~~~~~~ store_op2 (re_opcode_t op, unsigned char *loc, int arg1, int arg2) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ *loc = (unsigned char) op; ~~~~~~~~~~~~~~~~~~~~~~~~~~ STORE_NUMBER (loc + 1, arg1); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ STORE_NUMBER (loc + 3, arg2); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* Copy the bytes from LOC to END to open up three bytes of space at LOC ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for OP followed by two-byte integer parameter ARG. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ static void ~~~~~~~~~~~ insert_op1 (re_opcode_t op, unsigned char *loc, int arg, unsigned char *end) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ REGISTER unsigned char *pfrom = end; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ REGISTER unsigned char *pto = end + 3; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ while (pfrom != loc) ~~~~~~~~~~~~~~~~~~~~ *--pto = *--pfrom; ~~~~~~~~~~~~~~~~~~ store_op1 (op, loc, arg); ~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* Like `insert_op1', but for two two-byte parameters ARG1 and ARG2. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ static void ~~~~~~~~~~~ insert_op2 (re_opcode_t op, unsigned char *loc, int arg1, int arg2, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ unsigned char *end) ~~~~~~~~~~~~~~~~~~~ { ~ REGISTER unsigned char *pfrom = end; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ REGISTER unsigned char *pto = end + 5; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ while (pfrom != loc) ~~~~~~~~~~~~~~~~~~~~ *--pto = *--pfrom; ~~~~~~~~~~~~~~~~~~ store_op2 (op, loc, arg1, arg2); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* P points to just after a ^ in PATTERN. Return true if that ^ comes ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ after an alternative or a begin-subexpression. We assume there is at ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ least one character before the ^. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ static re_bool ~~~~~~~~~~~~~~ at_begline_loc_p (re_char *pattern, re_char *p, reg_syntax_t syntax) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ re_char *prev = p - 2; ~~~~~~~~~~~~~~~~~~~~~~ re_bool prev_prev_backslash = prev > pattern && prev[-1] == '\\'; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return ~~~~~~ /* After a subexpression? */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (*prev == '(' && (syntax & RE_NO_BK_PARENS || prev_prev_backslash)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* After an alternative? */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || (*prev == '|' && (syntax & RE_NO_BK_VBAR || prev_prev_backslash)); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* The dual of at_begline_loc_p. This one is for $. We assume there is ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ at least one character after the $, i.e., `P < PEND'. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ static re_bool ~~~~~~~~~~~~~~ at_endline_loc_p (re_char *p, re_char *pend, int syntax) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ re_char *next = p; ~~~~~~~~~~~~~~~~~~ re_bool next_backslash = *next == '\\'; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_char *next_next = p + 1 < pend ? p + 1 : 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return ~~~~~~ /* Before a subexpression? */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (syntax & RE_NO_BK_PARENS ? *next == ')' ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ : next_backslash && next_next && *next_next == ')') ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Before an alternative? */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || (syntax & RE_NO_BK_VBAR ? *next == '|' ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ : next_backslash && next_next && *next_next == '|'); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* Returns true if REGNUM is in one of COMPILE_STACK's elements and ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ false if it's not. */ ~~~~~~~~~~~~~~~~~~~~~~ static re_bool ~~~~~~~~~~~~~~ group_in_compile_stack (compile_stack_type compile_stack, regnum_t regnum) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ int this_element; ~~~~~~~~~~~~~~~~~ for (this_element = compile_stack.avail - 1; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ this_element >= 0; ~~~~~~~~~~~~~~~~~~ this_element--) ~~~~~~~~~~~~~~~ if (compile_stack.stack[this_element].regnum == regnum) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return true; ~~~~~~~~~~~~ return false; ~~~~~~~~~~~~~ } ~ /* Read the ending character of a range (in a bracket expression) from the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ uncompiled pattern *P_PTR (which ends at PEND). We assume the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ starting character is in `P[-2]'. (`P[-1]' is the character `-'.) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Then we set the translation of all bits between the starting and ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ending characters (inclusive) in the compiled pattern B. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Return an error code. ~~~~~~~~~~~~~~~~~~~~~ We use these short variable names so we can use the same macros as ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `regex_compile' itself. ~~~~~~~~~~~~~~~~~~~~~~~ Under Mule, this is only called when both chars of the range are ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ASCII. */ ~~~~~~~~~ static reg_errcode_t ~~~~~~~~~~~~~~~~~~~~ compile_range (re_char **p_ptr, re_char *pend, RE_TRANSLATE_TYPE translate, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ reg_syntax_t syntax, unsigned char *buf_end) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ Ichar this_char; ~~~~~~~~~~~~~~~~ re_char *p = *p_ptr; ~~~~~~~~~~~~~~~~~~~~ int range_start, range_end; ~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (p == pend) ~~~~~~~~~~~~~~ return REG_ERANGE; ~~~~~~~~~~~~~~~~~~ /* Even though the pattern is a signed `char *', we need to fetch ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ with unsigned char *'s; if the high bit of the pattern character ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ is set, the range endpoints will be negative if we fetch using a ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ signed char *. ~~~~~~~~~~~~~~ We also want to fetch the endpoints without translating them; the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ appropriate translation is done in the bit-setting loop below. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* The SVR4 compiler on the 3B2 had trouble with unsigned const char *. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ range_start = ((const unsigned char *) p)[-2]; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ range_end = ((const unsigned char *) p)[0]; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Have to increment the pointer into the pattern string, so the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ caller isn't still at the ending character. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (*p_ptr)++; ~~~~~~~~~~~ /* If the start is after the end, the range is empty. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (range_start > range_end) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return syntax & RE_NO_EMPTY_RANGES ? REG_ERANGE : REG_NOERROR; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Here we see why `this_char' has to be larger than an `unsigned ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ char' -- the range is inclusive, so if `range_end' == 0xff ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (assuming 8-bit characters), we would otherwise go into an infinite ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ loop, since all characters <= 0xff. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (this_char = range_start; this_char <= range_end; this_char++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ SET_LIST_BIT (RE_TRANSLATE (this_char)); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ return REG_NOERROR; ~~~~~~~~~~~~~~~~~~~ } ~ #ifdef MULE ~~~~~~~~~~~ static reg_errcode_t ~~~~~~~~~~~~~~~~~~~~ compile_extended_range (re_char **p_ptr, re_char *pend, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RE_TRANSLATE_TYPE translate, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ reg_syntax_t syntax, Lisp_Object rtab) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ Ichar this_char, range_start, range_end; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ const Ibyte *p; ~~~~~~~~~~~~~~~ if (*p_ptr == pend) ~~~~~~~~~~~~~~~~~~~ return REG_ERANGE; ~~~~~~~~~~~~~~~~~~ p = (const Ibyte *) *p_ptr; ~~~~~~~~~~~~~~~~~~~~~~~~~~~ range_end = itext_ichar (p); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ p--; /* back to '-' */ ~~~~~~~~~~~~~~~~~~~~~~ DEC_IBYTEPTR (p); /* back to start of range */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* We also want to fetch the endpoints without translating them; the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ appropriate translation is done in the bit-setting loop below. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ range_start = itext_ichar (p); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ INC_IBYTEPTR (*p_ptr); ~~~~~~~~~~~~~~~~~~~~~~ /* If the start is after the end, the range is empty. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (range_start > range_end) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return syntax & RE_NO_EMPTY_RANGES ? REG_ERANGE : REG_NOERROR; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifndef UNICODE_INTERNAL ~~~~~~~~~~~~~~~~~~~~~~~~ /* Can't have ranges spanning different charsets, except maybe for ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ranges entirely within the first 256 chars. (The intent of this is that ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the effect of such a range would be unpredictable, since there is no ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ well-defined ordering over charsets and the particular assignment of ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ charset ID's is arbitrary.) This does not apply to Unicode, with ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ well-defined character values. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if ((range_start >= 0x100 || range_end >= 0x100) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ && !EQ (old_mule_ichar_charset (range_start), ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ old_mule_ichar_charset (range_end))) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return REG_ERANGESPAN; ~~~~~~~~~~~~~~~~~~~~~~ #endif /* not UNICODE_INTERNAL */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (TRANSLATE_P (translate)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ /* #### This might be way inefficient if the range encompasses 10,000 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ chars or something. To be efficient, you'd have to do something like ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ this: ~~~~~ range_table a ~~~~~~~~~~~~~ range_table b; ~~~~~~~~~~~~~~ map_char_table (translation table, [range_start, range_end]) of ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ lambda (ch, translation): ~~~~~~~~~~~~~~~~~~~~~~~~~ put (ch, Qt) in a ~~~~~~~~~~~~~~~~~ put (translation, Qt) in b ~~~~~~~~~~~~~~~~~~~~~~~~~~ invert the range in a and truncate to [range_start, range_end] ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ put the union of a, b in rtab ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ This is to say, we want to map every character that has a translation ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ to its translation, and other characters to themselves. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ This assumes, as is reasonable in practice, that a translation ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ table maps individual characters to their translation, and does ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ not generally map multiple characters to the same translation. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ ~~ for (this_char = range_start; this_char <= range_end; this_char++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ SET_RANGETAB_BIT (RE_TRANSLATE (this_char)); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ } ~ else ~~~~ put_range_table (rtab, range_start, range_end, Qt); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return REG_NOERROR; ~~~~~~~~~~~~~~~~~~~ } ~ #endif /* MULE */ ~~~~~~~~~~~~~~~~~ #ifdef emacs ~~~~~~~~~~~~ reg_errcode_t ~~~~~~~~~~~~~ compile_char_class (re_wctype_t cc, Lisp_Object rtab, Bitbyte *flags_out) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ *flags_out |= re_wctype_to_bit (cc); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ switch (cc) ~~~~~~~~~~~ { ~ case RECC_ASCII: ~~~~~~~~~~~~~~~~ put_range_table (rtab, 0, 0x7f, Qt); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case RECC_XDIGIT: ~~~~~~~~~~~~~~~~~ put_range_table (rtab, 'a', 'f', Qt); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ put_range_table (rtab, 'A', 'f', Qt); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* fallthrough */ ~~~~~~~~~~~~~~~~~ case RECC_DIGIT: ~~~~~~~~~~~~~~~~ put_range_table (rtab, '0', '9', Qt); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case RECC_BLANK: ~~~~~~~~~~~~~~~~ put_range_table (rtab, ' ', ' ', Qt); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ put_range_table (rtab, '\t', '\t', Qt); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case RECC_PRINT: ~~~~~~~~~~~~~~~~ put_range_table (rtab, ' ', 0x7e, Qt); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ put_range_table (rtab, 0x80, CHAR_CODE_LIMIT, Qt); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case RECC_GRAPH: ~~~~~~~~~~~~~~~~ put_range_table (rtab, '!', 0x7e, Qt); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ put_range_table (rtab, 0x80, CHAR_CODE_LIMIT, Qt); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case RECC_NONASCII: ~~~~~~~~~~~~~~~~~~~ case RECC_MULTIBYTE: ~~~~~~~~~~~~~~~~~~~~ put_range_table (rtab, 0x80, CHAR_CODE_LIMIT, Qt); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case RECC_CNTRL: ~~~~~~~~~~~~~~~~ put_range_table (rtab, 0x00, 0x1f, Qt); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case RECC_UNIBYTE: ~~~~~~~~~~~~~~~~~~ /* Never true in XEmacs. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ /* The following all have their own bits in the class_bits argument to ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ charset_mule and charset_mule_not, they don't use the range table ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ information. */ ~~~~~~~~~~~~~~~ case RECC_ALPHA: ~~~~~~~~~~~~~~~~ case RECC_WORD: ~~~~~~~~~~~~~~~ case RECC_ALNUM: /* Equivalent to RECC_WORD */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case RECC_LOWER: ~~~~~~~~~~~~~~~~ case RECC_PUNCT: ~~~~~~~~~~~~~~~~ case RECC_SPACE: ~~~~~~~~~~~~~~~~ case RECC_UPPER: ~~~~~~~~~~~~~~~~ break; ~~~~~~ } ~ return REG_NOERROR; ~~~~~~~~~~~~~~~~~~~ } ~ #endif /* MULE */ ~~~~~~~~~~~~~~~~~ ~ /* re_compile_fastmap computes a ``fastmap'' for the compiled pattern in ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ BUFP. A fastmap records which of the (1 << BYTEWIDTH) possible ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ characters can start a string that matches the pattern. This fastmap ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ is used by re_search to skip quickly over impossible starting points. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ The caller must supply the address of a (1 << BYTEWIDTH)-byte data ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ area as BUFP->fastmap. ~~~~~~~~~~~~~~~~~~~~~~ We set the `fastmap', `fastmap_accurate', and `can_be_null' fields in ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the pattern buffer. ~~~~~~~~~~~~~~~~~~~ Returns 0 if we succeed, -2 if an internal error. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int ~~~ re_compile_fastmap (struct re_pattern_buffer *bufp ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RE_LISP_SHORT_CONTEXT_ARGS_DECL) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ int j, k; ~~~~~~~~~ #ifdef MATCH_MAY_ALLOCATE ~~~~~~~~~~~~~~~~~~~~~~~~~ fail_stack_type fail_stack; ~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ /* We don't push any register information onto the failure stack. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* &&#### this should be changed for 8-bit-fixed, for efficiency. see ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ comment marked with &&#### in re_search_2. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ REGISTER char *fastmap = bufp->fastmap; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ unsigned char *pattern = bufp->buffer; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ long size = bufp->used; ~~~~~~~~~~~~~~~~~~~~~~~ re_char *p = pattern; ~~~~~~~~~~~~~~~~~~~~~ REGISTER re_char *pend = pattern + size; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef REGEX_REL_ALLOC ~~~~~~~~~~~~~~~~~~~~~~ /* This holds the pointer to the failure stack, when ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ it is allocated relocatably. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fail_stack_elt_t *failure_stack_ptr; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ /* Assume that each path through the pattern can be null until ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ proven otherwise. We set this false at the bottom of switch ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ statement, to which we get only if a particular path doesn't ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ match the empty string. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_bool path_can_be_null = true; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* We aren't doing a `succeed_n' to begin with. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_bool succeed_n_p = false; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef ERROR_CHECK_MALLOC ~~~~~~~~~~~~~~~~~~~~~~~~~ /* The pattern comes from string data, not buffer data. We don't access ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ any buffer data, so we don't have to worry about malloc() (but the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ disallowed flag may have been set by a caller). */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int depth = bind_regex_malloc_disallowed (0); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ assert (fastmap != NULL && p != NULL); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ INIT_FAIL_STACK (); ~~~~~~~~~~~~~~~~~~~ memset (fastmap, 0, 1 << BYTEWIDTH); /* Assume nothing's valid. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->fastmap_accurate = 1; /* It will be when we're done. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->can_be_null = 0; ~~~~~~~~~~~~~~~~~~~~~~ while (1) ~~~~~~~~~ { ~ if (p == pend || *p == succeed) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ /* We have reached the (effective) end of pattern. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!FAIL_STACK_EMPTY ()) ~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ bufp->can_be_null |= path_can_be_null; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Reset for next path. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~ path_can_be_null = true; ~~~~~~~~~~~~~~~~~~~~~~~~ p = (unsigned char *) fail_stack.stack[--fail_stack.avail].pointer; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ continue; ~~~~~~~~~ } ~ else ~~~~ break; ~~~~~~ } ~ /* We should never be about to go beyond the end of the pattern. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ assert (p < pend); ~~~~~~~~~~~~~~~~~~ switch ((re_opcode_t) *p++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ /* I guess the idea here is to simply not bother with a fastmap ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if a backreference is used, since it's too hard to figure out ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the fastmap for the corresponding group. Setting ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `can_be_null' stops `re_search_2' from using the fastmap, so ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ that is all we do. */ ~~~~~~~~~~~~~~~~~~~~~~ case duplicate: ~~~~~~~~~~~~~~~ bufp->can_be_null = 1; ~~~~~~~~~~~~~~~~~~~~~~ goto done; ~~~~~~~~~~ /* Following are the cases which match a character. These end ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ with `break'. */ ~~~~~~~~~~~~~~~~~ case exactn: ~~~~~~~~~~~~ fastmap[p[1]] = 1; ~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case charset: ~~~~~~~~~~~~~ /* XEmacs: Under Mule, these bit vectors will ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ only contain values for characters below 0x80. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH))) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[j] = 1; ~~~~~~~~~~~~~~~ break; ~~~~~~ case charset_not: ~~~~~~~~~~~~~~~~~ /* Chars beyond end of map must be allowed. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef MULE ~~~~~~~~~~~ for (j = *p * BYTEWIDTH; j < 0x80; j++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[j] = 1; ~~~~~~~~~~~~~~~ /* And all extended characters must be allowed, too. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (j = 0x80; j < 0x100; j++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (ibyte_first_byte_p (j)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[j] = 1; ~~~~~~~~~~~~~~~ #else /* not MULE */ ~~~~~~~~~~~~~~~~~~~~ for (j = *p * BYTEWIDTH; j < (1 << BYTEWIDTH); j++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[j] = 1; ~~~~~~~~~~~~~~~ #endif /* MULE */ ~~~~~~~~~~~~~~~~~ for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!(p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH)))) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[j] = 1; ~~~~~~~~~~~~~~~ break; ~~~~~~ #ifdef MULE ~~~~~~~~~~~ case charset_mule: ~~~~~~~~~~~~~~~~~~ { ~ int nentries; ~~~~~~~~~~~~~ Bitbyte flags = *p++; ~~~~~~~~~~~~~~~~~~~~~ if (flags) ~~~~~~~~~~ { ~ /* We need to consult the syntax table, fastmap won't ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ work. */ ~~~~~~~~ bufp->can_be_null = 1; ~~~~~~~~~~~~~~~~~~~~~~ goto done; ~~~~~~~~~~ } ~ nentries = unified_range_table_nentries ((void *) p); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (j = 0; j < nentries; j++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ EMACS_INT first, last; ~~~~~~~~~~~~~~~~~~~~~~ Lisp_Object dummy_val; ~~~~~~~~~~~~~~~~~~~~~~ int jj; ~~~~~~~ Ibyte strr[MAX_ICHAR_LEN]; ~~~~~~~~~~~~~~~~~~~~~~~~~~ unified_range_table_get_range ((void *) p, j, &first, &last, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ &dummy_val); ~~~~~~~~~~~~ #ifndef UNICODE_INTERNAL ~~~~~~~~~~~~~~~~~~~~~~~~ for (jj = first; jj <= last && jj < 0x80; jj++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[jj] = 1; ~~~~~~~~~~~~~~~~ /* Ranges below 0x100 can span charsets, but there ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ are only two (Control-1 and Latin-1), and ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ either first or last has to be in them. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ set_itext_ichar (strr, first); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[*strr] = 1; ~~~~~~~~~~~~~~~~~~~ if (last < 0x100) ~~~~~~~~~~~~~~~~~ { ~ set_itext_ichar (strr, last); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[*strr] = 1; ~~~~~~~~~~~~~~~~~~~ } ~ else if (CHAR_CODE_LIMIT == last) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ /* This is RECC_MULTIBYTE or RECC_NONASCII; true for all ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ non-ASCII characters. */ ~~~~~~~~~~~~~~~~~~~~~~~~ jj = 0x80; ~~~~~~~~~~ while (jj < 0xA0) ~~~~~~~~~~~~~~~~~ { ~ fastmap[jj++] = 1; ~~~~~~~~~~~~~~~~~~ } ~ } ~ #else ~~~~~ /* Ranges can span charsets. We depend on the fact that ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ lead bytes are monotonically non-decreasing as ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ character values increase. @@#### This is a fairly ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ reasonable assumption in general (but DOES NOT WORK in ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ old Mule due to the ordering of private dimension-1 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ chars before official dimension-2 chars), and introduces ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ a dependency on the particular representation. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ Ibyte strrlast[MAX_ICHAR_LEN]; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ set_itext_ichar (strr, first); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ set_itext_ichar (strrlast, min (last, CHAR_CODE_LIMIT - 1)); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (jj = *strr; jj <= *strrlast; jj++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[jj] = 1; ~~~~~~~~~~~~~~~~ } ~ #endif /* not UNICODE_INTERNAL */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* If it's not a possible first byte, it can't be in the fastmap. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ In UTF-8, lead bytes are not contiguous with ASCII, so a ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ range spanning the ASCII/non-ASCII boundary will put ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ extraneous bytes in the range [0x80 - 0xBF] in the fastmap. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (j = 0x80; j < 0x100; j++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!ibyte_first_byte_p (j)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[j] = 0; ~~~~~~~~~~~~~~~ } ~ break; ~~~~~~ case charset_mule_not: ~~~~~~~~~~~~~~~~~~~~~~ { ~ int nentries; ~~~~~~~~~~~~~ int smallest_prev = 0; ~~~~~~~~~~~~~~~~~~~~~~ Bitbyte flags = *p++; ~~~~~~~~~~~~~~~~~~~~~ if (flags) ~~~~~~~~~~ { ~ /* We need to consult the syntax table, fastmap won't ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ work. */ ~~~~~~~~ bufp->can_be_null = 1; ~~~~~~~~~~~~~~~~~~~~~~ goto done; ~~~~~~~~~~ } ~ nentries = unified_range_table_nentries ((void *) p); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifndef UNICODE_INTERNAL ~~~~~~~~~~~~~~~~~~~~~~~~ for (j = 0; j < nentries; j++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ EMACS_INT first, last; ~~~~~~~~~~~~~~~~~~~~~~ Lisp_Object dummy_val; ~~~~~~~~~~~~~~~~~~~~~~ int jj; ~~~~~~~ unified_range_table_get_range ((void *) p, j, &first, &last, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ &dummy_val); ~~~~~~~~~~~~ for (jj = smallest_prev; jj < first && jj < 0x80; jj++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[jj] = 1; ~~~~~~~~~~~~~~~~ smallest_prev = last + 1; ~~~~~~~~~~~~~~~~~~~~~~~~~ if (smallest_prev >= 0x80) ~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ } ~ /* Also set lead bytes after the end */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (j = smallest_prev; j < 0x80; j++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[j] = 1; ~~~~~~~~~~~~~~~ /* Calculating which lead bytes are actually allowed ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ here is rather difficult, so we just punt and allow ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ all of them. ~~~~~~~~~~~~ */ ~~ for (j = 0x80; j < 0x100; j++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (ibyte_first_byte_p (j)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[j] = 1; ~~~~~~~~~~~~~~~ #else ~~~~~ for (j = 0; j < nentries; j++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ EMACS_INT first, last; ~~~~~~~~~~~~~~~~~~~~~~ /* This denotes a range of lead bytes that are not ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ in the fastmap. */ ~~~~~~~~~~~~~~~~~~ int firstlead, lastlead; ~~~~~~~~~~~~~~~~~~~~~~~~ Lisp_Object dummy_val; ~~~~~~~~~~~~~~~~~~~~~~ int jj; ~~~~~~~ unified_range_table_get_range ((void *) p, j, &first, &last, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ &dummy_val); ~~~~~~~~~~~~ /* With Unicode-internal, lead bytes that are entirely ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ within the range and not including the beginning or end ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ are definitely not in the fastmap. Leading bytes that ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ include the beginning or ending characters will be in ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the fastmap unless the beginning or ending characters ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ are the first or last character, respectively, that uses ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ this lead byte. ~~~~~~~~~~~~~~~ @@#### WARNING! In order to determine whether we are the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ first or last character using a lead byte we use and ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ embed in the code some knowledge of how UTF-8 works -- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ at least, the fact that the the first character using a ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ particular lead byte has the minimum-numbered trailing ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ byte in all its trailing bytes, and the last character ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ using a particular lead byte has the maximum-numbered ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ trailing byte in all its trailing bytes. We abstract ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ away the actual minimum/maximum trailing byte numbers, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ at least. We could perhaps do this more portably by ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ just looking at the representation of the character one ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ higher or lower and seeing if the lead byte changes, but ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ you'd run into the problem of invalid characters, e.g. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if you're at the edge of the range of surrogates or are ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the top-most allowed character. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ ~~ if (first < 0x80) ~~~~~~~~~~~~~~~~~ firstlead = first; ~~~~~~~~~~~~~~~~~~ else ~~~~ { ~ Ibyte strr[MAX_ICHAR_LEN]; ~~~~~~~~~~~~~~~~~~~~~~~~~~ Bytecount slen = set_itext_ichar (strr, first); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int kk; ~~~~~~~ /* Determine if we're the first character using our ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ leading byte. */ ~~~~~~~~~~~~~~~~ for (kk = 1; kk < slen; kk++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (strr[kk] != FIRST_TRAILING_BYTE) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ /* If not, this leading byte might occur, so ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ make sure it gets added to the fastmap. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ firstlead = *strr + 1; ~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ } ~ /* Otherwise, we're the first character using our ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ leading byte, and we don't need to add the leading ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ byte to the fastmap. (If our range doesn't ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ completely cover the leading byte, it will get added ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ anyway by the code handling the other end of the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ range.) */ ~~~~~~~~~~ firstlead = *strr; ~~~~~~~~~~~~~~~~~~ } ~ if (last < 0x80) ~~~~~~~~~~~~~~~~ lastlead = last; ~~~~~~~~~~~~~~~~ else ~~~~ { ~ Ibyte strr[MAX_ICHAR_LEN]; ~~~~~~~~~~~~~~~~~~~~~~~~~~ Bytecount slen ~~~~~~~~~~~~~~ = set_itext_ichar (strr, ~~~~~~~~~~~~~~~~~~~~~~~~ min (last, ~~~~~~~~~~ CHAR_CODE_LIMIT - 1)); ~~~~~~~~~~~~~~~~~~~~~~ int kk; ~~~~~~~ /* Same as above but for the last character using ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ our leading byte. */ ~~~~~~~~~~~~~~~~~~~~ for (kk = 1; kk < slen; kk++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (strr[kk] != LAST_TRAILING_BYTE) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ lastlead = *strr - 1; ~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ } ~ lastlead = *strr; ~~~~~~~~~~~~~~~~~ } ~ /* Now, FIRSTLEAD and LASTLEAD are set to the beginning and ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ end, inclusive, of a range of lead bytes that cannot be ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ in the fastmap. Essentially, we want to set all the other ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bytes to be in the fastmap. Here we handle those after ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the previous range and before this one. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (jj = smallest_prev; jj < firstlead; jj++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[jj] = 1; ~~~~~~~~~~~~~~~~ smallest_prev = lastlead + 1; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* Also set lead bytes after the end of the final range. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (j = smallest_prev; j < 0x100; j++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[j] = 1; ~~~~~~~~~~~~~~~ /* If it's not a possible first byte, it can't be in the fastmap. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ In UTF-8, lead bytes are not contiguous with ASCII, so a ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ range spanning the ASCII/non-ASCII boundary will put ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ extraneous bytes in the range [0x80 - 0xBF] in the fastmap. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (j = 0x80; j < 0x100; j++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!ibyte_first_byte_p (j)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[j] = 0; ~~~~~~~~~~~~~~~ #endif /* UNICODE_INTERNAL */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ break; ~~~~~~ #endif /* MULE */ ~~~~~~~~~~~~~~~~~ case anychar: ~~~~~~~~~~~~~ { ~ int fastmap_newline = fastmap['\n']; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* `.' matches anything ... */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef MULE ~~~~~~~~~~~ /* "anything" only includes bytes that can be the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ first byte of a character. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (j = 0; j < 0x100; j++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (ibyte_first_byte_p (j)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[j] = 1; ~~~~~~~~~~~~~~~ #else ~~~~~ for (j = 0; j < (1 << BYTEWIDTH); j++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[j] = 1; ~~~~~~~~~~~~~~~ #endif ~~~~~~ /* ... except perhaps newline. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!(bufp->syntax & RE_DOT_NEWLINE)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap['\n'] = fastmap_newline; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Return if we have already set `can_be_null'; if we have, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ then the fastmap is irrelevant. Something's wrong here. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ else if (bufp->can_be_null) ~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto done; ~~~~~~~~~~ /* Otherwise, have to check alternative paths. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ } ~ #ifndef emacs ~~~~~~~~~~~~~ case wordchar: ~~~~~~~~~~~~~~ for (j = 0; j < (1 << BYTEWIDTH); j++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (SYNTAX (ignored, j) == Sword) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[j] = 1; ~~~~~~~~~~~~~~~ break; ~~~~~~ case notwordchar: ~~~~~~~~~~~~~~~~~ for (j = 0; j < (1 << BYTEWIDTH); j++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (SYNTAX (ignored, j) != Sword) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[j] = 1; ~~~~~~~~~~~~~~~ break; ~~~~~~ #else /* emacs */ ~~~~~~~~~~~~~~~~~ case wordchar: ~~~~~~~~~~~~~~ case notwordchar: ~~~~~~~~~~~~~~~~~ case wordbound: ~~~~~~~~~~~~~~~ case notwordbound: ~~~~~~~~~~~~~~~~~~ case wordbeg: ~~~~~~~~~~~~~ case wordend: ~~~~~~~~~~~~~ case notsyntaxspec: ~~~~~~~~~~~~~~~~~~~ case syntaxspec: ~~~~~~~~~~~~~~~~ /* This match depends on text properties. These end with ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ aborting optimizations. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->can_be_null = 1; ~~~~~~~~~~~~~~~~~~~~~~ goto done; ~~~~~~~~~~ #if 0 /* all of the following code is unused now that the `syntax-table' ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ property exists -- it's trickier to do this than just look in ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the buffer. &&#### but we could just use the syntax-cache stuff ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ instead; why don't we? --ben */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case wordchar: ~~~~~~~~~~~~~~ k = (int) Sword; ~~~~~~~~~~~~~~~~ goto matchsyntax; ~~~~~~~~~~~~~~~~~ case notwordchar: ~~~~~~~~~~~~~~~~~ k = (int) Sword; ~~~~~~~~~~~~~~~~ goto matchnotsyntax; ~~~~~~~~~~~~~~~~~~~~ case syntaxspec: ~~~~~~~~~~~~~~~~ k = *p++; ~~~~~~~~~ matchsyntax: ~~~~~~~~~~~~ #ifdef MULE ~~~~~~~~~~~ for (j = 0; j < 0x80; j++) ~~~~~~~~~~~~~~~~~~~~~~~~~~ if (SYNTAX ~~~~~~~~~~ (XCHAR_TABLE (BUFFER_MIRROR_SYNTAX_TABLE (lispbuf)), j) == ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (enum syntaxcode) k) ~~~~~~~~~~~~~~~~~~~~ fastmap[j] = 1; ~~~~~~~~~~~~~~~ /* @@#### To be correct, we need to set the fastmap for any ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ lead byte any of whose characters can have this syntax code. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ This is hard to calculate so we just punt for now. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (j = 0x80; j < 0x100; j++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (ibyte_first_byte_p (j)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[j] = 1; ~~~~~~~~~~~~~~~ #else /* not MULE */ ~~~~~~~~~~~~~~~~~~~~ for (j = 0; j < (1 << BYTEWIDTH); j++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (SYNTAX ~~~~~~~~~~ (XCHAR_TABLE (BUFFER_MIRROR_SYNTAX_TABLE (lispbuf)), j) == ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (enum syntaxcode) k) ~~~~~~~~~~~~~~~~~~~~ fastmap[j] = 1; ~~~~~~~~~~~~~~~ #endif /* MULE */ ~~~~~~~~~~~~~~~~~ break; ~~~~~~ case notsyntaxspec: ~~~~~~~~~~~~~~~~~~~ k = *p++; ~~~~~~~~~ matchnotsyntax: ~~~~~~~~~~~~~~~ #ifdef MULE ~~~~~~~~~~~ for (j = 0; j < 0x80; j++) ~~~~~~~~~~~~~~~~~~~~~~~~~~ if (SYNTAX ~~~~~~~~~~ (XCHAR_TABLE ~~~~~~~~~~~~ (BUFFER_MIRROR_SYNTAX_TABLE (lispbuf)), j) != ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (enum syntaxcode) k) ~~~~~~~~~~~~~~~~~~~~ fastmap[j] = 1; ~~~~~~~~~~~~~~~ /* @@#### To be correct, we need to set the fastmap for any ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ lead byte all of whose characters do not have this syntax code. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ This is hard to calculate so we just punt for now. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (j = 0x80; j < 0x100; j++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (ibyte_first_byte_p (j)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap[j] = 1; ~~~~~~~~~~~~~~~ #else /* not MULE */ ~~~~~~~~~~~~~~~~~~~~ for (j = 0; j < (1 << BYTEWIDTH); j++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (SYNTAX ~~~~~~~~~~ (XCHAR_TABLE ~~~~~~~~~~~~ (BUFFER_MIRROR_SYNTAX_TABLE (lispbuf)), j) != ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (enum syntaxcode) k) ~~~~~~~~~~~~~~~~~~~~ fastmap[j] = 1; ~~~~~~~~~~~~~~~ #endif /* MULE */ ~~~~~~~~~~~~~~~~~ break; ~~~~~~ #endif /* 0 */ ~~~~~~~~~~~~~~ #ifdef MULE ~~~~~~~~~~~ /* 97/2/17 jhod category patch */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case categoryspec: ~~~~~~~~~~~~~~~~~~ case notcategoryspec: ~~~~~~~~~~~~~~~~~~~~~ bufp->can_be_null = 1; ~~~~~~~~~~~~~~~~~~~~~~ UNBIND_REGEX_MALLOC_CHECK (); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return 0; ~~~~~~~~~ /* end if category patch */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif /* MULE */ ~~~~~~~~~~~~~~~~~ /* All cases after this match the empty string. These end with ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `continue'. */ ~~~~~~~~~~~~~~~ case before_dot: ~~~~~~~~~~~~~~~~ case at_dot: ~~~~~~~~~~~~ case after_dot: ~~~~~~~~~~~~~~~ continue; ~~~~~~~~~ #endif /* emacs */ ~~~~~~~~~~~~~~~~~~ case no_op: ~~~~~~~~~~~ case begline: ~~~~~~~~~~~~~ case endline: ~~~~~~~~~~~~~ case begbuf: ~~~~~~~~~~~~ case endbuf: ~~~~~~~~~~~~ #ifndef emacs ~~~~~~~~~~~~~ case wordbound: ~~~~~~~~~~~~~~~ case notwordbound: ~~~~~~~~~~~~~~~~~~ case wordbeg: ~~~~~~~~~~~~~ case wordend: ~~~~~~~~~~~~~ #endif ~~~~~~ case push_dummy_failure: ~~~~~~~~~~~~~~~~~~~~~~~~ continue; ~~~~~~~~~ case jump_n: ~~~~~~~~~~~~ case pop_failure_jump: ~~~~~~~~~~~~~~~~~~~~~~ case maybe_pop_jump: ~~~~~~~~~~~~~~~~~~~~ case jump: ~~~~~~~~~~ case jump_past_alt: ~~~~~~~~~~~~~~~~~~~ case dummy_failure_jump: ~~~~~~~~~~~~~~~~~~~~~~~~ EXTRACT_NUMBER_AND_INCR (j, p); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ p += j; ~~~~~~~ if (j > 0) ~~~~~~~~~~ continue; ~~~~~~~~~ /* Jump backward implies we just went through the body of a ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ loop and matched nothing. Opcode jumped to should be ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `on_failure_jump' or `succeed_n'. Just treat it like an ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ordinary jump. For a * loop, it has pushed its failure ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ point already; if so, discard that as redundant. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if ((re_opcode_t) *p != on_failure_jump ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ && (re_opcode_t) *p != succeed_n) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ continue; ~~~~~~~~~ p++; ~~~~ EXTRACT_NUMBER_AND_INCR (j, p); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ p += j; ~~~~~~~ /* If what's on the stack is where we are now, pop it. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!FAIL_STACK_EMPTY () ~~~~~~~~~~~~~~~~~~~~~~~~ && fail_stack.stack[fail_stack.avail - 1].pointer == p) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fail_stack.avail--; ~~~~~~~~~~~~~~~~~~~ continue; ~~~~~~~~~ case on_failure_jump: ~~~~~~~~~~~~~~~~~~~~~ case on_failure_keep_string_jump: ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ handle_on_failure_jump: ~~~~~~~~~~~~~~~~~~~~~~~ EXTRACT_NUMBER_AND_INCR (j, p); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* For some patterns, e.g., `(a?)?', `p+j' here points to the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ end of the pattern. We don't want to push such a point, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ since when we restore it above, entering the switch will ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ increment `p' past the end of the pattern. We don't need ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ to push such a point since we obviously won't find any more ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap entries beyond `pend'. Such a pattern can match ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the null string, though. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (p + j < pend) ~~~~~~~~~~~~~~~~~ { ~ if (!PUSH_PATTERN_OP (p + j, fail_stack)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ RESET_FAIL_STACK (); ~~~~~~~~~~~~~~~~~~~~ UNBIND_REGEX_MALLOC_CHECK (); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return -2; ~~~~~~~~~~ } ~ } ~ else ~~~~ bufp->can_be_null = 1; ~~~~~~~~~~~~~~~~~~~~~~ if (succeed_n_p) ~~~~~~~~~~~~~~~~ { ~ EXTRACT_NUMBER_AND_INCR (k, p); /* Skip the n. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ succeed_n_p = false; ~~~~~~~~~~~~~~~~~~~~ } ~ continue; ~~~~~~~~~ case succeed_n: ~~~~~~~~~~~~~~~ /* Get to the number of times to succeed. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ p += 2; ~~~~~~~ /* Increment p past the n for when k != 0. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ EXTRACT_NONNEGATIVE_AND_INCR (k, p); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (k == 0) ~~~~~~~~~~~ { ~ p -= 4; ~~~~~~~ succeed_n_p = true; /* Spaghetti code alert. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto handle_on_failure_jump; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ continue; ~~~~~~~~~ case set_number_at: ~~~~~~~~~~~~~~~~~~~ p += 4; ~~~~~~~ continue; ~~~~~~~~~ case start_memory: ~~~~~~~~~~~~~~~~~~ case stop_memory: ~~~~~~~~~~~~~~~~~ p += 4; ~~~~~~~ continue; ~~~~~~~~~ default: ~~~~~~~~ ABORT (); /* We have listed all the cases. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } /* switch *p++ */ ~~~~~~~~~~~~~~~~~~~ /* Getting here means we have found the possible starting ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ characters for one path of the pattern -- and that the empty ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ string does not match. We need not follow this path further. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Instead, look at the next alternative (remembered on the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ stack), or quit if no more. The test at the top of the loop ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ does these things. */ ~~~~~~~~~~~~~~~~~~~~~~ path_can_be_null = false; ~~~~~~~~~~~~~~~~~~~~~~~~~ p = pend; ~~~~~~~~~ } /* while p */ ~~~~~~~~~~~~~~~ /* Set `can_be_null' for the last path (also the first path, if the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ pattern is empty). */ ~~~~~~~~~~~~~~~~~~~~~~ bufp->can_be_null |= path_can_be_null; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ done: ~~~~~ RESET_FAIL_STACK (); ~~~~~~~~~~~~~~~~~~~~ UNBIND_REGEX_MALLOC_CHECK (); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return 0; ~~~~~~~~~ } /* re_compile_fastmap */ ~~~~~~~~~~~~~~~~~~~~~~~~~~ ~ /* Set REGS to hold NUM_REGS registers, storing them in STARTS and ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ENDS. Subsequent matches using PATTERN_BUFFER and REGS will use ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ this memory for recording register information. STARTS and ENDS ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ must be allocated using the malloc library routine, and must each ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ be at least NUM_REGS * sizeof (regoff_t) bytes long. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ If NUM_REGS == 0, then subsequent matches should allocate their own ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ register data. ~~~~~~~~~~~~~~ Unless this function is called, the first search or match using ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PATTERN_BUFFER will allocate its own register data, without ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ freeing the old data. */ ~~~~~~~~~~~~~~~~~~~~~~~~~ void ~~~~ re_set_registers (struct re_pattern_buffer *bufp, struct re_registers *regs, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int num_regs, regoff_t *starts, regoff_t *ends) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ if (num_regs) ~~~~~~~~~~~~~ { ~ bufp->regs_allocated = REGS_REALLOCATE; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regs->num_regs = num_regs; ~~~~~~~~~~~~~~~~~~~~~~~~~~ regs->start = starts; ~~~~~~~~~~~~~~~~~~~~~ regs->end = ends; ~~~~~~~~~~~~~~~~~ } ~ else ~~~~ { ~ bufp->regs_allocated = REGS_UNALLOCATED; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regs->num_regs = 0; ~~~~~~~~~~~~~~~~~~~ regs->start = regs->end = (regoff_t *) 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ } ~ ~ /* Searching routines. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Like re_search_2, below, but only one string is specified, and ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ doesn't let you say where to stop matching. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int ~~~ re_search (struct re_pattern_buffer *bufp, const char *string, int size, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int startpos, int range, struct re_registers *regs ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RE_LISP_CONTEXT_ARGS_DECL) ~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ return re_search_2 (bufp, NULL, 0, string, size, startpos, range, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regs, size RE_LISP_CONTEXT_ARGS); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* Using the compiled pattern in BUFP->buffer, first tries to match the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ virtual concatenation of STRING1 and STRING2, starting first at index ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ STARTPOS, then at STARTPOS + 1, and so on. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ STRING1 and STRING2 have length SIZE1 and SIZE2, respectively. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RANGE is how far to scan while trying to match. RANGE = 0 means try ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ only at STARTPOS; in general, the last start tried is STARTPOS + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RANGE. ~~~~~~ All sizes and positions refer to bytes (not chars); under Mule, the code ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ knows about the format of the text and will only check at positions ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ where a character starts. ~~~~~~~~~~~~~~~~~~~~~~~~~ With MULE, RANGE is a byte position, not a char position. The last ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ start tried is the character starting <= STARTPOS + RANGE. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ In REGS, return the indices of the virtual concatenation of STRING1 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ and STRING2 that matched the entire BUFP->buffer and its contained ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ subexpressions. ~~~~~~~~~~~~~~~ Do not consider matching one past the index STOP in the virtual ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ concatenation of STRING1 and STRING2. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ We return either the position in the strings at which the match was ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ found, -1 if no match, or -2 if error (such as failure ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ stack overflow). */ ~~~~~~~~~~~~~~~~~~~~ int ~~~ re_search_2 (struct re_pattern_buffer *bufp, const char *str1, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int size1, const char *str2, int size2, int startpos, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int range, struct re_registers *regs, int stop ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RE_LISP_CONTEXT_ARGS_DECL) ~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ int val; ~~~~~~~~ re_char *string1 = (re_char *) str1; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_char *string2 = (re_char *) str2; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ REGISTER char *fastmap = bufp->fastmap; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ REGISTER RE_TRANSLATE_TYPE translate = bufp->translate; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int total_size = size1 + size2; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int endpos = startpos + range; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef REGEX_BEGLINE_CHECK ~~~~~~~~~~~~~~~~~~~~~~~~~~ int anchored_at_begline = 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ re_char *d; ~~~~~~~~~~~ #ifdef emacs ~~~~~~~~~~~~ Internal_Format fmt = buffer_or_other_internal_format (lispobj); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef REL_ALLOC ~~~~~~~~~~~~~~~~ const Ibyte *orig_buftext = ~~~~~~~~~~~~~~~~~~~~~~~~~~~ BUFFERP (lispobj) ? ~~~~~~~~~~~~~~~~~~~ BYTE_BUF_BYTE_ADDRESS (XBUFFER (lispobj), ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ BYTE_BUF_BEG (XBUFFER (lispobj))) : ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 0; ~~ #endif ~~~~~~ #ifdef ERROR_CHECK_MALLOC ~~~~~~~~~~~~~~~~~~~~~~~~~ int depth; ~~~~~~~~~~ #endif ~~~~~~ #endif /* emacs */ ~~~~~~~~~~~~~~~~~~ int forward_search_p; ~~~~~~~~~~~~~~~~~~~~~ /* Check for out-of-range STARTPOS. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (startpos < 0 || startpos > total_size) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return -1; ~~~~~~~~~~ /* Fix up RANGE if it might eventually take us outside ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the virtual concatenation of STRING1 and STRING2. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (endpos < 0) ~~~~~~~~~~~~~~~ range = 0 - startpos; ~~~~~~~~~~~~~~~~~~~~~ else if (endpos > total_size) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ range = total_size - startpos; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ forward_search_p = range > 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (void) (forward_search_p); /* This is only used with assertions, silence the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ compiler warning when they're turned off. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* If the search isn't to be a backwards one, don't waste time in a ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ search for a pattern that must be anchored. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (bufp->used > 0 && (re_opcode_t) bufp->buffer[0] == begbuf && range > 0) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ if (startpos > 0) ~~~~~~~~~~~~~~~~~ return -1; ~~~~~~~~~~ else ~~~~ { ~ d = ((const unsigned char *) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (startpos >= size1 ? string2 - size1 : string1) + startpos); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ range = itext_ichar_len_fmt (d, fmt); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ } ~ #ifdef emacs ~~~~~~~~~~~~ /* In a forward search for something that starts with \=. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ don't keep searching past point. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (bufp->used > 0 && (re_opcode_t) bufp->buffer[0] == at_dot && range > 0) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ if (!BUFFERP (lispobj)) ~~~~~~~~~~~~~~~~~~~~~~~ return -1; ~~~~~~~~~~ range = (BYTE_BUF_PT (XBUFFER (lispobj)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - BYTE_BUF_BEGV (XBUFFER (lispobj)) - startpos); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (range < 0) ~~~~~~~~~~~~~~ return -1; ~~~~~~~~~~ } ~ #endif /* emacs */ ~~~~~~~~~~~~~~~~~~ #ifdef ERROR_CHECK_MALLOC ~~~~~~~~~~~~~~~~~~~~~~~~~ /* Do this after the above return()s. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ depth = bind_regex_malloc_disallowed (1); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ /* Update the fastmap now if not correct already. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ BEGIN_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~~~ if (fastmap && !bufp->fastmap_accurate) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (re_compile_fastmap (bufp RE_LISP_SHORT_CONTEXT_ARGS) == -2) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ END_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~ UNBIND_REGEX_MALLOC_CHECK (); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return -2; ~~~~~~~~~~ } ~ END_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~ RE_SEARCH_RELOCATE_MOVEABLE_DATA_POINTERS (); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef REGEX_BEGLINE_CHECK ~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ long i = 0; ~~~~~~~~~~~ while (i < bufp->used) ~~~~~~~~~~~~~~~~~~~~~~ { ~ if (bufp->buffer[i] == start_memory || ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->buffer[i] == stop_memory) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ i += 4; ~~~~~~~ else ~~~~ break; ~~~~~~ } ~ anchored_at_begline = i < bufp->used && bufp->buffer[i] == begline; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ #endif ~~~~~~ #ifdef emacs ~~~~~~~~~~~~ BEGIN_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~~~ /* Update the mirror syntax table if it's used and dirty. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ SYNTAX_CODE (BUFFER_MIRROR_SYNTAX_TABLE (lispbuf), 'a'); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ scache = setup_syntax_cache (scache, lispobj, lispbuf, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ offset_to_bytexpos (lispobj, startpos), ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 1); ~~~ END_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~ RE_SEARCH_RELOCATE_MOVEABLE_DATA_POINTERS (); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ /* Loop through the string, looking for a place to start matching. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (;;) ~~~~~~~~ { ~ #ifdef REGEX_BEGLINE_CHECK ~~~~~~~~~~~~~~~~~~~~~~~~~~ /* If the regex is anchored at the beginning of a line (i.e. with a ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ^), then we can speed things up by skipping to the next ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ beginning-of-line. However, to determine "beginning of line" we ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ need to look at the previous char, so can't do this check if at ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ beginning of either string. (Well, we could if at the beginning of ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the second string, but it would require additional code, and this ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ is just an optimization.) */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (anchored_at_begline && startpos > 0 && startpos != size1) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ if (range > 0) ~~~~~~~~~~~~~~ { ~ /* whose stupid idea was it anyway to make this ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ function take two strings to match?? */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int lim = 0; ~~~~~~~~~~~~ re_char *orig_d; ~~~~~~~~~~~~~~~~ re_char *stop_d; ~~~~~~~~~~~~~~~~ /* Compute limit as below in fastmap code, so we are guaranteed ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ to remain within a single string. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (startpos < size1 && startpos + range >= size1) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ lim = range - (size1 - startpos); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ d = ((const unsigned char *) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (startpos >= size1 ? string2 - size1 : string1) + startpos); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ orig_d = d; ~~~~~~~~~~~ stop_d = d + range - lim; ~~~~~~~~~~~~~~~~~~~~~~~~~ /* We want to find the next location (including the current ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ one) where the previous char is a newline, so back up one ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ and search forward for a newline. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEC_IBYTEPTR_FMT (d, fmt); /* Ok, since startpos != size1. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Written out as an if-else to avoid testing `translate' ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ inside the loop. */ ~~~~~~~~~~~~~~~~~~~~ if (TRANSLATE_P (translate)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ while (d < stop_d && ~~~~~~~~~~~~~~~~~~~~ RE_TRANSLATE_1 (itext_ichar_fmt (d, fmt, lispobj)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ != '\n') ~~~~~~~~ INC_IBYTEPTR_FMT (d, fmt); ~~~~~~~~~~~~~~~~~~~~~~~~~~ else ~~~~ while (d < stop_d && ~~~~~~~~~~~~~~~~~~~~ itext_ichar_ascii_fmt (d, fmt, lispobj) != '\n') ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ INC_IBYTEPTR_FMT (d, fmt); ~~~~~~~~~~~~~~~~~~~~~~~~~~ /* If we were stopped by a newline, skip forward over it. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Otherwise we will get in an infloop when our start position ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ was at begline. */ ~~~~~~~~~~~~~~~~~~ if (d < stop_d) ~~~~~~~~~~~~~~~ INC_IBYTEPTR_FMT (d, fmt); ~~~~~~~~~~~~~~~~~~~~~~~~~~ range -= d - orig_d; ~~~~~~~~~~~~~~~~~~~~ startpos += d - orig_d; ~~~~~~~~~~~~~~~~~~~~~~~ assert (!forward_search_p || range >= 0); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ else if (range < 0) ~~~~~~~~~~~~~~~~~~~ { ~ /* We're lazy, like in the fastmap code below */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Ichar c; ~~~~~~~~ d = ((const unsigned char *) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (startpos >= size1 ? string2 - size1 : string1) + startpos); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEC_IBYTEPTR_FMT (d, fmt); ~~~~~~~~~~~~~~~~~~~~~~~~~~ c = itext_ichar_fmt (d, fmt, lispobj); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ c = RE_TRANSLATE (c); ~~~~~~~~~~~~~~~~~~~~~ if (c != '\n') ~~~~~~~~~~~~~~ goto advance; ~~~~~~~~~~~~~ } ~ } ~ #endif /* REGEX_BEGLINE_CHECK */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* If a fastmap is supplied, skip quickly over characters that ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ cannot be the start of a match. If the pattern can match the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ null string, however, we don't need to skip characters; we want ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the first null string. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~ if (fastmap && startpos < total_size && !bufp->can_be_null) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ /* For the moment, fastmap always works as if buffer ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ is in default format, so convert chars in the search strings ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ into default format as we go along, if necessary. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ &&#### fastmap needs rethinking for 8-bit-fixed so ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ it's faster. We need it to reflect the raw ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 8-bit-fixed values. That isn't so hard if we assume ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ that the top 96 bytes represent a single 1-byte ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ charset. For 16-bit/32-bit stuff it's probably not ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ worth it to make the fastmap represent the raw, due to ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ its nature -- we'd have to use the LSB for the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fastmap, and that causes lots of problems with Mule ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ chars, where it essentially wipes out the usefulness ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ of the fastmap entirely. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (range > 0) /* Searching forwards. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ int lim = 0; ~~~~~~~~~~~~ int irange = range; ~~~~~~~~~~~~~~~~~~~ if (startpos < size1 && startpos + range >= size1) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ lim = range - (size1 - startpos); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ d = ((const unsigned char *) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (startpos >= size1 ? string2 - size1 : string1) + startpos); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Written out as an if-else to avoid testing `translate' ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ inside the loop. */ ~~~~~~~~~~~~~~~~~~~~ if (TRANSLATE_P (translate)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ while (range > lim) ~~~~~~~~~~~~~~~~~~~ { ~ re_char *old_d = d; ~~~~~~~~~~~~~~~~~~~ #ifdef MULE ~~~~~~~~~~~ Ibyte tempch[MAX_ICHAR_LEN]; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Ichar buf_ch = ~~~~~~~~~~~~~~ RE_TRANSLATE_1 (itext_ichar_fmt (d, fmt, lispobj)); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ set_itext_ichar (tempch, buf_ch); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (fastmap[*tempch]) ~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ #else ~~~~~ if (fastmap[(unsigned char) RE_TRANSLATE_1 (*d)]) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ #endif /* MULE */ ~~~~~~~~~~~~~~~~~ INC_IBYTEPTR_FMT (d, fmt); ~~~~~~~~~~~~~~~~~~~~~~~~~~ range -= (d - old_d); ~~~~~~~~~~~~~~~~~~~~~ assert (!forward_search_p || range >= 0); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ } ~ #ifdef MULE ~~~~~~~~~~~ else if (fmt != FORMAT_DEFAULT) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ while (range > lim) ~~~~~~~~~~~~~~~~~~~ { ~ re_char *old_d = d; ~~~~~~~~~~~~~~~~~~~ Ibyte tempch[MAX_ICHAR_LEN]; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Ichar buf_ch = itext_ichar_fmt (d, fmt, lispobj); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ set_itext_ichar (tempch, buf_ch); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (fastmap[*tempch]) ~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ INC_IBYTEPTR_FMT (d, fmt); ~~~~~~~~~~~~~~~~~~~~~~~~~~ range -= (d - old_d); ~~~~~~~~~~~~~~~~~~~~~ assert (!forward_search_p || range >= 0); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ } ~ #endif /* MULE */ ~~~~~~~~~~~~~~~~~ else ~~~~ { ~ while (range > lim && !fastmap[*d]) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ re_char *old_d = d; ~~~~~~~~~~~~~~~~~~~ INC_IBYTEPTR (d); ~~~~~~~~~~~~~~~~~ range -= (d - old_d); ~~~~~~~~~~~~~~~~~~~~~ assert (!forward_search_p || range >= 0); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ } ~ startpos += irange - range; ~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ else /* Searching backwards. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ /* #### It's not clear why we don't just write a loop, like ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for the moving-forward case. Perhaps the writer got lazy, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ since backward searches aren't so common. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ d = ((const unsigned char *) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (startpos >= size1 ? string2 - size1 : string1) + startpos); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef MULE ~~~~~~~~~~~ { ~ Ibyte tempch[MAX_ICHAR_LEN]; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Ichar buf_ch = ~~~~~~~~~~~~~~ RE_TRANSLATE (itext_ichar_fmt (d, fmt, lispobj)); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ set_itext_ichar (tempch, buf_ch); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!fastmap[*tempch]) ~~~~~~~~~~~~~~~~~~~~~~ goto advance; ~~~~~~~~~~~~~ } ~ #else ~~~~~ if (!fastmap[(unsigned char) RE_TRANSLATE (*d)]) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto advance; ~~~~~~~~~~~~~ #endif /* MULE */ ~~~~~~~~~~~~~~~~~ } ~ } ~ /* If can't match the null string, and that's all we have left, fail. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (range >= 0 && startpos == total_size && fastmap ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ && !bufp->can_be_null) ~~~~~~~~~~~~~~~~~~~~~~ { ~ UNBIND_REGEX_MALLOC_CHECK (); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return -1; ~~~~~~~~~~ } ~ #ifdef emacs /* XEmacs added, w/removal of immediate_quit */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!no_quit_in_re_search) ~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ BEGIN_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~~~ QUIT; ~~~~~ END_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~ RE_SEARCH_RELOCATE_MOVEABLE_DATA_POINTERS (); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ #endif ~~~~~~ BEGIN_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~~~ val = re_match_2_internal (bufp, string1, size1, string2, size2, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ startpos, regs, stop ~~~~~~~~~~~~~~~~~~~~ RE_LISP_CONTEXT_ARGS); ~~~~~~~~~~~~~~~~~~~~~~ #ifndef REGEX_MALLOC ~~~~~~~~~~~~~~~~~~~~ ALLOCA_GARBAGE_COLLECT (); ~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ END_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~ RE_SEARCH_RELOCATE_MOVEABLE_DATA_POINTERS (); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (val >= 0) ~~~~~~~~~~~~~ { ~ UNBIND_REGEX_MALLOC_CHECK (); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return startpos; ~~~~~~~~~~~~~~~~ } ~ if (val == -2) ~~~~~~~~~~~~~~ { ~ UNBIND_REGEX_MALLOC_CHECK (); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return -2; ~~~~~~~~~~ } ~ RE_SEARCH_RELOCATE_MOVEABLE_DATA_POINTERS (); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ advance: ~~~~~~~~ if (!range) ~~~~~~~~~~~ break; ~~~~~~ else if (range > 0) ~~~~~~~~~~~~~~~~~~~ { ~ Bytecount d_size; ~~~~~~~~~~~~~~~~~ d = ((const unsigned char *) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (startpos >= size1 ? string2 - size1 : string1) + startpos); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ d_size = itext_ichar_len_fmt (d, fmt); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ range -= d_size; ~~~~~~~~~~~~~~~~ assert (!forward_search_p || range >= 0); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ startpos += d_size; ~~~~~~~~~~~~~~~~~~~ } ~ else ~~~~ { ~ Bytecount d_size; ~~~~~~~~~~~~~~~~~ /* Note startpos > size1 not >=. If we are on the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ string1/string2 boundary, we want to backup into string1. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ d = ((const unsigned char *) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (startpos > size1 ? string2 - size1 : string1) + startpos); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEC_IBYTEPTR_FMT (d, fmt); ~~~~~~~~~~~~~~~~~~~~~~~~~~ d_size = itext_ichar_len_fmt (d, fmt); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ range += d_size; ~~~~~~~~~~~~~~~~ assert (!forward_search_p || range >= 0); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ startpos -= d_size; ~~~~~~~~~~~~~~~~~~~ } ~ } ~ UNBIND_REGEX_MALLOC_CHECK (); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return -1; ~~~~~~~~~~ } /* re_search_2 */ ~~~~~~~~~~~~~~~~~~~ ~ /* Declarations and macros for re_match_2. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* This converts PTR, a pointer into one of the search strings `string1' ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ and `string2' into an offset from the beginning of that string. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define POINTER_TO_OFFSET(ptr) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (FIRST_STRING_P (ptr) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~ ? ((regoff_t) ((ptr) - string1)) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ : ((regoff_t) ((ptr) - string2 + size1))) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Macros for dealing with the split strings in re_match_2. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define MATCHING_IN_FIRST_STRING (dend == end_match_1) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Call before fetching a character with *d. This switches over to ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ string2 if necessary. */ ~~~~~~~~~~~~~~~~~~~~~~~~~ #define REGEX_PREFETCH() \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ while (d == dend) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { \ ~~~~~~~~~~~ /* End of string2 => fail. */ \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (dend == end_match_2) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto fail; \ ~~~~~~~~~~~~~~~~~~ /* End of string1 => advance to string2. */ \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ d = string2; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~ dend = end_match_2; \ ~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* Test if at very beginning or at very end of the virtual concatenation ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ of `string1' and `string2'. If only one string, it's `string2'. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define AT_STRINGS_BEG(d) ((d) == (size1 ? string1 : string2) || !size2) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define AT_STRINGS_END(d) ((d) == end2) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* XEmacs change: ~~~~~~~~~~~~~~~~~ If the given position straddles the string gap, return the equivalent ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ position that is before or after the gap, respectively; otherwise, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return the same position. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define POS_BEFORE_GAP_UNSAFE(d) ((d) == string2 ? end1 : (d)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define POS_AFTER_GAP_UNSAFE(d) ((d) == end1 ? string2 : (d)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Test if CH is a word-constituent character. (XEmacs change) */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define WORDCHAR_P(ch) \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (SYNTAX (BUFFER_MIRROR_SYNTAX_TABLE (lispbuf), ch) == Sword) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Free everything we malloc. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef MATCH_MAY_ALLOCATE ~~~~~~~~~~~~~~~~~~~~~~~~~ #define FREE_VAR(var,type) if (var) REGEX_FREE (var, type); var = NULL ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define FREE_VARIABLES() \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ do { \ ~~~~~~~~~~~~~~ UNBIND_REGEX_MALLOC_CHECK (); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ REGEX_FREE_STACK (fail_stack.stack); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ FREE_VAR (regstart, re_char **); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ FREE_VAR (regend, re_char **); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ FREE_VAR (old_regstart, re_char **); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ FREE_VAR (old_regend, re_char **); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ FREE_VAR (best_regstart, re_char **); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ FREE_VAR (best_regend, re_char **); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ FREE_VAR (reg_info, register_info_type *); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ FREE_VAR (reg_dummy, re_char **); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ FREE_VAR (reg_info_dummy, register_info_type *); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } while (0) ~~~~~~~~~~~ #else /* not MATCH_MAY_ALLOCATE */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define FREE_VARIABLES() \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ do { \ ~~~~~~~~~~~ UNBIND_REGEX_MALLOC_CHECK (); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } while (0) ~~~~~~~~~~~ #endif /* MATCH_MAY_ALLOCATE */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* These values must meet several constraints. They must not be valid ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ register values, which means we can use numbers larger than MAX_REGNUM. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ They must differ by 1, because of NUM_FAILURE_ITEMS above. And the value ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for the lowest register must be larger than the value for the highest ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ register, so we do not try to actually save any registers when none are ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ active. */ ~~~~~~~~~~~ #define NO_HIGHEST_ACTIVE_REG (MAX_REGNUM + 1) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define NO_LOWEST_ACTIVE_REG (NO_HIGHEST_ACTIVE_REG + 1) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ~ /* Matching routines. */ ~~~~~~~~~~~~~~~~~~~~~~~~~ #ifndef emacs /* XEmacs never uses this. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* re_match is like re_match_2 except it takes only a single string. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int ~~~ re_match (struct re_pattern_buffer *bufp, const char *string, int size, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int pos, struct re_registers *regs ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RE_LISP_CONTEXT_ARGS_DECL) ~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ int result = re_match_2_internal (bufp, NULL, 0, (re_char *) string, size, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ pos, regs, size ~~~~~~~~~~~~~~~ RE_LISP_CONTEXT_ARGS); ~~~~~~~~~~~~~~~~~~~~~~ ALLOCA_GARBAGE_COLLECT (); ~~~~~~~~~~~~~~~~~~~~~~~~~~ return result; ~~~~~~~~~~~~~~ } ~ #endif /* not emacs */ ~~~~~~~~~~~~~~~~~~~~~~ /* re_match_2 matches the compiled pattern in BUFP against the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (virtual) concatenation of STRING1 and STRING2 (of length SIZE1 and ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ SIZE2, respectively). We start matching at POS, and stop matching ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ at STOP. ~~~~~~~~ If REGS is non-null and the `no_sub' field of BUFP is nonzero, we ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ store offsets for the substring each group matched in REGS. See the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ documentation for exactly how many groups we fill. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ We return -1 if no match, -2 if an internal error (such as the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ failure stack overflowing). Otherwise, we return the length of the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ matched substring. */ ~~~~~~~~~~~~~~~~~~~~~~ int ~~~ re_match_2 (struct re_pattern_buffer *bufp, const char *string1, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int size1, const char *string2, int size2, int pos, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ struct re_registers *regs, int stop ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RE_LISP_CONTEXT_ARGS_DECL) ~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ int result; ~~~~~~~~~~~ #ifdef emacs ~~~~~~~~~~~~ /* Update the mirror syntax table if it's dirty now, this would otherwise ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ cause a malloc() in charset_mule in re_match_2_internal() when checking ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ characters' syntax. */ ~~~~~~~~~~~~~~~~~~~~~~ SYNTAX_CODE (BUFFER_MIRROR_SYNTAX_TABLE (lispbuf), 'a'); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ scache = setup_syntax_cache (scache, lispobj, lispbuf, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ offset_to_bytexpos (lispobj, pos), ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 1); ~~~ #endif ~~~~~~ result = re_match_2_internal (bufp, (re_char *) string1, size1, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (re_char *) string2, size2, ~~~~~~~~~~~~~~~~~~~~~~~~~~~ pos, regs, stop ~~~~~~~~~~~~~~~ RE_LISP_CONTEXT_ARGS); ~~~~~~~~~~~~~~~~~~~~~~ ALLOCA_GARBAGE_COLLECT (); ~~~~~~~~~~~~~~~~~~~~~~~~~~ return result; ~~~~~~~~~~~~~~ } ~ /* This is a separate function so that we can force an alloca cleanup ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ afterwards. */ ~~~~~~~~~~~~~~~ static int ~~~~~~~~~~ re_match_2_internal (struct re_pattern_buffer *bufp, re_char *string1, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int size1, re_char *string2, int size2, int pos, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ struct re_registers *regs, int stop ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RE_LISP_CONTEXT_ARGS_MULE_DECL) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ /* General temporaries. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~ int mcnt; ~~~~~~~~~ re_char *p1; ~~~~~~~~~~~~ int should_succeed; /* XEmacs change */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Just past the end of the corresponding string. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_char *end1, *end2; ~~~~~~~~~~~~~~~~~~~~~ /* Pointers into string1 and string2, just past the last characters in ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ each to consider matching. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_char *end_match_1, *end_match_2; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Where we are in the data, and the end of the current string. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_char *d, *dend; ~~~~~~~~~~~~~~~~~~ /* Where we are in the pattern, and the end of the pattern. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ unsigned char *p; ~~~~~~~~~~~~~~~~~ re_char *pstart; ~~~~~~~~~~~~~~~~ REGISTER re_char *pend; ~~~~~~~~~~~~~~~~~~~~~~~ /* Mark the opcode just after a start_memory, so we can test for an ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ empty subpattern when we get to the stop_memory. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_char *just_past_start_mem = 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* We use this to map every character in the string. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RE_TRANSLATE_TYPE translate = bufp->translate; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Failure point stack. Each place that can handle a failure further ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ down the line pushes a failure point on this stack. It consists of ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ restart, regend, and reg_info for all registers corresponding to ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the subexpressions we're currently inside, plus the number of such ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ registers, and, finally, two char *'s. The first char * is where ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ to resume scanning the pattern; the second one is where to resume ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ scanning the strings. If the latter is zero, the failure point is ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ a ``dummy''; if a failure happens and the failure point is a dummy, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ it gets discarded and the next one is tried. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef MATCH_MAY_ALLOCATE /* otherwise, this is global. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fail_stack_type fail_stack; ~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ #ifdef DEBUG ~~~~~~~~~~~~ static int failure_id; ~~~~~~~~~~~~~~~~~~~~~~ int nfailure_points_pushed = 0, nfailure_points_popped = 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ #ifdef REGEX_REL_ALLOC ~~~~~~~~~~~~~~~~~~~~~~ /* This holds the pointer to the failure stack, when ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ it is allocated relocatably. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fail_stack_elt_t *failure_stack_ptr; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ /* We fill all the registers internally, independent of what we ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ return, for use in backreferences. The number here includes ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ an element for register zero. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regnum_t num_regs = bufp->re_ngroups + 1; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* The currently active registers. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regnum_t lowest_active_reg = NO_LOWEST_ACTIVE_REG; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regnum_t highest_active_reg = NO_HIGHEST_ACTIVE_REG; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Information on the contents of registers. These are pointers into ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the input strings; they record just what was matched (on this ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ attempt) by a subexpression part of the pattern, that is, the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regnum-th regstart pointer points to where in the pattern we began ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ matching and the regnum-th regend points to right after where we ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ stopped matching the regnum-th subexpression. (The zeroth register ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ keeps track of what the whole pattern matches.) */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_char **regstart, **regend; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ /* If a group that's operated upon by a repetition operator fails to ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ match anything, then the register for its start will need to be ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ restored because it will have been set to wherever in the string we ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ are when we last see its open-group operator. Similarly for a ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ register's end. */ ~~~~~~~~~~~~~~~~~~~ #ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_char **old_regstart, **old_regend; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ /* The is_active field of reg_info helps us keep track of which (possibly ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ nested) subexpressions we are currently in. The matched_something ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ field of reg_info[reg_num] helps us tell whether or not we have ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ matched any of the pattern so far this time through the reg_num-th ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ subexpression. These two fields get reset each time through any ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ loop their register is in. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef MATCH_MAY_ALLOCATE /* otherwise, this is global. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ register_info_type *reg_info; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ /* The following record the register info as found in the above ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ variables when we find a match better than any we've seen before. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ This happens as we backtrack through the failure points, which in ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ turn happens only if we have not yet matched the entire string. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int best_regs_set = false; ~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_char **best_regstart, **best_regend; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ /* Logically, this is `best_regend[0]'. But we don't want to have to ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ allocate space for that if we're not allocating space for anything ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ else (see below). Also, we never need info about register 0 for ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ any of the other register vectors, and it seems rather a kludge to ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ treat `best_regend' differently than the rest. So we keep track of ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the end of the best match so far in a separate variable. We ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ initialize this to NULL so that when we backtrack the first time ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ and need to test it, it's not garbage. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_char *match_end = NULL; ~~~~~~~~~~~~~~~~~~~~~~~~~~ /* This helps SET_REGS_MATCHED avoid doing redundant work. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int set_regs_matched_done = 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Used when we pop values we don't care about. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_char **reg_dummy; ~~~~~~~~~~~~~~~~~~~~ register_info_type *reg_info_dummy; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ #ifdef DEBUG ~~~~~~~~~~~~ /* Counts the total number of registers pushed. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int num_regs_pushed = 0; ~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ /* 1 if this match ends in the same string (string1 or string2) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ as the best previous match. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_bool same_str_p; ~~~~~~~~~~~~~~~~~~~ /* 1 if this match is the best seen so far. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_bool best_match_p; ~~~~~~~~~~~~~~~~~~~~~ #ifdef emacs ~~~~~~~~~~~~ Internal_Format fmt = buffer_or_other_internal_format (lispobj); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef REL_ALLOC ~~~~~~~~~~~~~~~~ const Ibyte *orig_buftext = ~~~~~~~~~~~~~~~~~~~~~~~~~~~ BUFFERP (lispobj) ? ~~~~~~~~~~~~~~~~~~~ BYTE_BUF_BYTE_ADDRESS (XBUFFER (lispobj), ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ BYTE_BUF_BEG (XBUFFER (lispobj))) : ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 0; ~~ #endif ~~~~~~ #ifdef ERROR_CHECK_MALLOC ~~~~~~~~~~~~~~~~~~~~~~~~~ int depth = bind_regex_malloc_disallowed (1); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ #endif /* emacs */ ~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT1 ("\n\nEntering re_match_2.\n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ BEGIN_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~~~ INIT_FAIL_STACK (); ~~~~~~~~~~~~~~~~~~~ p = (unsigned char *) ALLOCA (bufp->used); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ END_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~ /* re_match_2_internal() modifies the compiled pattern (see the succeed_n, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ jump_n, set_number_at opcodes), make it re-entrant by working on a ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ copy. This should also give better locality of reference. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ memcpy (p, bufp->buffer, bufp->used); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ pstart = (re_char *) p; ~~~~~~~~~~~~~~~~~~~~~~~ pend = pstart + bufp->used; ~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef MATCH_MAY_ALLOCATE ~~~~~~~~~~~~~~~~~~~~~~~~~ /* Do not bother to initialize all the register variables if there are ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ no groups in the pattern, as it takes a fair amount of time. If ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ there are groups, we include space for register 0 (the whole ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ pattern), even though we never use it, since it simplifies the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ array indexing. We should fix this. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (bufp->re_ngroups) ~~~~~~~~~~~~~~~~~~~~~ { ~ BEGIN_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~~~ regstart = REGEX_TALLOC (num_regs, re_char *); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regend = REGEX_TALLOC (num_regs, re_char *); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ old_regstart = REGEX_TALLOC (num_regs, re_char *); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ old_regend = REGEX_TALLOC (num_regs, re_char *); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ best_regstart = REGEX_TALLOC (num_regs, re_char *); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ best_regend = REGEX_TALLOC (num_regs, re_char *); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ reg_info = REGEX_TALLOC (num_regs, register_info_type); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ reg_dummy = REGEX_TALLOC (num_regs, re_char *); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ reg_info_dummy = REGEX_TALLOC (num_regs, register_info_type); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ END_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~ if (!(regstart && regend && old_regstart && old_regend && reg_info ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ && best_regstart && best_regend && reg_dummy && reg_info_dummy)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ FREE_VARIABLES (); ~~~~~~~~~~~~~~~~~~ return -2; ~~~~~~~~~~ } ~ } ~ else ~~~~ { ~ /* We must initialize all our variables to NULL, so that ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `FREE_VARIABLES' doesn't try to free them. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regstart = regend = old_regstart = old_regend = best_regstart ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ = best_regend = reg_dummy = NULL; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ reg_info = reg_info_dummy = (register_info_type *) NULL; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ #endif /* MATCH_MAY_ALLOCATE */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #if defined (emacs) && defined (REL_ALLOC) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ /* If the allocations above (or the call to setup_syntax_cache() in ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ re_match_2) caused a rel-alloc relocation, then fix up the data ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ pointers */ ~~~~~~~~~~~ Bytecount offset = offset_post_relocation (lispobj, orig_buftext); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (offset) ~~~~~~~~~~~ { ~ string1 += offset; ~~~~~~~~~~~~~~~~~~ string2 += offset; ~~~~~~~~~~~~~~~~~~ } ~ } ~ #endif /* defined (emacs) && defined (REL_ALLOC) */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* The starting position is bogus. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (pos < 0 || pos > size1 + size2) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ FREE_VARIABLES (); ~~~~~~~~~~~~~~~~~~ return -1; ~~~~~~~~~~ } ~ /* Initialize subexpression text positions to our sentinel to mark ones that ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ no start_memory/stop_memory has been seen for. Also initialize the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ register information struct. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (mcnt = 1; mcnt < num_regs; mcnt++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ regstart[mcnt] = regend[mcnt] = old_regstart[mcnt] = old_regend[mcnt] ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ = best_regstart[mcnt] = best_regend[mcnt] = REG_UNSET_VALUE; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ REG_MATCH_NULL_STRING_P (reg_info[mcnt]) = MATCH_NULL_UNSET_VALUE; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ IS_ACTIVE (reg_info[mcnt]) = 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ MATCHED_SOMETHING (reg_info[mcnt]) = 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ EVER_MATCHED_SOMETHING (reg_info[mcnt]) = 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* We move `string1' into `string2' if the latter's empty -- but not if ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `string1' is null. */ ~~~~~~~~~~~~~~~~~~~~~~ if (size2 == 0 && string1 != NULL) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ string2 = string1; ~~~~~~~~~~~~~~~~~~ size2 = size1; ~~~~~~~~~~~~~~ string1 = 0; ~~~~~~~~~~~~ size1 = 0; ~~~~~~~~~~ } ~ end1 = string1 + size1; ~~~~~~~~~~~~~~~~~~~~~~~ end2 = string2 + size2; ~~~~~~~~~~~~~~~~~~~~~~~ /* Compute where to stop matching, within the two strings. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (stop <= size1) ~~~~~~~~~~~~~~~~~~ { ~ end_match_1 = string1 + stop; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ end_match_2 = string2; ~~~~~~~~~~~~~~~~~~~~~~ } ~ else ~~~~ { ~ end_match_1 = end1; ~~~~~~~~~~~~~~~~~~~ end_match_2 = string2 + stop - size1; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* `p' scans through the pattern as `d' scans through the data. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `dend' is the end of the input string that `d' points within. `d' ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ is advanced into the following input string whenever necessary, but ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ this happens before fetching; therefore, at the beginning of the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ loop, `d' can be pointing at the end of a string, but it cannot ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ equal `string2'. */ ~~~~~~~~~~~~~~~~~~~~ if (size1 > 0 && pos <= size1) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ d = string1 + pos; ~~~~~~~~~~~~~~~~~~ dend = end_match_1; ~~~~~~~~~~~~~~~~~~~ } ~ else ~~~~ { ~ d = string2 + pos - size1; ~~~~~~~~~~~~~~~~~~~~~~~~~~ dend = end_match_2; ~~~~~~~~~~~~~~~~~~~ } ~ DEBUG_MATCH_PRINT1 ("The compiled pattern is: \n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT_COMPILED_PATTERN (bufp, p, pend); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT1 ("The string to match is: `"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT_DOUBLE_STRING (d, string1, size1, string2, size2); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT1 ("'\n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* This loops over pattern commands. It exits by returning from the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ function if the match is complete, or it drops through if the match ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fails at this starting point in the input data. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (;;) ~~~~~~~~ { ~ DEBUG_MATCH_PRINT2 ("\n0x%zx: ", (Bytecount) p); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef emacs /* XEmacs added, w/removal of immediate_quit */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!no_quit_in_re_search) ~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ BEGIN_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~~~ QUIT; ~~~~~ END_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~ RE_MATCH_RELOCATE_MOVEABLE_DATA_POINTERS (); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ #endif ~~~~~~ if (p == pend) ~~~~~~~~~~~~~~ { /* End of pattern means we might have succeeded. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT1 ("end of pattern ... "); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* If we haven't matched the entire string, and we want the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ longest match, try backtracking. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (d != end_match_2) ~~~~~~~~~~~~~~~~~~~~~ { ~ same_str_p = (FIRST_STRING_P (match_end) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ == MATCHING_IN_FIRST_STRING); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* AIX compiler got confused when this was combined ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ with the previous declaration. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (same_str_p) ~~~~~~~~~~~~~~~ best_match_p = d > match_end; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ else ~~~~ best_match_p = !MATCHING_IN_FIRST_STRING; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT1 ("backtracking.\n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!FAIL_STACK_EMPTY ()) ~~~~~~~~~~~~~~~~~~~~~~~~~ { /* More failure points to try. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* If exceeds best match so far, save it. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!best_regs_set || best_match_p) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ best_regs_set = true; ~~~~~~~~~~~~~~~~~~~~~ match_end = d; ~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT1 ("\nSAVING match as best so far.\n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (mcnt = 1; mcnt < num_regs; mcnt++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ best_regstart[mcnt] = regstart[mcnt]; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ best_regend[mcnt] = regend[mcnt]; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ } ~ goto fail; ~~~~~~~~~~ } ~ /* If no failure points, don't restore garbage. And if ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ last match is real best match, don't restore second ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ best one. */ ~~~~~~~~~~~~ else if (best_regs_set && !best_match_p) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ restore_best_regs: ~~~~~~~~~~~~~~~~~~ /* Restore best match. It may happen that `dend == ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ end_match_1' while the restored d is in string2. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ For example, the pattern `x.*y.*z' against the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ strings `x-' and `y-z-', if the two strings are ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ not consecutive in memory. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT1 ("Restoring best registers.\n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ d = match_end; ~~~~~~~~~~~~~~ dend = ((d >= string1 && d <= end1) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ? end_match_1 : end_match_2); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (mcnt = 1; mcnt < num_regs; mcnt++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ regstart[mcnt] = best_regstart[mcnt]; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regend[mcnt] = best_regend[mcnt]; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ } ~ } /* d != end_match_2 */ ~~~~~~~~~~~~~~~~~~~~~~~~ succeed_label: ~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT1 ("Accepting match.\n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* If caller wants register contents data back, do it. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ int num_nonshy_regs = bufp->re_nsub + 1; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (regs && !bufp->no_sub) ~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ /* Have the register data arrays been allocated? */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (bufp->regs_allocated == REGS_UNALLOCATED) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { /* No. So allocate them with malloc. We need one ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ extra element beyond `num_regs' for the `-1' marker ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ GNU code uses. */ ~~~~~~~~~~~~~~~~~~ regs->num_regs = MAX (RE_NREGS, num_nonshy_regs + 1); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ BEGIN_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~~~ regs->start = TALLOC (regs->num_regs, regoff_t); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regs->end = TALLOC (regs->num_regs, regoff_t); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ END_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~ RE_MATCH_RELOCATE_MOVEABLE_DATA_POINTERS (); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (regs->start == NULL || regs->end == NULL) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ FREE_VARIABLES (); ~~~~~~~~~~~~~~~~~~ return -2; ~~~~~~~~~~ } ~ bufp->regs_allocated = REGS_REALLOCATE; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ else if (bufp->regs_allocated == REGS_REALLOCATE) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { /* Yes. If we need more elements than were already ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ allocated, reallocate them. If we need fewer, just ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ leave it alone. */ ~~~~~~~~~~~~~~~~~~~ if (regs->num_regs < num_nonshy_regs + 1) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ regs->num_regs = num_nonshy_regs + 1; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ BEGIN_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~~~ RETALLOC (regs->start, regs->num_regs, regoff_t); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RETALLOC (regs->end, regs->num_regs, regoff_t); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ END_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~ RE_MATCH_RELOCATE_MOVEABLE_DATA_POINTERS (); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (regs->start == NULL || regs->end == NULL) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ FREE_VARIABLES (); ~~~~~~~~~~~~~~~~~~ return -2; ~~~~~~~~~~ } ~ } ~ } ~ else ~~~~ { ~ /* The braces fend off a "empty body in an else-statement" ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ warning under GCC when assert expands to nothing. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ assert (bufp->regs_allocated == REGS_FIXED); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* Convert the pointer data in `regstart' and `regend' to ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ indices. Register zero has to be set differently, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ since we haven't kept track of any info for it. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (regs->num_regs > 0) ~~~~~~~~~~~~~~~~~~~~~~~ { ~ regs->start[0] = pos; ~~~~~~~~~~~~~~~~~~~~~ regs->end[0] = (MATCHING_IN_FIRST_STRING ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ? ((regoff_t) (d - string1)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ : ((regoff_t) (d - string2 + size1))); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* Map over the NUM_NONSHY_REGS non-shy internal registers. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Copy each into the corresponding external register. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ MCNT indexes external registers. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (mcnt = 1; mcnt < MIN (num_nonshy_regs, regs->num_regs); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ mcnt++) ~~~~~~~ { ~ int internal_reg = bufp->external_to_internal_register[mcnt]; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if ((int)0xDEADBEEF == internal_reg ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || REG_UNSET (regstart[internal_reg]) || ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ REG_UNSET (regend[internal_reg])) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regs->start[mcnt] = regs->end[mcnt] = -1; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ else ~~~~ { ~ regs->start[mcnt] = ~~~~~~~~~~~~~~~~~~~ (regoff_t) POINTER_TO_OFFSET (regstart[internal_reg]); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regs->end[mcnt] = ~~~~~~~~~~~~~~~~~ (regoff_t) POINTER_TO_OFFSET (regend[internal_reg]); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ } ~ } /* regs && !bufp->no_sub */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* If we have regs and the regs structure has more elements than ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ were in the pattern, set the extra elements starting with ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ NUM_NONSHY_REGS to -1. If we (re)allocated the registers, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ this is the case, because we always allocate enough to have ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ at least one -1 at the end. ~~~~~~~~~~~~~~~~~~~~~~~~~~~ We do this even when no_sub is set because some applications ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (XEmacs) reuse register structures which may contain stale ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ information, and permit attempts to access those registers. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ It would be possible to require the caller to do this, but we'd ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ have to change the API for this function to reflect that, and ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ audit all callers. Note: as of 2003-04-17 callers in XEmacs ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ do clear the registers, but it's safer to leave this code in ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ because of reallocation. ~~~~~~~~~~~~~~~~~~~~~~~~ */ ~~ if (regs && regs->num_regs > 0) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (mcnt = num_nonshy_regs; mcnt < regs->num_regs; mcnt++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regs->start[mcnt] = regs->end[mcnt] = -1; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ DEBUG_MATCH_PRINT4 ("%u failure points pushed, %u popped (%u remain).\n", ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ nfailure_points_pushed, nfailure_points_popped, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ nfailure_points_pushed - nfailure_points_popped); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT2 ("%u registers pushed.\n", num_regs_pushed); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ mcnt = d - pos - (MATCHING_IN_FIRST_STRING ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ? string1 ~~~~~~~~~ : string2 - size1); ~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT2 ("Returning %d from re_match_2.\n", mcnt); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ FREE_VARIABLES (); ~~~~~~~~~~~~~~~~~~ return mcnt; ~~~~~~~~~~~~ } ~ /* Otherwise match next pattern command. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ switch ((re_opcode_t) *p++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ /* Ignore these. Used to ignore the n of succeed_n's which ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ currently have n == 0. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~ case no_op: ~~~~~~~~~~~ DEBUG_MATCH_PRINT1 ("EXECUTING no_op.\n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case succeed: ~~~~~~~~~~~~~ DEBUG_MATCH_PRINT1 ("EXECUTING succeed.\n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto succeed_label; ~~~~~~~~~~~~~~~~~~~ /* Match exactly a string of length n in the pattern. The ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ following byte in the pattern defines n, and the n bytes after ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ that make up the string to match. (Under Mule, this will be in ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the default internal format.) */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case exactn: ~~~~~~~~~~~~ mcnt = *p++; ~~~~~~~~~~~~ DEBUG_MATCH_PRINT2 ("EXECUTING exactn %d.\n", mcnt); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* This is written out as an if-else so we don't waste time ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ testing `translate' inside the loop. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (TRANSLATE_P (translate)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ do ~~ { ~ #ifdef MULE ~~~~~~~~~~~ Bytecount pat_len; ~~~~~~~~~~~~~~~~~~ REGEX_PREFETCH (); ~~~~~~~~~~~~~~~~~~ if (RE_TRANSLATE_1 (itext_ichar_fmt (d, fmt, lispobj)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ != itext_ichar (p)) ~~~~~~~~~~~~~~~~~~~ goto fail; ~~~~~~~~~~ pat_len = itext_ichar_len (p); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ p += pat_len; ~~~~~~~~~~~~~ INC_IBYTEPTR_FMT (d, fmt); ~~~~~~~~~~~~~~~~~~~~~~~~~~ mcnt -= pat_len; ~~~~~~~~~~~~~~~~ #else /* not MULE */ ~~~~~~~~~~~~~~~~~~~~ REGEX_PREFETCH (); ~~~~~~~~~~~~~~~~~~ if ((unsigned char) RE_TRANSLATE_1 (*d++) != *p++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto fail; ~~~~~~~~~~ mcnt--; ~~~~~~~ #endif ~~~~~~ } ~ while (mcnt > 0); ~~~~~~~~~~~~~~~~~ } ~ else ~~~~ { ~ #ifdef MULE ~~~~~~~~~~~ /* If buffer format is default, then we can shortcut and just ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ compare the text directly, byte by byte. Otherwise, we ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ need to go character by character. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (fmt != FORMAT_DEFAULT) ~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ do ~~ { ~ Bytecount pat_len; ~~~~~~~~~~~~~~~~~~ REGEX_PREFETCH (); ~~~~~~~~~~~~~~~~~~ if (itext_ichar_fmt (d, fmt, lispobj) != ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ itext_ichar (p)) ~~~~~~~~~~~~~~~~ goto fail; ~~~~~~~~~~ pat_len = itext_ichar_len (p); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ p += pat_len; ~~~~~~~~~~~~~ INC_IBYTEPTR_FMT (d, fmt); ~~~~~~~~~~~~~~~~~~~~~~~~~~ mcnt -= pat_len; ~~~~~~~~~~~~~~~~ } ~ while (mcnt > 0); ~~~~~~~~~~~~~~~~~ } ~ else ~~~~ #endif ~~~~~~ { ~ do ~~ { ~ REGEX_PREFETCH (); ~~~~~~~~~~~~~~~~~~ if (*d++ != *p++) goto fail; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ mcnt--; ~~~~~~~ } ~ while (mcnt > 0); ~~~~~~~~~~~~~~~~~ } ~ } ~ SET_REGS_MATCHED (); ~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ /* Match any character except possibly a newline or a null. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case anychar: ~~~~~~~~~~~~~ DEBUG_MATCH_PRINT1 ("EXECUTING anychar.\n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ REGEX_PREFETCH (); ~~~~~~~~~~~~~~~~~~ if ((!(bufp->syntax & RE_DOT_NEWLINE) && ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RE_TRANSLATE (itext_ichar_fmt (d, fmt, lispobj)) == '\n') ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || (bufp->syntax & RE_DOT_NOT_NULL && ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RE_TRANSLATE (itext_ichar_fmt (d, fmt, lispobj)) == ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ '\000')) ~~~~~~~~ goto fail; ~~~~~~~~~~ SET_REGS_MATCHED (); ~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT2 (" Matched `%c'.\n", *d); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ INC_IBYTEPTR_FMT (d, fmt); /* XEmacs change */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ case charset: ~~~~~~~~~~~~~ case charset_not: ~~~~~~~~~~~~~~~~~ { ~ REGISTER Ichar c; ~~~~~~~~~~~~~~~~~ re_bool not_p = (re_opcode_t) *(p - 1) == charset_not; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT2 ("EXECUTING charset%s.\n", not_p ? "_not" : ""); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ REGEX_PREFETCH (); ~~~~~~~~~~~~~~~~~~ c = itext_ichar_fmt (d, fmt, lispobj); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ c = RE_TRANSLATE (c); /* The character to match. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Cast to `unsigned int' instead of `unsigned char' in case the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bit list is a full 32 bytes long. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if ((unsigned int)c < (unsigned int) (*p * BYTEWIDTH) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ && p[1 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH))) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ not_p = !not_p; ~~~~~~~~~~~~~~~ p += 1 + *p; ~~~~~~~~~~~~ if (!not_p) goto fail; ~~~~~~~~~~~~~~~~~~~~~~ SET_REGS_MATCHED (); ~~~~~~~~~~~~~~~~~~~~ INC_IBYTEPTR_FMT (d, fmt); /* XEmacs change */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ } ~ #ifdef MULE ~~~~~~~~~~~ case charset_mule: ~~~~~~~~~~~~~~~~~~ case charset_mule_not: ~~~~~~~~~~~~~~~~~~~~~~ { ~ REGISTER Ichar c; ~~~~~~~~~~~~~~~~~ re_bool not_p = (re_opcode_t) *(p - 1) == charset_mule_not; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Bitbyte class_bits = *p++; ~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT2 ("EXECUTING charset_mule%s.\n", ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ not_p ? "_not" : ""); ~~~~~~~~~~~~~~~~~~~~~ REGEX_PREFETCH (); ~~~~~~~~~~~~~~~~~~ c = itext_ichar_fmt (d, fmt, lispobj); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ c = RE_TRANSLATE (c); /* The character to match. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if ((class_bits && ~~~~~~~~~~~~~~~~~~ ((class_bits & BIT_WORD && ISWORD (c)) /* = ALNUM */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || (class_bits & BIT_ALPHA && ISALPHA (c)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || (class_bits & BIT_SPACE && ISSPACE (c)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || (class_bits & BIT_PUNCT && ISPUNCT (c)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || (TRANSLATE_P (translate) ? ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (class_bits & (BIT_UPPER | BIT_LOWER) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ && !NOCASEP (lispbuf, c)) ~~~~~~~~~~~~~~~~~~~~~~~~~ : ((class_bits & BIT_UPPER && ISUPPER (c)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || (class_bits & BIT_LOWER && ISLOWER (c)))))) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || EQ (Qt, unified_range_table_lookup ((void *) p, c, Qnil))) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ not_p = !not_p; ~~~~~~~~~~~~~~~ } ~ p += unified_range_table_bytes_used ((void *) p); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!not_p) goto fail; ~~~~~~~~~~~~~~~~~~~~~~ SET_REGS_MATCHED (); ~~~~~~~~~~~~~~~~~~~~ INC_IBYTEPTR_FMT (d, fmt); ~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ } ~ #endif /* MULE */ ~~~~~~~~~~~~~~~~~ /* The beginning of a group is represented by start_memory. The ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ arguments are the register number in the next two bytes, and the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ number of groups inner to this one in the two bytes thereafter. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ The text matched within the group is recorded (in the internal ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ registers data structure) under the register number. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case start_memory: ~~~~~~~~~~~~~~~~~~ { ~ regnum_t regno; ~~~~~~~~~~~~~~~ /* Find out if this group can match the empty string. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ p1 = p; /* To send to group_match_null_string_p. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ EXTRACT_NONNEGATIVE_AND_INCR (regno, p); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT3 ("EXECUTING start_memory %d (%d):\n", ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regno, extract_number (p)); ~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (REG_MATCH_NULL_STRING_P (reg_info[regno]) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ == MATCH_NULL_UNSET_VALUE) ~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ REG_MATCH_NULL_STRING_P (reg_info[regno]) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ = group_match_null_string_p (&p1, pend, reg_info); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ DEBUG_MATCH_PRINT2 (" group CAN%s match null string\n", ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ REG_MATCH_NULL_STRING_P (reg_info[regno]) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ? "NOT" : ""); ~~~~~~~~~~~~~~ /* Save the position in the string where we were the last time ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ we were at this open-group operator in case the group is ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ operated upon by a repetition operator, e.g., with `(a*)*b' ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ against `ab'; then we want to ignore where we are now in the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ string in case this attempt to match fails. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ old_regstart[regno] = REG_MATCH_NULL_STRING_P (reg_info[regno]) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ? REG_UNSET (regstart[regno]) ? d : regstart[regno] ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ : regstart[regno]; ~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT2 (" old_regstart: %zd\n", ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ POINTER_TO_OFFSET (old_regstart[regno])); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regstart[regno] = d; ~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT2 (" regstart: %zd\n", ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ POINTER_TO_OFFSET (regstart[regno])); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ IS_ACTIVE (reg_info[regno]) = 1; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ MATCHED_SOMETHING (reg_info[regno]) = 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Clear this whenever we change the register activity status. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ set_regs_matched_done = 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~ /* This is the new highest active register. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ highest_active_reg = regno; ~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* If nothing was active before, this is the new lowest active ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ register. */ ~~~~~~~~~~~~~ if (lowest_active_reg == NO_LOWEST_ACTIVE_REG) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ lowest_active_reg = regno; ~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Move past the inner group count. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ p += 2; ~~~~~~~ just_past_start_mem = p; ~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ } ~ /* The stop_memory opcode represents the end of a group. Its ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ arguments are the same as start_memory's: the register ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ number, and the number of inner groups. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case stop_memory: ~~~~~~~~~~~~~~~~~ { ~ regnum_t regno, inner_groups; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ EXTRACT_NONNEGATIVE_AND_INCR (regno, p); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ EXTRACT_NONNEGATIVE_AND_INCR (inner_groups, p); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT3 ("EXECUTING stop_memory %d (%d):\n", ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regno, inner_groups); ~~~~~~~~~~~~~~~~~~~~~ /* We need to save the string position the last time we were at ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ this close-group operator in case the group is operated ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ upon by a repetition operator, e.g., with `((a*)*(b*)*)*' ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ against `aba'; then we want to ignore where we are now in ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the string in case this attempt to match fails. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ old_regend[regno] = REG_MATCH_NULL_STRING_P (reg_info[regno]) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ? REG_UNSET (regend[regno]) ? d : regend[regno] ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ : regend[regno]; ~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT2 (" old_regend: %zd\n", ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ POINTER_TO_OFFSET (old_regend[regno])); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regend[regno] = d; ~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT2 (" regend: %zd\n", ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ POINTER_TO_OFFSET (regend[regno])); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* This register isn't active anymore. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ IS_ACTIVE (reg_info[regno]) = 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Clear this whenever we change the register activity status. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ set_regs_matched_done = 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~ /* If this was the only register active, nothing is active ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ anymore. */ ~~~~~~~~~~~~ if (lowest_active_reg == highest_active_reg) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ lowest_active_reg = NO_LOWEST_ACTIVE_REG; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ highest_active_reg = NO_HIGHEST_ACTIVE_REG; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ else ~~~~ { ~ /* We must scan for the new highest active register, since it ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ isn't necessarily one less than now: consider ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (a(b)c(d(e)f)g). When group 3 ends, after the f), the new ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ highest active register is 1. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regnum_t r = regno - 1; ~~~~~~~~~~~~~~~~~~~~~~~ while (r > 0 && !IS_ACTIVE (reg_info[r])) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ r--; ~~~~ /* If we end up at register zero, that means that we saved ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the registers as the result of an `on_failure_jump', not ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ a `start_memory', and we jumped to past the innermost ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `stop_memory'. For example, in ((.)*) we save registers ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 1 and 2 as a result of the *, but when we pop back to the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ second ), we are at the stop_memory 1. Thus, nothing is ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ active. */ ~~~~~~~~~~~ if (r == 0) ~~~~~~~~~~~ { ~ lowest_active_reg = NO_LOWEST_ACTIVE_REG; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ highest_active_reg = NO_HIGHEST_ACTIVE_REG; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ else ~~~~ { ~ highest_active_reg = r; ~~~~~~~~~~~~~~~~~~~~~~~ /* 98/9/21 jhod: We've also gotta set lowest_active_reg, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ don't we? */ ~~~~~~~~~~~~ r = 1; ~~~~~~ while (r < highest_active_reg && !IS_ACTIVE(reg_info[r])) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ r++; ~~~~ lowest_active_reg = r; ~~~~~~~~~~~~~~~~~~~~~~ } ~ } ~ /* If just failed to match something this time around with a ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ group that's operated on by a repetition operator, try to ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ force exit from the ``loop'', and restore the register ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ information for this group that we had before trying this ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ last match. */ ~~~~~~~~~~~~~~~ if ((!MATCHED_SOMETHING (reg_info[regno]) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || just_past_start_mem == p - 4) && p < pend) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ re_bool is_a_jump_n = false; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ p1 = p; ~~~~~~~ mcnt = 0; ~~~~~~~~~ switch ((re_opcode_t) *p1++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ case jump_n: ~~~~~~~~~~~~ is_a_jump_n = true; ~~~~~~~~~~~~~~~~~~~ case pop_failure_jump: ~~~~~~~~~~~~~~~~~~~~~~ case maybe_pop_jump: ~~~~~~~~~~~~~~~~~~~~ case jump: ~~~~~~~~~~ case dummy_failure_jump: ~~~~~~~~~~~~~~~~~~~~~~~~ EXTRACT_NUMBER_AND_INCR (mcnt, p1); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (is_a_jump_n) ~~~~~~~~~~~~~~~~ p1 += 2; ~~~~~~~~ break; ~~~~~~ default: ~~~~~~~~ /* do nothing */ ; ~~~~~~~~~~~~~~~~~~ } ~ p1 += mcnt; ~~~~~~~~~~~ /* If the next operation is a jump backwards in the pattern ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ to an on_failure_jump right before the start_memory ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ corresponding to this stop_memory, exit from the loop ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ by forcing a failure after pushing on the stack the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ on_failure_jump's jump in the pattern, and d. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (mcnt < 0 && (re_opcode_t) *p1 == on_failure_jump ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ && (re_opcode_t) p1[3] == start_memory && ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regno == extract_nonnegative (p1 + 4)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ /* If this group ever matched anything, then restore ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ what its registers were before trying this last ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ failed match, e.g., with `(a*)*b' against `ab' for ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regstart[1], and, e.g., with `((a*)*(b*)*)*' against ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `aba' for regend[3]. ~~~~~~~~~~~~~~~~~~~~ Also restore the registers for inner groups for, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ e.g., `((a*)(b*))*' against `aba' (register 3 would ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ otherwise get trashed). */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (EVER_MATCHED_SOMETHING (reg_info[regno])) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ int r; ~~~~~~ EVER_MATCHED_SOMETHING (reg_info[regno]) = 0; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Restore this and inner groups' (if any) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ registers. */ ~~~~~~~~~~~~~~ for (r = regno; r < regno + inner_groups; r++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ regstart[r] = old_regstart[r]; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* xx why this test? */ ~~~~~~~~~~~~~~~~~~~~~~~~ if (old_regend[r] >= regstart[r]) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regend[r] = old_regend[r]; ~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ } ~ p1++; ~~~~~ EXTRACT_NUMBER_AND_INCR (mcnt, p1); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PUSH_FAILURE_POINT (p1 + mcnt, d, -2); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto fail; ~~~~~~~~~~ } ~ } ~ /* We used to move past the register number and inner group count ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ here, when registers were just one byte; that's no longer ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ necessary with EXTRACT_NUMBER_AND_INCR(), above. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ } ~ /* \ has been turned into a `duplicate' command which is ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ followed by the numeric value of as the register number. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (Already passed through external-to-internal-register mapping, so ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ it refers to the actual group number, not the non-shy-only ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ numbering used in the external world.) */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case duplicate: ~~~~~~~~~~~~~~~ { ~ REGISTER re_char *d2, *dend2; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Get which register to match against. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regnum_t regno; ~~~~~~~~~~~~~~~ EXTRACT_NONNEGATIVE_AND_INCR (regno, p); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT2 ("EXECUTING duplicate %d.\n", regno); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Can't back reference a group which we've never matched. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (REG_UNSET (regstart[regno]) || REG_UNSET (regend[regno])) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto fail; ~~~~~~~~~~ /* Where in input to try to start matching. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ d2 = regstart[regno]; ~~~~~~~~~~~~~~~~~~~~~ /* Where to stop matching; if both the place to start and ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the place to stop matching are in the same string, then ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ set to the place to stop, otherwise, for now have to use ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the end of the first string. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ dend2 = ((FIRST_STRING_P (regstart[regno]) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ == FIRST_STRING_P (regend[regno])) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ? regend[regno] : end_match_1); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (;;) ~~~~~~~~ { ~ /* If necessary, advance to next segment in register ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ contents. */ ~~~~~~~~~~~~~ while (d2 == dend2) ~~~~~~~~~~~~~~~~~~~ { ~ if (dend2 == end_match_2) break; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (dend2 == regend[regno]) break; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* End of string1 => advance to string2. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ d2 = string2; ~~~~~~~~~~~~~ dend2 = regend[regno]; ~~~~~~~~~~~~~~~~~~~~~~ } ~ /* At end of register contents => success */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (d2 == dend2) break; ~~~~~~~~~~~~~~~~~~~~~~~ /* If necessary, advance to next segment in data. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ REGEX_PREFETCH (); ~~~~~~~~~~~~~~~~~~ /* How many characters left in this segment to match. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ mcnt = dend - d; ~~~~~~~~~~~~~~~~ /* Want how many consecutive characters we can match in ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ one shot, so, if necessary, adjust the count. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (mcnt > dend2 - d2) ~~~~~~~~~~~~~~~~~~~~~~ mcnt = dend2 - d2; ~~~~~~~~~~~~~~~~~~ /* Compare that many; failure if mismatch, else move ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ past them. */ ~~~~~~~~~~~~~~ if (TRANSLATE_P (translate) ~~~~~~~~~~~~~~~~~~~~~~~~~~~ ? bcmp_translate (d, d2, mcnt, translate ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef emacs ~~~~~~~~~~~~ , fmt, lispobj ~~~~~~~~~~~~~~ #endif ~~~~~~ ) ~ : memcmp (d, d2, mcnt)) ~~~~~~~~~~~~~~~~~~~~~~~ goto fail; ~~~~~~~~~~ d += mcnt, d2 += mcnt; ~~~~~~~~~~~~~~~~~~~~~~ /* Do this because we've match some characters. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ SET_REGS_MATCHED (); ~~~~~~~~~~~~~~~~~~~~ } ~ } ~ break; ~~~~~~ /* begline matches the empty string at the beginning of the string ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (unless `not_bol' is set in `bufp'), and, if ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `newline_anchor' is set, after newlines. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case begline: ~~~~~~~~~~~~~ DEBUG_MATCH_PRINT1 ("EXECUTING begline.\n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (AT_STRINGS_BEG (d)) ~~~~~~~~~~~~~~~~~~~~~~~ { ~ if (!bufp->not_bol) break; ~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ else ~~~~ { ~ re_char *d2 = d; ~~~~~~~~~~~~~~~~ DEC_IBYTEPTR (d2); ~~~~~~~~~~~~~~~~~~ if (itext_ichar_ascii_fmt (d2, fmt, lispobj) == '\n' && ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bufp->newline_anchor) ~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ } ~ /* In all other cases, we fail. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto fail; ~~~~~~~~~~ /* endline is the dual of begline. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case endline: ~~~~~~~~~~~~~ DEBUG_MATCH_PRINT1 ("EXECUTING endline.\n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (AT_STRINGS_END (d)) ~~~~~~~~~~~~~~~~~~~~~~~ { ~ if (!bufp->not_eol) break; ~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* We have to ``prefetch'' the next character. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ else if ((d == end1 ? ~~~~~~~~~~~~~~~~~~~~~ itext_ichar_ascii_fmt (string2, fmt, lispobj) : ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ itext_ichar_ascii_fmt (d, fmt, lispobj)) == '\n' ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ && bufp->newline_anchor) ~~~~~~~~~~~~~~~~~~~~~~~~ { ~ break; ~~~~~~ } ~ goto fail; ~~~~~~~~~~ /* Match at the very beginning of the data. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case begbuf: ~~~~~~~~~~~~ DEBUG_MATCH_PRINT1 ("EXECUTING begbuf.\n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (AT_STRINGS_BEG (d)) ~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ goto fail; ~~~~~~~~~~ /* Match at the very end of the data. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case endbuf: ~~~~~~~~~~~~ DEBUG_MATCH_PRINT1 ("EXECUTING endbuf.\n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (AT_STRINGS_END (d)) ~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ goto fail; ~~~~~~~~~~ /* on_failure_keep_string_jump is used to optimize `.*\n'. It ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ pushes NULL as the value for the string on the stack. Then ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `pop_failure_point' will keep the current value for the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ string, instead of restoring it. To see why, consider ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ matching `foo\nbar' against `.*\n'. The .* matches the foo; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ then the . fails against the \n. But the next thing we want ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ to do is match the \n against the \n; if we restored the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ string value, we would be back at the foo. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Because this is used only in specific cases, we don't need to ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ check all the things that `on_failure_jump' does, to make ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ sure the right things get saved on the stack. Hence we don't ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ share its code. The only reason to push anything on the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ stack at all is that otherwise we would have to change ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `anychar's code to do something besides goto fail in this ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case; that seems worse than this. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case on_failure_keep_string_jump: ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT1 ("EXECUTING on_failure_keep_string_jump"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ EXTRACT_NUMBER_AND_INCR (mcnt, p); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT3 (" %d (to 0x%zx):\n", mcnt, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (Bytecount) (p + mcnt)); ~~~~~~~~~~~~~~~~~~~~~~~~ PUSH_FAILURE_POINT (p + mcnt, (unsigned char *) 0, -2); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ /* Uses of on_failure_jump: ~~~~~~~~~~~~~~~~~~~~~~~~~~~ Each alternative starts with an on_failure_jump that points ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ to the beginning of the next alternative. Each alternative ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ except the last ends with a jump that in effect jumps past ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the rest of the alternatives. (They really jump to the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ending jump of the following alternative, because tensioning ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ these jumps is a hassle.) ~~~~~~~~~~~~~~~~~~~~~~~~~ Repeats start with an on_failure_jump that points past both ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the repetition text and either the following jump or ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ pop_failure_jump back to this on_failure_jump. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case on_failure_jump: ~~~~~~~~~~~~~~~~~~~~~ on_failure: ~~~~~~~~~~~ DEBUG_MATCH_PRINT1 ("EXECUTING on_failure_jump"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ EXTRACT_NUMBER_AND_INCR (mcnt, p); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT3 (" %d (to 0x%zx)", mcnt, (Bytecount) (p + mcnt)); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* If this on_failure_jump comes right before a group (i.e., ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the original * applied to a group), save the information ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for that group and all inner ones, so that if we fail back ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ to this point, the group's information will be correct. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ For example, in \(a*\)*\1, we need the preceding group, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ and in \(\(a*\)b*\)\2, we need the inner group. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* We can't use `p' to check ahead because we push ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ a failure point to `p + mcnt' after we do this. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ p1 = p; ~~~~~~~ /* We need to skip no_op's before we look for the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ start_memory in case this on_failure_jump is happening as ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the result of a completed succeed_n, as in \(a\)\{1,3\}b\1 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ against aba. */ ~~~~~~~~~~~~~~~~ while (p1 < pend && (re_opcode_t) *p1 == no_op) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ p1++; ~~~~~ if (p1 < pend && (re_opcode_t) *p1 == start_memory) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ /* We have a new highest active register now. This will ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ get reset at the start_memory we are about to get to, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ but we will have saved all the registers relevant to ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ this repetition op, as described above. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ highest_active_reg = *(p1 + 1) + *(p1 + 2); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (lowest_active_reg == NO_LOWEST_ACTIVE_REG) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ lowest_active_reg = *(p1 + 1); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ DEBUG_MATCH_PRINT1 (":\n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~ PUSH_FAILURE_POINT (p + mcnt, d, -2); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ /* A smart repeat ends with `maybe_pop_jump'. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ We change it to either `pop_failure_jump' or `jump'. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case maybe_pop_jump: ~~~~~~~~~~~~~~~~~~~~ EXTRACT_NUMBER_AND_INCR (mcnt, p); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT2 ("EXECUTING maybe_pop_jump %d.\n", mcnt); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ REGISTER const unsigned char *p2 = p; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Compare the beginning of the repeat with what in the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ pattern follows its end. If we can establish that there ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ is nothing that they would both match, i.e., that we ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ would have to backtrack because of (as in, e.g., `a*a') ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ then we can change to pop_failure_jump, because we'll ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ never have to backtrack. ~~~~~~~~~~~~~~~~~~~~~~~~ This is not true in the case of alternatives: in ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `(a|ab)*' we do need to backtrack to the `ab' alternative ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (e.g., if the string was `ab'). But instead of trying to ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ detect that here, the alternative has put on a dummy ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ failure point which is what we will end up popping. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Skip over open/close-group commands. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ If what follows this loop is a ...+ construct, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ look at what begins its body, since we will have to ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ match at least one of that. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ while (1) ~~~~~~~~~ { ~ if (p2 + 2 < pend ~~~~~~~~~~~~~~~~~ && ((re_opcode_t) *p2 == stop_memory ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || (re_opcode_t) *p2 == start_memory)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ p2 += 3; ~~~~~~~~ else if (p2 + 6 < pend ~~~~~~~~~~~~~~~~~~~~~~ && (re_opcode_t) *p2 == dummy_failure_jump) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ p2 += 6; ~~~~~~~~ else ~~~~ break; ~~~~~~ } ~ p1 = p + mcnt; ~~~~~~~~~~~~~~ /* p1[0] ... p1[2] are the `on_failure_jump' corresponding ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ to the `maybe_finalize_jump' of this case. Examine what ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ follows. */ ~~~~~~~~~~~~ /* If we're at the end of the pattern, we can change. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (p2 == pend) ~~~~~~~~~~~~~~~ { ~ /* Consider what happens when matching ":\(.*\)" ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ against ":/". I don't really understand this code ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ yet. */ ~~~~~~~~ ((unsigned char *)p)[-3] = (re_char) pop_failure_jump; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT1 ~~~~~~~~~~~~~~~~~~ (" End of pattern: change to `pop_failure_jump'.\n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ else if ((re_opcode_t) *p2 == exactn ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || (bufp->newline_anchor && (re_opcode_t) *p2 == endline)) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ REGISTER unsigned char c ~~~~~~~~~~~~~~~~~~~~~~~~ = *p2 == (unsigned char) endline ? '\n' : p2[2]; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if ((re_opcode_t) p1[3] == exactn && p1[5] != c) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ ((unsigned char *)p)[-3] ~~~~~~~~~~~~~~~~~~~~~~~~ = (unsigned char) pop_failure_jump; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT3 (" %c != %c => pop_failure_jump.\n", ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ c, p1[5]); ~~~~~~~~~~ } ~ else if ((re_opcode_t) p1[3] == charset ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || (re_opcode_t) p1[3] == charset_not) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ int not_p = (re_opcode_t) p1[3] == charset_not; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (c < (unsigned char) (p1[4] * BYTEWIDTH) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ && p1[5 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH))) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ not_p = !not_p; ~~~~~~~~~~~~~~~ /* `not_p' is equal to 1 if c would match, which means ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ that we can't change to pop_failure_jump. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!not_p) ~~~~~~~~~~~ { ~ ((unsigned char *)p)[-3] ~~~~~~~~~~~~~~~~~~~~~~~~ = (unsigned char) pop_failure_jump; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT1 (" No match => pop_failure_jump.\n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ } ~ } ~ else if ((re_opcode_t) *p2 == charset) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ #ifdef DEBUG ~~~~~~~~~~~~ REGISTER unsigned char c ~~~~~~~~~~~~~~~~~~~~~~~~ = *p2 == (unsigned char) endline ? '\n' : p2[2]; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ if ((re_opcode_t) p1[3] == exactn ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ && ! ((int) p2[1] * BYTEWIDTH > (int) p1[5] ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ && (p2[2 + p1[5] / BYTEWIDTH] ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ & (1 << (p1[5] % BYTEWIDTH))))) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ unsigned char *p3 = (unsigned char *)p; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ p3[-3] = (unsigned char) pop_failure_jump; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT3 (" %c != %c => pop_failure_jump.\n", ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ c, p1[5]); ~~~~~~~~~~ } ~ else if ((re_opcode_t) p1[3] == charset_not) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ int idx; ~~~~~~~~ /* We win if the charset_not inside the loop ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ lists every character listed in the charset after. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (idx = 0; idx < (int) p2[1]; idx++) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (! (p2[2 + idx] == 0 ~~~~~~~~~~~~~~~~~~~~~~~ || (idx < (int) p1[4] ~~~~~~~~~~~~~~~~~~~~~ && ((p2[2 + idx] & ~ p1[5 + idx]) == 0)))) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ if (idx == p2[1]) ~~~~~~~~~~~~~~~~~ { ~ unsigned char *p3 = (unsigned char *) p; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ p3[-3] = (unsigned char) pop_failure_jump; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT1 (" No match => pop_failure_jump.\n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ } ~ else if ((re_opcode_t) p1[3] == charset) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ int idx; ~~~~~~~~ /* We win if the charset inside the loop ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ has no overlap with the one after the loop. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ for (idx = 0; ~~~~~~~~~~~~~ idx < (int) p2[1] && idx < (int) p1[4]; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ idx++) ~~~~~~ if ((p2[2 + idx] & p1[5 + idx]) != 0) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ if (idx == p2[1] || idx == p1[4]) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ unsigned char *p3 = (unsigned char *)p; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ p3[-3] = (unsigned char) pop_failure_jump; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT1 (" No match => pop_failure_jump.\n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ } ~ } ~ } ~ p -= 2; /* Point at relative address again. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if ((re_opcode_t) p[-1] != pop_failure_jump) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ { ~ p[-1] = (unsigned char) jump; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT1 (" Match => jump.\n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto unconditional_jump; ~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* Note fall through. */ ~~~~~~~~~~~~~~~~~~~~~~~~~ /* The end of a simple repeat has a pop_failure_jump back to ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ its matching on_failure_jump, where the latter will push a ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ failure point. The pop_failure_jump takes off failure ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ points put on by this pop_failure_jump's matching ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ on_failure_jump; we got through the pattern to here from the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ matching on_failure_jump, so didn't fail. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case pop_failure_jump: ~~~~~~~~~~~~~~~~~~~~~~ { ~ /* We need to pass separate storage for the lowest and ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ highest registers, even though we don't care about the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ actual values. Otherwise, we will restore only one ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ register from the stack, since lowest will == highest in ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `pop_failure_point'. */ ~~~~~~~~~~~~~~~~~~~~~~~~ int dummy_low_reg, dummy_high_reg; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ unsigned char *pdummy; ~~~~~~~~~~~~~~~~~~~~~~ re_char *sdummy = NULL; ~~~~~~~~~~~~~~~~~~~~~~~ USED (sdummy); /* Silence warning. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT1 ("EXECUTING pop_failure_jump.\n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ POP_FAILURE_POINT (sdummy, pdummy, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ dummy_low_reg, dummy_high_reg, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ reg_dummy, reg_dummy, reg_info_dummy); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ USED (pdummy); ~~~~~~~~~~~~~~ } ~ /* Note fall through. */ ~~~~~~~~~~~~~~~~~~~~~~~~~ /* Unconditionally jump (without popping any failure points). */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case jump: ~~~~~~~~~~ unconditional_jump: ~~~~~~~~~~~~~~~~~~~ EXTRACT_NUMBER_AND_INCR (mcnt, p); /* Get the amount to jump. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT2 ("EXECUTING jump %d ", mcnt); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ p += mcnt; /* Do the jump. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT2 ("(to 0x%zx).\n", (Bytecount) p); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ /* We need this opcode so we can detect where alternatives end ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ in `group_match_null_string_p' et al. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case jump_past_alt: ~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT1 ("EXECUTING jump_past_alt.\n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto unconditional_jump; ~~~~~~~~~~~~~~~~~~~~~~~~ /* Normally, the on_failure_jump pushes a failure point, which ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ then gets popped at pop_failure_jump. We will end up at ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ pop_failure_jump, also, and with a pattern of, say, `a+', we ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ are skipping over the on_failure_jump, so we have to push ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ something meaningless for pop_failure_jump to pop. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case dummy_failure_jump: ~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT1 ("EXECUTING dummy_failure_jump.\n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* It doesn't matter what we push for the string here. What ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ the code at `fail' tests is the value for the pattern. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PUSH_FAILURE_POINT ((unsigned char *) 0, (unsigned char *) 0, -2); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto unconditional_jump; ~~~~~~~~~~~~~~~~~~~~~~~~ /* At the end of an alternative, we need to push a dummy failure ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ point in case we are followed by a `pop_failure_jump', because ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ we don't want the failure point for the alternative to be ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ popped. For example, matching `(a|ab)*' against `aab' ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ requires that we match the `ab' alternative. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case push_dummy_failure: ~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT1 ("EXECUTING push_dummy_failure.\n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* See comments just above at `dummy_failure_jump' about the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ two zeroes. */ ~~~~~~~~~~~~~~~ PUSH_FAILURE_POINT ((re_char *) 0, (re_char *) 0, -2); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ /* Have to succeed matching what follows at least n times. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ After that, handle like `on_failure_jump'. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case succeed_n: ~~~~~~~~~~~~~~~ EXTRACT_NONNEGATIVE (mcnt, p + 2); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT2 ("EXECUTING succeed_n %d.\n", mcnt); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Originally, this is how many times we HAVE to succeed. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (mcnt) ~~~~~~~~~ { ~ mcnt--; ~~~~~~~ p += 2; ~~~~~~~ DEBUG_MATCH_PRINT3 (" Setting 0x%zx to %d.\n", (Bytecount) p, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ mcnt); ~~~~~~ STORE_MATCH_NUMBER_AND_INCR (p, mcnt); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ else ~~~~ { ~ DEBUG_MATCH_PRINT2 (" Setting two bytes from 0x%zx to no_op.\n", ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (Bytecount) (p+2)); ~~~~~~~~~~~~~~~~~~~ STORE_MATCH_NUMBER (p + 2, no_op); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto on_failure; ~~~~~~~~~~~~~~~~ } ~ break; ~~~~~~ case jump_n: ~~~~~~~~~~~~ EXTRACT_NONNEGATIVE (mcnt, p + 2); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT2 ("EXECUTING jump_n %d.\n", mcnt); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Originally, this is how many times we CAN jump. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (mcnt) ~~~~~~~~~ { ~ mcnt--; ~~~~~~~ STORE_MATCH_NUMBER (p + 2, mcnt); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto unconditional_jump; ~~~~~~~~~~~~~~~~~~~~~~~~ } ~ /* If don't have to jump any more, skip over the rest of command. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ else ~~~~ p += 4; ~~~~~~~ break; ~~~~~~ case set_number_at: ~~~~~~~~~~~~~~~~~~~ { ~ unsigned char *p2; /* Location of the counter. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT1 ("EXECUTING set_number_at.\n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ EXTRACT_NUMBER_AND_INCR (mcnt, p); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* Discard 'const', making re_match_2_internal() ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ non-reentrant. */ ~~~~~~~~~~~~~~~~~~ p2 = (unsigned char *) p + mcnt; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ EXTRACT_NONNEGATIVE_AND_INCR (mcnt, p); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT3 (" Setting 0x%zx to %d.\n", (Bytecount) p2, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ mcnt); ~~~~~~ STORE_MATCH_NUMBER (p2, mcnt); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ } ~ case wordbound: ~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT1 ("EXECUTING wordbound.\n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ should_succeed = 1; ~~~~~~~~~~~~~~~~~~~ matchwordbound: ~~~~~~~~~~~~~~~ { ~ /* XEmacs change */ ~~~~~~~~~~~~~~~~~~~ /* Straightforward and (I hope) correct implementation. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* emch1 is the character before d, syn1 is the syntax of ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ emch1, emch2 is the character at d, and syn2 is the ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ syntax of emch2. */ ~~~~~~~~~~~~~~~~~~~ Ichar emch1, emch2; ~~~~~~~~~~~~~~~~~~~ int syn1 = 0, ~~~~~~~~~~~~~ syn2 = 0; ~~~~~~~~~ re_char *d_before, *d_after; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int result, ~~~~~~~~~~~ at_beg = AT_STRINGS_BEG (d), ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ at_end = AT_STRINGS_END (d); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (at_beg && at_end) ~~~~~~~~~~~~~~~~~~~~~ { ~ result = 0; ~~~~~~~~~~~ } ~ else ~~~~ { ~ if (!at_beg) ~~~~~~~~~~~~ { ~ d_before = POS_BEFORE_GAP_UNSAFE (d); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEC_IBYTEPTR_FMT (d_before, fmt); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ emch1 = itext_ichar_fmt (d_before, fmt, lispobj); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef emacs ~~~~~~~~~~~~ BEGIN_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~~~ UPDATE_SYNTAX_CACHE (scache, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ offset_to_bytexpos ~~~~~~~~~~~~~~~~~~ (lispobj, PTR_TO_OFFSET (d_before))); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ syn1 = SYNTAX_FROM_CACHE (scache, emch1); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ END_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~ } ~ if (!at_end) ~~~~~~~~~~~~ { ~ d_after = POS_AFTER_GAP_UNSAFE (d); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ emch2 = itext_ichar_fmt (d_after, fmt, lispobj); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef emacs ~~~~~~~~~~~~ BEGIN_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~~~ UPDATE_SYNTAX_CACHE_FORWARD (scache, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ offset_to_bytexpos ~~~~~~~~~~~~~~~~~~ (lispobj, PTR_TO_OFFSET (d))); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ syn2 = SYNTAX_FROM_CACHE (scache, emch2); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ END_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~ } ~ RE_MATCH_RELOCATE_MOVEABLE_DATA_POINTERS (); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (at_beg) ~~~~~~~~~~~ result = (syn2 == Sword); ~~~~~~~~~~~~~~~~~~~~~~~~~ else if (at_end) ~~~~~~~~~~~~~~~~ result = (syn1 == Sword); ~~~~~~~~~~~~~~~~~~~~~~~~~ else ~~~~ result = ((syn1 == Sword) != (syn2 == Sword)); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ if (result == should_succeed) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ goto fail; ~~~~~~~~~~ } ~ case notwordbound: ~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT1 ("EXECUTING notwordbound.\n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ should_succeed = 0; ~~~~~~~~~~~~~~~~~~~ goto matchwordbound; ~~~~~~~~~~~~~~~~~~~~ case wordbeg: ~~~~~~~~~~~~~ DEBUG_MATCH_PRINT1 ("EXECUTING wordbeg.\n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (AT_STRINGS_END (d)) ~~~~~~~~~~~~~~~~~~~~~~~ goto fail; ~~~~~~~~~~ { ~ /* XEmacs: this originally read: ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (WORDCHAR_P (d) && (AT_STRINGS_BEG (d) || !WORDCHAR_P (d - 1))) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ */ ~~ re_char *dtmp = POS_AFTER_GAP_UNSAFE (d); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Ichar emch = itext_ichar_fmt (dtmp, fmt, lispobj); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ int tempres; ~~~~~~~~~~~~ BEGIN_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef emacs ~~~~~~~~~~~~ UPDATE_SYNTAX_CACHE ~~~~~~~~~~~~~~~~~~~ (scache, ~~~~~~~~ offset_to_bytexpos (lispobj, PTR_TO_OFFSET (d))); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ tempres = (SYNTAX_FROM_CACHE (scache, emch) != Sword); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ END_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~ RE_MATCH_RELOCATE_MOVEABLE_DATA_POINTERS (); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (tempres) ~~~~~~~~~~~~ goto fail; ~~~~~~~~~~ if (AT_STRINGS_BEG (d)) ~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ dtmp = POS_BEFORE_GAP_UNSAFE (d); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEC_IBYTEPTR_FMT (dtmp, fmt); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ emch = itext_ichar_fmt (dtmp, fmt, lispobj); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ BEGIN_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef emacs ~~~~~~~~~~~~ UPDATE_SYNTAX_CACHE_BACKWARD ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (scache, ~~~~~~~~ offset_to_bytexpos (lispobj, PTR_TO_OFFSET (dtmp))); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ tempres = (SYNTAX_FROM_CACHE (scache, emch) != Sword); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ END_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~ RE_MATCH_RELOCATE_MOVEABLE_DATA_POINTERS (); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (tempres) ~~~~~~~~~~~~ break; ~~~~~~ goto fail; ~~~~~~~~~~ } ~ case wordend: ~~~~~~~~~~~~~ DEBUG_MATCH_PRINT1 ("EXECUTING wordend.\n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (AT_STRINGS_BEG (d)) ~~~~~~~~~~~~~~~~~~~~~~~ goto fail; ~~~~~~~~~~ { ~ /* XEmacs: this originally read: ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!AT_STRINGS_BEG (d) && WORDCHAR_P (d - 1) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ && (!WORDCHAR_P (d) || AT_STRINGS_END (d))) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ The or condition is incorrect (reversed). ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ ~~ re_char *dtmp; ~~~~~~~~~~~~~~ Ichar emch; ~~~~~~~~~~~ int tempres; ~~~~~~~~~~~~ #ifdef emacs ~~~~~~~~~~~~ BEGIN_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~~~ UPDATE_SYNTAX_CACHE ~~~~~~~~~~~~~~~~~~~ (scache, ~~~~~~~~ offset_to_bytexpos (lispobj, PTR_TO_OFFSET (d))); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ END_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~ RE_MATCH_RELOCATE_MOVEABLE_DATA_POINTERS (); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif ~~~~~~ dtmp = POS_BEFORE_GAP_UNSAFE (d); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEC_IBYTEPTR_FMT (dtmp, fmt); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ emch = itext_ichar_fmt (dtmp, fmt, lispobj); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ BEGIN_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~~~ tempres = (SYNTAX_FROM_CACHE (scache, emch) != Sword); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ END_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~ RE_MATCH_RELOCATE_MOVEABLE_DATA_POINTERS (); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (tempres) ~~~~~~~~~~~~ goto fail; ~~~~~~~~~~ if (AT_STRINGS_END (d)) ~~~~~~~~~~~~~~~~~~~~~~~ break; ~~~~~~ dtmp = POS_AFTER_GAP_UNSAFE (d); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ emch = itext_ichar_fmt (dtmp, fmt, lispobj); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ BEGIN_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef emacs ~~~~~~~~~~~~ { ~ re_char *next = d; ~~~~~~~~~~~~~~~~~~ INC_IBYTEPTR_FMT (next, fmt); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ UPDATE_SYNTAX_CACHE_FORWARD ~~~~~~~~~~~~~~~~~~~~~~~~~~~ (scache, ~~~~~~~~ offset_to_bytexpos (lispobj, PTR_TO_OFFSET (next))); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } ~ #endif ~~~~~~ tempres = (SYNTAX_FROM_CACHE (scache, emch) != Sword); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ END_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~ RE_MATCH_RELOCATE_MOVEABLE_DATA_POINTERS (); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (tempres) ~~~~~~~~~~~~ break; ~~~~~~ goto fail; ~~~~~~~~~~ } ~ #ifdef emacs ~~~~~~~~~~~~ case before_dot: ~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT1 ("EXECUTING before_dot.\n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!BUFFERP (lispobj) ~~~~~~~~~~~~~~~~~~~~~~ || (BUF_PTR_BYTE_POS (XBUFFER (lispobj), (unsigned char *) d) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ >= BUF_PT (XBUFFER (lispobj)))) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto fail; ~~~~~~~~~~ break; ~~~~~~ case at_dot: ~~~~~~~~~~~~ DEBUG_MATCH_PRINT1 ("EXECUTING at_dot.\n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!BUFFERP (lispobj) ~~~~~~~~~~~~~~~~~~~~~~ || (BUF_PTR_BYTE_POS (XBUFFER (lispobj), (unsigned char *) d) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ != BUF_PT (XBUFFER (lispobj)))) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto fail; ~~~~~~~~~~ break; ~~~~~~ case after_dot: ~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT1 ("EXECUTING after_dot.\n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if (!BUFFERP (lispobj) ~~~~~~~~~~~~~~~~~~~~~~ || (BUF_PTR_BYTE_POS (XBUFFER (lispobj), (unsigned char *) d) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ <= BUF_PT (XBUFFER (lispobj)))) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto fail; ~~~~~~~~~~ break; ~~~~~~ case syntaxspec: ~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT2 ("EXECUTING syntaxspec %d.\n", mcnt); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ mcnt = *p++; ~~~~~~~~~~~~ goto matchsyntax; ~~~~~~~~~~~~~~~~~ case wordchar: ~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT1 ("EXECUTING Emacs wordchar.\n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ mcnt = (int) Sword; ~~~~~~~~~~~~~~~~~~~ matchsyntax: ~~~~~~~~~~~~ should_succeed = 1; ~~~~~~~~~~~~~~~~~~~ matchornotsyntax: ~~~~~~~~~~~~~~~~~ { ~ int matches; ~~~~~~~~~~~~ Ichar emch; ~~~~~~~~~~~ REGEX_PREFETCH (); ~~~~~~~~~~~~~~~~~~ BEGIN_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~~~ UPDATE_SYNTAX_CACHE ~~~~~~~~~~~~~~~~~~~ (scache, ~~~~~~~~ offset_to_bytexpos (lispobj, PTR_TO_OFFSET (d))); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ END_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~ RE_MATCH_RELOCATE_MOVEABLE_DATA_POINTERS (); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ emch = itext_ichar_fmt (d, fmt, lispobj); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ BEGIN_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~~~ matches = (SYNTAX_FROM_CACHE (scache, emch) == ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (enum syntaxcode) mcnt); ~~~~~~~~~~~~~~~~~~~~~~~~ END_REGEX_MALLOC_OK (); ~~~~~~~~~~~~~~~~~~~~~~~ RE_MATCH_RELOCATE_MOVEABLE_DATA_POINTERS (); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ INC_IBYTEPTR_FMT (d, fmt); ~~~~~~~~~~~~~~~~~~~~~~~~~~ if (matches != should_succeed) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto fail; ~~~~~~~~~~ SET_REGS_MATCHED (); ~~~~~~~~~~~~~~~~~~~~ } ~ break; ~~~~~~ case notsyntaxspec: ~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT2 ("EXECUTING notsyntaxspec %d.\n", mcnt); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ mcnt = *p++; ~~~~~~~~~~~~ goto matchnotsyntax; ~~~~~~~~~~~~~~~~~~~~ case notwordchar: ~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT1 ("EXECUTING Emacs notwordchar.\n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ mcnt = (int) Sword; ~~~~~~~~~~~~~~~~~~~ matchnotsyntax: ~~~~~~~~~~~~~~~ should_succeed = 0; ~~~~~~~~~~~~~~~~~~~ goto matchornotsyntax; ~~~~~~~~~~~~~~~~~~~~~~ #ifdef MULE ~~~~~~~~~~~ /* 97/2/17 jhod Mule category code patch */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ case categoryspec: ~~~~~~~~~~~~~~~~~~ should_succeed = 1; ~~~~~~~~~~~~~~~~~~~ matchornotcategory: ~~~~~~~~~~~~~~~~~~~ { ~ Ichar emch; ~~~~~~~~~~~ mcnt = *p++; ~~~~~~~~~~~~ REGEX_PREFETCH (); ~~~~~~~~~~~~~~~~~~ emch = itext_ichar_fmt (d, fmt, lispobj); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ INC_IBYTEPTR_FMT (d, fmt); ~~~~~~~~~~~~~~~~~~~~~~~~~~ if (check_char_in_category (emch, BUFFER_CATEGORY_TABLE (lispbuf), ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ mcnt, should_succeed)) ~~~~~~~~~~~~~~~~~~~~~~ goto fail; ~~~~~~~~~~ SET_REGS_MATCHED (); ~~~~~~~~~~~~~~~~~~~~ } ~ break; ~~~~~~ case notcategoryspec: ~~~~~~~~~~~~~~~~~~~~~ should_succeed = 0; ~~~~~~~~~~~~~~~~~~~ goto matchornotcategory; ~~~~~~~~~~~~~~~~~~~~~~~~ /* end of category patch */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~ #endif /* MULE */ ~~~~~~~~~~~~~~~~~ #else /* not emacs */ ~~~~~~~~~~~~~~~~~~~~~ case wordchar: ~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT1 ("EXECUTING non-Emacs wordchar.\n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ REGEX_PREFETCH (); ~~~~~~~~~~~~~~~~~~ if (!WORDCHAR_P ((int) (*d))) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto fail; ~~~~~~~~~~ SET_REGS_MATCHED (); ~~~~~~~~~~~~~~~~~~~~ d++; ~~~~ break; ~~~~~~ case notwordchar: ~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT1 ("EXECUTING non-Emacs notwordchar.\n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ REGEX_PREFETCH (); ~~~~~~~~~~~~~~~~~~ if (!WORDCHAR_P ((int) (*d))) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ goto fail; ~~~~~~~~~~ SET_REGS_MATCHED (); ~~~~~~~~~~~~~~~~~~~~ d++; ~~~~ break; ~~~~~~ #endif /* emacs */ ~~~~~~~~~~~~~~~~~~ default: ~~~~~~~~ ABORT (); ~~~~~~~~~ } ~ continue; /* Successfully executed one pattern command; keep going. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* We goto here if a matching operation fails. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fail: ~~~~~ if (!FAIL_STACK_EMPTY ()) ~~~~~~~~~~~~~~~~~~~~~~~~~ { /* A restart point is known. Restore to that state. */ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEBUG_MATCH_PRINT1 ("\nFAIL:\n"); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ POP_FAILURE_POINT (d, p, ~~~~~~~~~~~~~~~~~~~~~~~ regex.c:774:55: note: in definition of macro 'DEBUG_FAIL_PRINT2' if (debug_regexps & RE_DEBUG_FAILURE_POINT) printf (x1, x2) ^~ regex.c:7173:11: note: in expansion of macro 'POP_FAILURE_POINT' POP_FAILURE_POINT (d, p, ^~~~~~~~~~~~~~~~~ regex.c:1920:23: warning: format '%zx' expects argument of type 'size_t', but argument 2 has type 'Bytecount {aka long int}' [-Wformat=] DEBUG_FAIL_PRINT2 (" info: 0x%zx\n", \ ^ * (Bytecount *) ®_info[this_reg]); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regex.c:774:55: note: in definition of macro 'DEBUG_FAIL_PRINT2' if (debug_regexps & RE_DEBUG_FAILURE_POINT) printf (x1, x2) ^~ regex.c:7173:11: note: in expansion of macro 'POP_FAILURE_POINT' POP_FAILURE_POINT (d, p, ^~~~~~~~~~~~~~~~~ regex.c:1922:23: warning: format '%zx' expects argument of type 'size_t', but argument 2 has type 'long int' [-Wformat=] DEBUG_FAIL_PRINT2 (" end: 0x%zx\n", \ ^ (Bytecount) regend[this_reg]); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regex.c:774:55: note: in definition of macro 'DEBUG_FAIL_PRINT2' if (debug_regexps & RE_DEBUG_FAILURE_POINT) printf (x1, x2) ^~ regex.c:7173:11: note: in expansion of macro 'POP_FAILURE_POINT' POP_FAILURE_POINT (d, p, ^~~~~~~~~~~~~~~~~ regex.c:1924:23: warning: format '%zx' expects argument of type 'size_t', but argument 2 has type 'long int' [-Wformat=] DEBUG_FAIL_PRINT2 (" start: 0x%zx\n", \ ^ (Bytecount) regstart[this_reg]); \ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ regex.c:774:55: note: in definition of macro 'DEBUG_FAIL_PRINT2' if (debug_regexps & RE_DEBUG_FAILURE_POINT) printf (x1, x2) ^~ regex.c:7173:11: note: in expansion of macro 'POP_FAILURE_POINT' POP_FAILURE_POINT (d, p, ^~~~~~~~~~~~~~~~~ --- sequence.o --- gcc -c -Wall -Wno-switch -Wundef -Wsign-compare -Wno-char-subscripts -Wpacked -Wpointer-arith -Wshadow -Wmissing-declarations -Wmissing-prototypes -Wstrict-prototypes -Wdeclaration-after-statement -g -O2 -Dunix -no-pie -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -DTERMINFO -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -Demacs -I. -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/src -DHAVE_CONFIG_H -I/usr/pkg/include -I/usr/X11R7/include -I/usr/X11R7/include sequence.c In file included from sequence.c:20:0: ./config.h:868:16: warning: "__builtin_alloca" is not defined, evaluates to 0 [-Wundef] #define alloca __builtin_alloca ^ --- signal.o --- gcc -c -Wall -Wno-switch -Wundef -Wsign-compare -Wno-char-subscripts -Wpacked -Wpointer-arith -Wshadow -Wmissing-declarations -Wmissing-prototypes -Wstrict-prototypes -Wdeclaration-after-statement -g -O2 -Dunix -no-pie -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -DTERMINFO -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -Demacs -I. -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/src -DHAVE_CONFIG_H -I/usr/pkg/include -I/usr/X11R7/include -I/usr/X11R7/include signal.c In file included from signal.c:22:0: ./config.h:868:16: warning: "__builtin_alloca" is not defined, evaluates to 0 [-Wundef] #define alloca __builtin_alloca ^ --- sound.o --- gcc -c -Wall -Wno-switch -Wundef -Wsign-compare -Wno-char-subscripts -Wpacked -Wpointer-arith -Wshadow -Wmissing-declarations -Wmissing-prototypes -Wstrict-prototypes -Wdeclaration-after-statement -g -O2 -Dunix -no-pie -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -DTERMINFO -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -Demacs -I. -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/src -DHAVE_CONFIG_H -I/usr/pkg/include -I/usr/X11R7/include -I/usr/X11R7/include sound.c In file included from sound.c:28:0: ./config.h:868:16: warning: "__builtin_alloca" is not defined, evaluates to 0 [-Wundef] #define alloca __builtin_alloca ^ --- specifier.o --- gcc -c -Wall -Wno-switch -Wundef -Wsign-compare -Wno-char-subscripts -Wpacked -Wpointer-arith -Wshadow -Wmissing-declarations -Wmissing-prototypes -Wstrict-prototypes -Wdeclaration-after-statement -g -O2 -Dunix -no-pie -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -DTERMINFO -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -Demacs -I. -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/src -DHAVE_CONFIG_H -I/usr/pkg/include -I/usr/X11R7/include -I/usr/X11R7/include specifier.c In file included from specifier.c:28:0: ./config.h:868:16: warning: "__builtin_alloca" is not defined, evaluates to 0 [-Wundef] #define alloca __builtin_alloca ^ --- strftime.o --- gcc -c -Wall -Wno-switch -Wundef -Wsign-compare -Wno-char-subscripts -Wpacked -Wpointer-arith -Wshadow -Wmissing-declarations -Wmissing-prototypes -Wstrict-prototypes -Wdeclaration-after-statement -g -O2 -Dunix -no-pie -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -DTERMINFO -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -Demacs -I. -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/src -DHAVE_CONFIG_H -I/usr/pkg/include -I/usr/X11R7/include -I/usr/X11R7/include strftime.c In file included from strftime.c:89:0: ./config.h:868:16: warning: "__builtin_alloca" is not defined, evaluates to 0 [-Wundef] #define alloca __builtin_alloca ^ strftime.c: In function 'add_num_time_t': strftime.c:199:49: warning: format '%zu' expects argument of type 'size_t', but argument 4 has type 'long unsigned int' [-Wformat=] emacs_snprintf_ascbyte (buf, sizeof (buf), "%zu", (EMACS_UINT) num); ~~^ ~~~~~~~~~~~~~~~~ %lu --- symbols.o --- gcc -c -Wall -Wno-switch -Wundef -Wsign-compare -Wno-char-subscripts -Wpacked -Wpointer-arith -Wshadow -Wmissing-declarations -Wmissing-prototypes -Wstrict-prototypes -Wdeclaration-after-statement -g -O2 -Dunix -no-pie -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -DTERMINFO -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -Demacs -I. -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/src -DHAVE_CONFIG_H -I/usr/pkg/include -I/usr/X11R7/include -I/usr/X11R7/include symbols.c In file included from symbols.c:52:0: ./config.h:868:16: warning: "__builtin_alloca" is not defined, evaluates to 0 [-Wundef] #define alloca __builtin_alloca ^ --- syntax.o --- gcc -c -Wall -Wno-switch -Wundef -Wsign-compare -Wno-char-subscripts -Wpacked -Wpointer-arith -Wshadow -Wmissing-declarations -Wmissing-prototypes -Wstrict-prototypes -Wdeclaration-after-statement -g -O2 -Dunix -no-pie -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -DTERMINFO -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -Demacs -I. -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/src -DHAVE_CONFIG_H -I/usr/pkg/include -I/usr/X11R7/include -I/usr/X11R7/include syntax.c In file included from syntax.c:25:0: ./config.h:868:16: warning: "__builtin_alloca" is not defined, evaluates to 0 [-Wundef] #define alloca __builtin_alloca ^ --- sysdep.o --- gcc -c -Wall -Wno-switch -Wundef -Wsign-compare -Wno-char-subscripts -Wpacked -Wpointer-arith -Wshadow -Wmissing-declarations -Wmissing-prototypes -Wstrict-prototypes -Wdeclaration-after-statement -g -O2 -Dunix -no-pie -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -DTERMINFO -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -Demacs -I. -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/src -DHAVE_CONFIG_H -I/usr/pkg/include -I/usr/X11R7/include -I/usr/X11R7/include sysdep.c In file included from sysdep.c:39:0: ./config.h:868:16: warning: "__builtin_alloca" is not defined, evaluates to 0 [-Wundef] #define alloca __builtin_alloca ^ --- sysdll.o --- gcc -c -Wall -Wno-switch -Wundef -Wsign-compare -Wno-char-subscripts -Wpacked -Wpointer-arith -Wshadow -Wmissing-declarations -Wmissing-prototypes -Wstrict-prototypes -Wdeclaration-after-statement -g -O2 -Dunix -no-pie -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -DTERMINFO -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -Demacs -I. -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/src -DHAVE_CONFIG_H -I/usr/pkg/include -I/usr/X11R7/include -I/usr/X11R7/include sysdll.c In file included from sysdll.c:25:0: ./config.h:868:16: warning: "__builtin_alloca" is not defined, evaluates to 0 [-Wundef] #define alloca __builtin_alloca ^ --- terminfo.o --- gcc -c -Wall -Wno-switch -Wundef -Wsign-compare -Wno-char-subscripts -Wpacked -Wpointer-arith -Wshadow -Wmissing-declarations -Wmissing-prototypes -Wstrict-prototypes -Wdeclaration-after-statement -g -O2 -Dunix -no-pie -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -DTERMINFO -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -Demacs -I. -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/src -DHAVE_CONFIG_H -I/usr/pkg/include -I/usr/X11R7/include -I/usr/X11R7/include terminfo.c --- tests.o --- gcc -c -Wall -Wno-switch -Wundef -Wsign-compare -Wno-char-subscripts -Wpacked -Wpointer-arith -Wshadow -Wmissing-declarations -Wmissing-prototypes -Wstrict-prototypes -Wdeclaration-after-statement -g -O2 -Dunix -no-pie -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -DTERMINFO -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -Demacs -I. -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/src -DHAVE_CONFIG_H -I/usr/pkg/include -I/usr/X11R7/include -I/usr/X11R7/include tests.c In file included from tests.c:28:0: ./config.h:868:16: warning: "__builtin_alloca" is not defined, evaluates to 0 [-Wundef] #define alloca __builtin_alloca ^ --- text.o --- gcc -c -Wall -Wno-switch -Wundef -Wsign-compare -Wno-char-subscripts -Wpacked -Wpointer-arith -Wshadow -Wmissing-declarations -Wmissing-prototypes -Wstrict-prototypes -Wdeclaration-after-statement -g -O2 -Dunix -no-pie -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -DTERMINFO -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -Demacs -I. -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/src -DHAVE_CONFIG_H -I/usr/pkg/include -I/usr/X11R7/include -I/usr/X11R7/include text.c In file included from text.c:27:0: ./config.h:868:16: warning: "__builtin_alloca" is not defined, evaluates to 0 [-Wundef] #define alloca __builtin_alloca ^ --- tls.o --- gcc -c -Wall -Wno-switch -Wundef -Wsign-compare -Wno-char-subscripts -Wpacked -Wpointer-arith -Wshadow -Wmissing-declarations -Wmissing-prototypes -Wstrict-prototypes -Wdeclaration-after-statement -g -O2 -Dunix -no-pie -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -DTERMINFO -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -Demacs -I. -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/src -DHAVE_CONFIG_H -I/usr/pkg/include -I/usr/X11R7/include -I/usr/X11R7/include tls.c In file included from tls.c:23:0: ./config.h:868:16: warning: "__builtin_alloca" is not defined, evaluates to 0 [-Wundef] #define alloca __builtin_alloca ^ --- toolbar.o --- gcc -c -Wall -Wno-switch -Wundef -Wsign-compare -Wno-char-subscripts -Wpacked -Wpointer-arith -Wshadow -Wmissing-declarations -Wmissing-prototypes -Wstrict-prototypes -Wdeclaration-after-statement -g -O2 -Dunix -no-pie -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -DTERMINFO -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -Demacs -I. -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/src -DHAVE_CONFIG_H -I/usr/pkg/include -I/usr/X11R7/include -I/usr/X11R7/include toolbar.c In file included from toolbar.c:27:0: ./config.h:868:16: warning: "__builtin_alloca" is not defined, evaluates to 0 [-Wundef] #define alloca __builtin_alloca ^ --- undo.o --- gcc -c -Wall -Wno-switch -Wundef -Wsign-compare -Wno-char-subscripts -Wpacked -Wpointer-arith -Wshadow -Wmissing-declarations -Wmissing-prototypes -Wstrict-prototypes -Wdeclaration-after-statement -g -O2 -Dunix -no-pie -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -DTERMINFO -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -Demacs -I. -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/src -DHAVE_CONFIG_H -I/usr/pkg/include -I/usr/X11R7/include -I/usr/X11R7/include undo.c In file included from undo.c:23:0: ./config.h:868:16: warning: "__builtin_alloca" is not defined, evaluates to 0 [-Wundef] #define alloca __builtin_alloca ^ --- unicode.o --- gcc -c -Wall -Wno-switch -Wundef -Wsign-compare -Wno-char-subscripts -Wpacked -Wpointer-arith -Wshadow -Wmissing-declarations -Wmissing-prototypes -Wstrict-prototypes -Wdeclaration-after-statement -g -O2 -Dunix -no-pie -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -DTERMINFO -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -Demacs -I. -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/src -DHAVE_CONFIG_H -I/usr/pkg/include -I/usr/X11R7/include -I/usr/X11R7/include unicode.c In file included from unicode.c:35:0: ./config.h:868:16: warning: "__builtin_alloca" is not defined, evaluates to 0 [-Wundef] #define alloca __builtin_alloca ^ --- widget.o --- gcc -c -Wall -Wno-switch -Wundef -Wsign-compare -Wno-char-subscripts -Wpacked -Wpointer-arith -Wshadow -Wmissing-declarations -Wmissing-prototypes -Wstrict-prototypes -Wdeclaration-after-statement -g -O2 -Dunix -no-pie -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -DTERMINFO -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -Demacs -I. -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/src -DHAVE_CONFIG_H -I/usr/pkg/include -I/usr/X11R7/include -I/usr/X11R7/include widget.c In file included from widget.c:27:0: ./config.h:868:16: warning: "__builtin_alloca" is not defined, evaluates to 0 [-Wundef] #define alloca __builtin_alloca ^ --- unicode.o --- unicode.c: In function 'print_precedence_array': unicode.c:1643:46: warning: format '%zd' expects argument of type 'signed size_t', but argument 3 has type 'long int' [-Wformat=] write_fmt_string (printcharfun, " length=%zd", ~~^ %ld --- window.o --- gcc -c -Wall -Wno-switch -Wundef -Wsign-compare -Wno-char-subscripts -Wpacked -Wpointer-arith -Wshadow -Wmissing-declarations -Wmissing-prototypes -Wstrict-prototypes -Wdeclaration-after-statement -g -O2 -Dunix -no-pie -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -DTERMINFO -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -Demacs -I. -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/src -DHAVE_CONFIG_H -I/usr/pkg/include -I/usr/X11R7/include -I/usr/X11R7/include window.c In file included from window.c:37:0: ./config.h:868:16: warning: "__builtin_alloca" is not defined, evaluates to 0 [-Wundef] #define alloca __builtin_alloca ^ --- event-Xt.o --- gcc -c -Wall -Wno-switch -Wundef -Wsign-compare -Wno-char-subscripts -Wpacked -Wpointer-arith -Wshadow -Wmissing-declarations -Wmissing-prototypes -Wstrict-prototypes -Wdeclaration-after-statement -g -O2 -Dunix -no-pie -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -DTERMINFO -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -Demacs -I. -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/src -DHAVE_CONFIG_H -I/usr/pkg/include -I/usr/X11R7/include -I/usr/X11R7/include event-Xt.c In file included from event-Xt.c:32:0: ./config.h:868:16: warning: "__builtin_alloca" is not defined, evaluates to 0 [-Wundef] #define alloca __builtin_alloca ^ event-Xt.c: In function 'x_reset_modifier_mapping': event-Xt.c:1494:2: warning: 'XKeycodeToKeysym' is deprecated [-Wdeprecated-declarations] KeySym sym = (code ? XKeycodeToKeysym (display, code, column) : 0); ^~~~~~ In file included from events.h:571:0, from event-Xt.c:41: /pbulk/work/editors/xemacs-current/work/.buildlink/include/X11/Xlib.h:1687:15: note: declared here extern KeySym XKeycodeToKeysym( ^~~~~~~~~~~~~~~~ --- TransientEmacsShell.o --- gcc -c -Wall -Wno-switch -Wundef -Wsign-compare -Wno-char-subscripts -Wpacked -Wpointer-arith -Wshadow -Wmissing-declarations -Wmissing-prototypes -Wstrict-prototypes -Wdeclaration-after-statement -g -O2 -Dunix -no-pie -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -DTERMINFO -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -Demacs -I. -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/src -DHAVE_CONFIG_H -I/usr/pkg/include -I/usr/X11R7/include -I/usr/X11R7/include -DDEFINE_TRANSIENT_EMACS_SHELL /pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/src/EmacsShell-sub.c In file included from /pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/src/EmacsShell-sub.c:79:0: ./config.h:868:16: warning: "__builtin_alloca" is not defined, evaluates to 0 [-Wundef] #define alloca __builtin_alloca ^ mv EmacsShell-sub.o TransientEmacsShell.o --- regex.o --- regex.c: In function 're_search_2': regex.c:1503:8: warning: 'd' may be used uninitialized in this function [-Wmaybe-uninitialized] && (re_char *) (val) <= (re_char *) string1 + size1) \ ^~ regex.c:5016:12: note: 'd' was declared here re_char *d; ^ --- dump-id.c --- ../lib-src/make-dump-id --- dump-id.o --- gcc -c -Wall -Wno-switch -Wundef -Wsign-compare -Wno-char-subscripts -Wpacked -Wpointer-arith -Wshadow -Wmissing-declarations -Wmissing-prototypes -Wstrict-prototypes -Wdeclaration-after-statement -g -O2 -Dunix -no-pie -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -DTERMINFO -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -Demacs -I. -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/src -DHAVE_CONFIG_H -I/usr/pkg/include -I/usr/X11R7/include -I/usr/X11R7/include dump-id.c --- temacs --- if test -f dump-size ; then gcc -c -Wall -Wno-switch -Wundef -Wsign-compare -Wno-char-subscripts -Wpacked -Wpointer-arith -Wshadow -Wmissing-declarations -Wmissing-prototypes -Wstrict-prototypes -Wdeclaration-after-statement -g -O2 -Dunix -no-pie -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -DTERMINFO -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -Demacs -I. -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/src -DHAVE_CONFIG_H -I/usr/pkg/include -I/usr/X11R7/include -I/usr/X11R7/include -DMAX_SIZE=`cat dump-size` /pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/src/dump-data.c ; else gcc -c -Wall -Wno-switch -Wundef -Wsign-compare -Wno-char-subscripts -Wpacked -Wpointer-arith -Wshadow -Wmissing-declarations -Wmissing-prototypes -Wstrict-prototypes -Wdeclaration-after-statement -g -O2 -Dunix -no-pie -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -DTERMINFO -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -Demacs -I. -I/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/src -DHAVE_CONFIG_H -I/usr/pkg/include -I/usr/X11R7/include -I/usr/X11R7/include -DMAX_SIZE=0 /pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/src/dump-data.c ; fi In file included from /pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/src/dump-data.c:28:0: ./config.h:868:16: warning: "__builtin_alloca" is not defined, evaluates to 0 [-Wundef] #define alloca __builtin_alloca ^ gcc -Wall -Wno-switch -Wundef -Wsign-compare -Wno-char-subscripts -Wpacked -Wpointer-arith -Wshadow -Wmissing-declarations -Wmissing-prototypes -Wstrict-prototypes -Wdeclaration-after-statement -g -O2 -Dunix -no-pie -I/usr/pkg/include -I/usr/include -I/usr/X11R7/include -Wl,-zrelro -L/usr/pkg/lib -Wl,-R/usr/pkg/lib -L/usr/lib -Wl,-R/usr/lib -L/usr/X11R7/lib -Wl,-R/usr/X11R7/lib -L/usr/pkg/lib -L/usr/X11R7/lib -Wl,-R/usr/pkg/lib:/usr/X11R7/lib -L/usr/X11R7/lib -R/usr/X11R7/lib -Wl,-export-dynamic -o temacs abbrev.o alloc.o alloca.o array.o balloon_help.o balloon-x.o blocktype.o buffer.o bytecode.o callint.o casefiddle.o casetab.o chartab.o filelock.o cmdloop.o cmds.o console.o console-stream.o data.o database.o debug.o tests.o device.o dired.o doc.o doprnt.o editfns.o elhash.o emacs.o emodules.o eval.o events.o event-stream.o event-unixoid.o dumper.o input-method-xlib.o inline.o linuxplay.o miscplay.o terminfo.o extents.o faces.o file-coding.o fileio.o filemode.o floatfns.o fns.o font-lock.o frame.o gc.o general.o glyphs.o glyphs-eimage.o glyphs-shared.o glyphs-widget.o gui.o menubar.o scrollbar.o dialog.o toolbar.o gutter.o imgproc.o indent.o insdel.o intl.o keymap.o line-number.o lread.o lstream.o macros.o marker.o md5.o minibuf.o mule-ccl.o mule-charset.o mule-coding.o number-gmp.o number.o fontcolor.o opaque.o print.o process.o process-unix.o profile.o rangetab.o realpath.o redisplay.o redisplay-output.o regex.o search.o select.o sequence.o sysdll.o signal.o sound.o specifier.o strftime.o symbols.o syntax.o sysdep.o text.o tls.o console-tty.o device-tty.o event-tty.o frame-tty.o fontcolor-tty.o redisplay-tty.o cm.o undo.o unicode.o console-x.o device-x.o event-Xt.o frame-x.o glyphs-x.o fontcolor-x.o redisplay-x.o select-x.o gccache-x.o intl-x.o gui-x.o menubar-x.o scrollbar-x.o dialog-x.o toolbar-x.o toolbar-xlike.o widget.o window.o lastfile.o ralloc.o EmacsFrame.o EmacsShell.o TopLevelEmacsShell.o TransientEmacsShell.o EmacsManager.o dump-id.o dump-data.o ../lwlib/liblw.a -lXaw -ltiff -lpng -ljpeg -lz -lXpm -lXmu -lXt -lXext -lX11 -lSM -lICE -lgmp -lgdbm -ltermcap -lossaudio -lintl -lm -lkvm -lutil --- ./NEEDTODUMP --- ./temacs -nd -no-packages -no-configured-paths -batch -l ../lisp/update-elc.el Loading ../lisp/update-elc.el... Loading find-paths.el... Loading packages.el... Loading setup-paths.el... Loading /pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/lisp/dumped-lisp.el... Wrote /pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/src/NEEDTODUMP Loading loadup-el.el... Loading loadup.el... Using load-path (/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/lisp/) Using module-load-path (/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/modules) Loading dumped-lisp.el... Loading backquote.el... Loading bytecomp-runtime.el... Loading subr.el... Loading cl.el... Loading cl-extra.el... Loading cl-macs.el... Loading cl-seq.el... Loading post-gc.el... Loading version.el... Loading custom.el... Loading cus-start.el... Loading find-paths.el... Loading packages.el... Loading setup-paths.el... Loading replace.el... Loading widget.el... Loading cmdloop.el... Loading keymap.el... Loading syntax.el... Loading syntax-ppss.el... Loading device.el... Loading console.el... Loading obsolete.el... Loading specifier.el... Loading menubar.el... Loading menubar-items.el... Loading font-menu.el... Loading frame.el... Loading x-faces.el... Loading x-font-menu.el... Loading faces.el... Requiring font.el... Requiring fontconfig.el... Requiring font-mgr.el... Requiring disp-table.el... Loading glyphs.el... Loading fontcolor.el... Loading extents.el... Loading events.el... Loading hash-table.el... Loading text-props.el... Loading process.el... Loading multicast.el... Loading map-ynp.el... Loading undo-stack.el... Loading window.el... Loading window-xemacs.el... Loading resize-minibuffer.el... Loading simple.el... Loading newcomment.el... Loading keydefs.el... Loading abbrev.el... Loading derived.el... Loading minibuf.el... Loading list-mode.el... Loading modeline.el... Loading cus-file.el... Loading startup.el... Loading misc.el... Loading loadhist.el... Loading files.el... Loading lib-complete.el... Loading format.el... Loading indent.el... Loading isearch-mode.el... *** Error in XEmacs initialization (void-variable isearch-state-places) *** Backtrace really-early-error-handler((void-variable isearch-state-places)) (cons (quote list) isearch-state-places) # (catch #:isearch-make-state-object ...) (catch (quote #:isearch-make-state-object) (cons (quote list) isearch-state-places)) (block isearch-make-state-object (cons (quote list) isearch-state-places)) (lambda nil (block isearch-make-state-object (cons (quote list) isearch-state-places)))() # bind (byte-compile-macro-environment) macroexpand((isearch-make-state-object) ((isearch-setf-state-places lambda (cmd) (block isearch-setf-state-places (cons (quote setf) (loop for var being each element in isearch-state-places using (index index) nconc (list var (backquote (nth (\, index) (\, cmd)))))))) (isearch-make-state-object lambda nil (block isearch-make-state-object (cons (quote list) isearch-state-places))) (-203850872 (quote (isearch-string isearch-message (point) isearch-success isearch-forward isearch-other-end isearch-word isearch-invalid-regexp isearch-wrapped isearch-barrier isearch-within-brackets))))) (setq form (macroexpand form env)) (eq form (setq form (macroexpand form env))) (not (eq form (setq form (macroexpand form env)))) (or (not (eq form (setq form (macroexpand form env)))) (and cl-macroexpand-cmacs (not (eq form (setq form (compiler-macroexpand form)))))) (while (or (not (eq form (setq form (macroexpand form env)))) (and cl-macroexpand-cmacs (not (eq form (setq form (compiler-macroexpand form))))))) # bind (env form) cl-macroexpand-all((isearch-make-state-object) ((isearch-setf-state-places lambda (cmd) (block isearch-setf-state-places (cons (quote setf) (loop for var being each element in isearch-state-places using (index index) nconc (list var (backquote (nth (\, index) (\, cmd)))))))) (isearch-make-state-object lambda nil (block isearch-make-state-object (cons (quote list) isearch-state-places))) (-203850872 (quote (isearch-string isearch-message (point) isearch-success isearch-forward isearch-other-end isearch-word isearch-invalid-regexp isearch-wrapped isearch-barrier isearch-within-brackets))))) # bind (x) (lambda (x) (cl-macroexpand-all x env))((isearch-make-state-object)) mapcar((lambda (x) (cl-macroexpand-all x env)) ((isearch-make-state-object) isearch-cmds)) # bind (env body) cl-macroexpand-body(((isearch-make-state-object) isearch-cmds) ((isearch-setf-state-places lambda (cmd) (block isearch-setf-state-places (cons (quote setf) (loop for var being each element in isearch-state-places using (index index) nconc (list var (backquote (nth (\, index) (\, cmd)))))))) (isearch-make-state-object lambda nil (block isearch-make-state-object (cons (quote list) isearch-state-places))) (-203850872 (quote (isearch-string isearch-message (point) isearch-success isearch-forward isearch-other-end isearch-word isearch-invalid-regexp isearch-wrapped isearch-barrier isearch-within-brackets))))) (cons (car form) (cl-macroexpand-body (cdr form) env)) (cond ((not (consp form)) form) ((memq (car form) (quote (let let*))) (if (null (nth 1 form)) (cl-macroexpand-all (cons (quote progn) (cddr form)) env) (let ((letf nil) (res nil) (lets (cadr form)) (env env) shadows) (while lets (push (funcall (function (lambda (symbol valueform) (let* ((eq-hash (eq-hash symbol)) (acons (cdr (assoc* eq-hash env)))) (if (null acons) (cons symbol (cl-macroexpand-body valueform env)) (if (cadr acons) (if (eq (car form) (quote let*)) (push (backquote ((\, eq-hash))) env) (push (backquote ((\, eq-hash))) shadows)) (setq symbol (car acons)) (unless (symbolp symbol) (setq letf t))) (cons symbol (cl-macroexpand-body valueform env)))))) (if (consp (car lets)) (caar lets) (car lets)) (cdr-safe (car-safe lets))) res) (setq lets (cdr lets))) (list* (if letf (if (eq (car form) (quote let)) (quote letf) (quote letf*)) (car form)) (nreverse res) (cl-macroexpand-body (cddr form) (nconc shadows env)))))) ((eq (car form) (quote cond)) (cons (car form) (mapcar (function (lambda (x) (cl-macroexpand-body x env))) (cdr form)))) ((eq (car form) (quote condition-case)) (list* (car form) (nth 1 form) (cl-macroexpand-all (nth 2 form) env) (mapcar (function (lambda (x) (cons (car x) (cl-macroexpand-body (cdr x) env)))) (cdddr form)))) ((memq (car form) (quote (quote function))) (if (eq (car-safe (nth 1 form)) (quote lambda)) (let* ((env (reduce (function nconc) (nth 1 (nth 1 form)) :from-end t :key (function (lambda (symbol) (when (and (not (member symbol (quote (&optional &rest)))) (cdr (assoc* (setq symbol (eq-hash symbol)) env))) (backquote (((\, symbol))))))) :initial-value env)) (body (cl-macroexpand-body (cddadr form) env))) (if (and cl-closure-vars (eq (car form) (quote function)) (cl-expr-contains-any body cl-closure-vars)) (let* ((closed (remove* nil cl-closure-vars :key (function (lambda (y) (cl-expr-contains body y))))) (new (mapcar (quote gensym) closed)) (sub (pairlis closed new))) (put (car (last cl-closure-vars)) (quote used) t) (backquote (apply-partially (function (lambda (\, (append new (cadadr form))) (\,@ (sublis sub body)))) (\,@ closed)))) (list (car form) (list* (quote lambda) (cadadr form) body)))) (let ((found (cdr (assq (cadr form) env)))) (cond ((and (consp found) (eq (nth 1 (nth 1 found)) (quote cl-labels-args))) (cl-macroexpand-all (nth 1 (nth 2 (nth 2 found))) env)) ((and (consp found) (eq (nth 1 (nth 1 found)) (quote byte-compile-labels-args))) (unless (eq (quote function) (car form)) (byte-compile-warn "deprecated: '%s, use #'%s instead to quote it as a function" (cadr form) (cadr form))) (setq found (get (nth 1 (nth 1 (nth 3 found))) (quote byte-compile-data-placeholder))) (put found (quote byte-compile-label-calls) (1+ (get found (quote byte-compile-label-calls) 0))) (list (quote function) found)) (t form))))) ((memq (car form) (quote (defun defmacro))) (let ((env (reduce (function nconc) (nth 2 form) :from-end t :key (function (lambda (symbol) (when (and (not (member symbol (quote (&optional &rest)))) (cdr (assoc* (setq symbol (eq-hash symbol)) env))) (backquote (((\, symbol))))))) :initial-value env))) (list* (car form) (nth 1 form) (cl-macroexpand-body (cddr form) env)))) ((and (eq (car form) (quote progn)) (not (cddr form))) (cl-macroexpand-all (nth 1 form) env)) ((eq (car form) (quote setq)) (let* ((args (cl-macroexpand-body (cdr form) env)) (p args)) (while (and p (symbolp (car p))) (setq p (cddr p))) (if p (cl-macroexpand-all (cons (quote setf) args)) (cons (quote setq) args)))) (t (cons (car form) (cl-macroexpand-body (cdr form) env)))) # bind (env form) cl-macroexpand-all((cons (isearch-make-state-object) isearch-cmds) ((isearch-setf-state-places lambda (cmd) (block isearch-setf-state-places (cons (quote setf) (loop for var being each element in isearch-state-places using (index index) nconc (list var (backquote (nth (\, index) (\, cmd)))))))) (isearch-make-state-object lambda nil (block isearch-make-state-object (cons (quote list) isearch-state-places))) (-203850872 (quote (isearch-string isearch-message (point) isearch-success isearch-forward isearch-other-end isearch-word isearch-invalid-regexp isearch-wrapped isearch-barrier isearch-within-brackets))))) # bind (x) (lambda (x) (cl-macroexpand-all x env))((cons (isearch-make-state-object) isearch-cmds)) mapcar((lambda (x) (cl-macroexpand-all x env)) (isearch-cmds (cons (isearch-make-state-object) isearch-cmds))) # bind (env body) cl-macroexpand-body((isearch-cmds (cons (isearch-make-state-object) isearch-cmds)) ((isearch-setf-state-places lambda (cmd) (block isearch-setf-state-places (cons (quote setf) (loop for var being each element in isearch-state-places using (index index) nconc (list var (backquote (nth (\, index) (\, cmd)))))))) (isearch-make-state-object lambda nil (block isearch-make-state-object (cons (quote list) isearch-state-places))) (-203850872 (quote (isearch-string isearch-message (point) isearch-success isearch-forward isearch-other-end isearch-word isearch-invalid-regexp isearch-wrapped isearch-barrier isearch-within-brackets))))) (let* ((args (cl-macroexpand-body (cdr form) env)) (p args)) (while (and p (symbolp (car p))) (setq p (cddr p))) (if p (cl-macroexpand-all (cons (quote setf) args)) (cons (quote setq) args))) (cond ((not (consp form)) form) ((memq (car form) (quote (let let*))) (if (null (nth 1 form)) (cl-macroexpand-all (cons (quote progn) (cddr form)) env) (let ((letf nil) (res nil) (lets (cadr form)) (env env) shadows) (while lets (push (funcall (function (lambda (symbol valueform) (let* ((eq-hash (eq-hash symbol)) (acons (cdr (assoc* eq-hash env)))) (if (null acons) (cons symbol (cl-macroexpand-body valueform env)) (if (cadr acons) (if (eq (car form) (quote let*)) (push (backquote ((\, eq-hash))) env) (push (backquote ((\, eq-hash))) shadows)) (setq symbol (car acons)) (unless (symbolp symbol) (setq letf t))) (cons symbol (cl-macroexpand-body valueform env)))))) (if (consp (car lets)) (caar lets) (car lets)) (cdr-safe (car-safe lets))) res) (setq lets (cdr lets))) (list* (if letf (if (eq (car form) (quote let)) (quote letf) (quote letf*)) (car form)) (nreverse res) (cl-macroexpand-body (cddr form) (nconc shadows env)))))) ((eq (car form) (quote cond)) (cons (car form) (mapcar (function (lambda (x) (cl-macroexpand-body x env))) (cdr form)))) ((eq (car form) (quote condition-case)) (list* (car form) (nth 1 form) (cl-macroexpand-all (nth 2 form) env) (mapcar (function (lambda (x) (cons (car x) (cl-macroexpand-body (cdr x) env)))) (cdddr form)))) ((memq (car form) (quote (quote function))) (if (eq (car-safe (nth 1 form)) (quote lambda)) (let* ((env (reduce (function nconc) (nth 1 (nth 1 form)) :from-end t :key (function (lambda (symbol) (when (and (not (member symbol (quote (&optional &rest)))) (cdr (assoc* (setq symbol (eq-hash symbol)) env))) (backquote (((\, symbol))))))) :initial-value env)) (body (cl-macroexpand-body (cddadr form) env))) (if (and cl-closure-vars (eq (car form) (quote function)) (cl-expr-contains-any body cl-closure-vars)) (let* ((closed (remove* nil cl-closure-vars :key (function (lambda (y) (cl-expr-contains body y))))) (new (mapcar (quote gensym) closed)) (sub (pairlis closed new))) (put (car (last cl-closure-vars)) (quote used) t) (backquote (apply-partially (function (lambda (\, (append new (cadadr form))) (\,@ (sublis sub body)))) (\,@ closed)))) (list (car form) (list* (quote lambda) (cadadr form) body)))) (let ((found (cdr (assq (cadr form) env)))) (cond ((and (consp found) (eq (nth 1 (nth 1 found)) (quote cl-labels-args))) (cl-macroexpand-all (nth 1 (nth 2 (nth 2 found))) env)) ((and (consp found) (eq (nth 1 (nth 1 found)) (quote byte-compile-labels-args))) (unless (eq (quote function) (car form)) (byte-compile-warn "deprecated: '%s, use #'%s instead to quote it as a function" (cadr form) (cadr form))) (setq found (get (nth 1 (nth 1 (nth 3 found))) (quote byte-compile-data-placeholder))) (put found (quote byte-compile-label-calls) (1+ (get found (quote byte-compile-label-calls) 0))) (list (quote function) found)) (t form))))) ((memq (car form) (quote (defun defmacro))) (let ((env (reduce (function nconc) (nth 2 form) :from-end t :key (function (lambda (symbol) (when (and (not (member symbol (quote (&optional &rest)))) (cdr (assoc* (setq symbol (eq-hash symbol)) env))) (backquote (((\, symbol))))))) :initial-value env))) (list* (car form) (nth 1 form) (cl-macroexpand-body (cddr form) env)))) ((and (eq (car form) (quote progn)) (not (cddr form))) (cl-macroexpand-all (nth 1 form) env)) ((eq (car form) (quote setq)) (let* ((args (cl-macroexpand-body (cdr form) env)) (p args)) (while (and p (symbolp (car p))) (setq p (cddr p))) (if p (cl-macroexpand-all (cons (quote setf) args)) (cons (quote setq) args)))) (t (cons (car form) (cl-macroexpand-body (cdr form) env)))) # bind (env form) cl-macroexpand-all((push (isearch-make-state-object) isearch-cmds) ((isearch-setf-state-places lambda (cmd) (block isearch-setf-state-places (cons (quote setf) (loop for var being each element in isearch-state-places using (index index) nconc (list var (backquote (nth (\, index) (\, cmd)))))))) (isearch-make-state-object lambda nil (block isearch-make-state-object (cons (quote list) isearch-state-places))) (-203850872 (quote (isearch-string isearch-message (point) isearch-success isearch-forward isearch-other-end isearch-word isearch-invalid-regexp isearch-wrapped isearch-barrier isearch-within-brackets))))) # bind (x) (lambda (x) (cl-macroexpand-all x env))((push (isearch-make-state-object) isearch-cmds)) mapcar((lambda (x) (cl-macroexpand-all x env)) (nil (push (isearch-make-state-object) isearch-cmds))) # bind (env body) cl-macroexpand-body((nil (push (isearch-make-state-object) isearch-cmds)) ((isearch-setf-state-places lambda (cmd) (block isearch-setf-state-places (cons (quote setf) (loop for var being each element in isearch-state-places using (index index) nconc (list var (backquote (nth (\, index) (\, cmd)))))))) (isearch-make-state-object lambda nil (block isearch-make-state-object (cons (quote list) isearch-state-places))) (-203850872 (quote (isearch-string isearch-message (point) isearch-success isearch-forward isearch-other-end isearch-word isearch-invalid-regexp isearch-wrapped isearch-barrier isearch-within-brackets))))) (list* (car form) (nth 1 form) (cl-macroexpand-body (cddr form) env)) # bind (env) (let ((env (reduce (function nconc) (nth 2 form) :from-end t :key (function (lambda (symbol) (when (and (not (member symbol (quote (&optional &rest)))) (cdr (assoc* (setq symbol (eq-hash symbol)) env))) (backquote (((\, symbol))))))) :initial-value env))) (list* (car form) (nth 1 form) (cl-macroexpand-body (cddr form) env))) (cond ((not (consp form)) form) ((memq (car form) (quote (let let*))) (if (null (nth 1 form)) (cl-macroexpand-all (cons (quote progn) (cddr form)) env) (let ((letf nil) (res nil) (lets (cadr form)) (env env) shadows) (while lets (push (funcall (function (lambda (symbol valueform) (let* ((eq-hash (eq-hash symbol)) (acons (cdr (assoc* eq-hash env)))) (if (null acons) (cons symbol (cl-macroexpand-body valueform env)) (if (cadr acons) (if (eq (car form) (quote let*)) (push (backquote ((\, eq-hash))) env) (push (backquote ((\, eq-hash))) shadows)) (setq symbol (car acons)) (unless (symbolp symbol) (setq letf t))) (cons symbol (cl-macroexpand-body valueform env)))))) (if (consp (car lets)) (caar lets) (car lets)) (cdr-safe (car-safe lets))) res) (setq lets (cdr lets))) (list* (if letf (if (eq (car form) (quote let)) (quote letf) (quote letf*)) (car form)) (nreverse res) (cl-macroexpand-body (cddr form) (nconc shadows env)))))) ((eq (car form) (quote cond)) (cons (car form) (mapcar (function (lambda (x) (cl-macroexpand-body x env))) (cdr form)))) ((eq (car form) (quote condition-case)) (list* (car form) (nth 1 form) (cl-macroexpand-all (nth 2 form) env) (mapcar (function (lambda (x) (cons (car x) (cl-macroexpand-body (cdr x) env)))) (cdddr form)))) ((memq (car form) (quote (quote function))) (if (eq (car-safe (nth 1 form)) (quote lambda)) (let* ((env (reduce (function nconc) (nth 1 (nth 1 form)) :from-end t :key (function (lambda (symbol) (when (and (not (member symbol (quote (&optional &rest)))) (cdr (assoc* (setq symbol (eq-hash symbol)) env))) (backquote (((\, symbol))))))) :initial-value env)) (body (cl-macroexpand-body (cddadr form) env))) (if (and cl-closure-vars (eq (car form) (quote function)) (cl-expr-contains-any body cl-closure-vars)) (let* ((closed (remove* nil cl-closure-vars :key (function (lambda (y) (cl-expr-contains body y))))) (new (mapcar (quote gensym) closed)) (sub (pairlis closed new))) (put (car (last cl-closure-vars)) (quote used) t) (backquote (apply-partially (function (lambda (\, (append new (cadadr form))) (\,@ (sublis sub body)))) (\,@ closed)))) (list (car form) (list* (quote lambda) (cadadr form) body)))) (let ((found (cdr (assq (cadr form) env)))) (cond ((and (consp found) (eq (nth 1 (nth 1 found)) (quote cl-labels-args))) (cl-macroexpand-all (nth 1 (nth 2 (nth 2 found))) env)) ((and (consp found) (eq (nth 1 (nth 1 found)) (quote byte-compile-labels-args))) (unless (eq (quote function) (car form)) (byte-compile-warn "deprecated: '%s, use #'%s instead to quote it as a function" (cadr form) (cadr form))) (setq found (get (nth 1 (nth 1 (nth 3 found))) (quote byte-compile-data-placeholder))) (put found (quote byte-compile-label-calls) (1+ (get found (quote byte-compile-label-calls) 0))) (list (quote function) found)) (t form))))) ((memq (car form) (quote (defun defmacro))) (let ((env (reduce (function nconc) (nth 2 form) :from-end t :key (function (lambda (symbol) (when (and (not (member symbol (quote (&optional &rest)))) (cdr (assoc* (setq symbol (eq-hash symbol)) env))) (backquote (((\, symbol))))))) :initial-value env))) (list* (car form) (nth 1 form) (cl-macroexpand-body (cddr form) env)))) ((and (eq (car form) (quote progn)) (not (cddr form))) (cl-macroexpand-all (nth 1 form) env)) ((eq (car form) (quote setq)) (let* ((args (cl-macroexpand-body (cdr form) env)) (p args)) (while (and p (symbolp (car p))) (setq p (cddr p))) (if p (cl-macroexpand-all (cons (quote setf) args)) (cons (quote setq) args)))) (t (cons (car form) (cl-macroexpand-body (cdr form) env)))) # bind (env form) cl-macroexpand-all((defun isearch-push-state nil (push (isearch-make-state-object) isearch-cmds)) ((isearch-setf-state-places lambda (cmd) (block isearch-setf-state-places (cons (quote setf) (loop for var being each element in isearch-state-places using (index index) nconc (list var (backquote (nth (\, index) (\, cmd)))))))) (isearch-make-state-object lambda nil (block isearch-make-state-object (cons (quote list) isearch-state-places))) (-203850872 (quote (isearch-string isearch-message (point) isearch-success isearch-forward isearch-other-end isearch-word isearch-invalid-regexp isearch-wrapped isearch-barrier isearch-within-brackets))))) # bind (x) (lambda (x) (cl-macroexpand-all x env))((defun isearch-push-state nil (push (isearch-make-state-object) isearch-cmds))) mapcar((lambda (x) (cl-macroexpand-all x env)) ((defun isearch-push-state nil (push (isearch-make-state-object) isearch-cmds)) (defun isearch-top-state nil (isearch-setf-state-places (car isearch-cmds))))) # bind (env body) cl-macroexpand-body(((defun isearch-push-state nil (push (isearch-make-state-object) isearch-cmds)) (defun isearch-top-state nil (isearch-setf-state-places (car isearch-cmds)))) ((isearch-setf-state-places lambda (cmd) (block isearch-setf-state-places (cons (quote setf) (loop for var being each element in isearch-state-places using (index index) nconc (list var (backquote (nth (\, index) (\, cmd)))))))) (isearch-make-state-object lambda nil (block isearch-make-state-object (cons (quote list) isearch-state-places))) (-203850872 (quote (isearch-string isearch-message (point) isearch-success isearch-forward isearch-other-end isearch-word isearch-invalid-regexp isearch-wrapped isearch-barrier isearch-within-brackets))))) (cons (car form) (cl-macroexpand-body (cdr form) env)) (cond ((not (consp form)) form) ((memq (car form) (quote (let let*))) (if (null (nth 1 form)) (cl-macroexpand-all (cons (quote progn) (cddr form)) env) (let ((letf nil) (res nil) (lets (cadr form)) (env env) shadows) (while lets (push (funcall (function (lambda (symbol valueform) (let* ((eq-hash (eq-hash symbol)) (acons (cdr (assoc* eq-hash env)))) (if (null acons) (cons symbol (cl-macroexpand-body valueform env)) (if (cadr acons) (if (eq (car form) (quote let*)) (push (backquote ((\, eq-hash))) env) (push (backquote ((\, eq-hash))) shadows)) (setq symbol (car acons)) (unless (symbolp symbol) (setq letf t))) (cons symbol (cl-macroexpand-body valueform env)))))) (if (consp (car lets)) (caar lets) (car lets)) (cdr-safe (car-safe lets))) res) (setq lets (cdr lets))) (list* (if letf (if (eq (car form) (quote let)) (quote letf) (quote letf*)) (car form)) (nreverse res) (cl-macroexpand-body (cddr form) (nconc shadows env)))))) ((eq (car form) (quote cond)) (cons (car form) (mapcar (function (lambda (x) (cl-macroexpand-body x env))) (cdr form)))) ((eq (car form) (quote condition-case)) (list* (car form) (nth 1 form) (cl-macroexpand-all (nth 2 form) env) (mapcar (function (lambda (x) (cons (car x) (cl-macroexpand-body (cdr x) env)))) (cdddr form)))) ((memq (car form) (quote (quote function))) (if (eq (car-safe (nth 1 form)) (quote lambda)) (let* ((env (reduce (function nconc) (nth 1 (nth 1 form)) :from-end t :key (function (lambda (symbol) (when (and (not (member symbol (quote (&optional &rest)))) (cdr (assoc* (setq symbol (eq-hash symbol)) env))) (backquote (((\, symbol))))))) :initial-value env)) (body (cl-macroexpand-body (cddadr form) env))) (if (and cl-closure-vars (eq (car form) (quote function)) (cl-expr-contains-any body cl-closure-vars)) (let* ((closed (remove* nil cl-closure-vars :key (function (lambda (y) (cl-expr-contains body y))))) (new (mapcar (quote gensym) closed)) (sub (pairlis closed new))) (put (car (last cl-closure-vars)) (quote used) t) (backquote (apply-partially (function (lambda (\, (append new (cadadr form))) (\,@ (sublis sub body)))) (\,@ closed)))) (list (car form) (list* (quote lambda) (cadadr form) body)))) (let ((found (cdr (assq (cadr form) env)))) (cond ((and (consp found) (eq (nth 1 (nth 1 found)) (quote cl-labels-args))) (cl-macroexpand-all (nth 1 (nth 2 (nth 2 found))) env)) ((and (consp found) (eq (nth 1 (nth 1 found)) (quote byte-compile-labels-args))) (unless (eq (quote function) (car form)) (byte-compile-warn "deprecated: '%s, use #'%s instead to quote it as a function" (cadr form) (cadr form))) (setq found (get (nth 1 (nth 1 (nth 3 found))) (quote byte-compile-data-placeholder))) (put found (quote byte-compile-label-calls) (1+ (get found (quote byte-compile-label-calls) 0))) (list (quote function) found)) (t form))))) ((memq (car form) (quote (defun defmacro))) (let ((env (reduce (function nconc) (nth 2 form) :from-end t :key (function (lambda (symbol) (when (and (not (member symbol (quote (&optional &rest)))) (cdr (assoc* (setq symbol (eq-hash symbol)) env))) (backquote (((\, symbol))))))) :initial-value env))) (list* (car form) (nth 1 form) (cl-macroexpand-body (cddr form) env)))) ((and (eq (car form) (quote progn)) (not (cddr form))) (cl-macroexpand-all (nth 1 form) env)) ((eq (car form) (quote setq)) (let* ((args (cl-macroexpand-body (cdr form) env)) (p args)) (while (and p (symbolp (car p))) (setq p (cddr p))) (if p (cl-macroexpand-all (cons (quote setf) args)) (cons (quote setq) args)))) (t (cons (car form) (cl-macroexpand-body (cdr form) env)))) # bind (env form) cl-macroexpand-all((progn (defun isearch-push-state nil (push (isearch-make-state-object) isearch-cmds)) (defun isearch-top-state nil (isearch-setf-state-places (car isearch-cmds)))) ((isearch-setf-state-places lambda (cmd) (block isearch-setf-state-places (cons (quote setf) (loop for var being each element in isearch-state-places using (index index) nconc (list var (backquote (nth (\, index) (\, cmd)))))))) (isearch-make-state-object lambda nil (block isearch-make-state-object (cons (quote list) isearch-state-places))) (-203850872 (quote (isearch-string isearch-message (point) isearch-success isearch-forward isearch-other-end isearch-word isearch-invalid-regexp isearch-wrapped isearch-barrier isearch-within-brackets))))) # (catch #:macrolet ...) (catch (quote #:macrolet) (cl-macroexpand-all (cons (quote progn) form) (nconc (catch (quote #:nil) (let* ((#:G36060 macros) (details nil) (name nil) (#:G36061 nil)) (while (consp #:G36060) (setq details (car #:G36060) name (car-safe (prog1 details (setq details (cdr details))))) (setq #:G36061 (cons (list* name (quote lambda) (cdr (cl-transform-lambda details name))) #:G36061)) (setq #:G36060 (cdr #:G36060))) (nreverse #:G36061))) env))) (block macrolet (cl-macroexpand-all (cons (quote progn) form) (nconc (loop for (name . details) in macros collect (list* name (quote lambda) (cdr (cl-transform-lambda details name)))) env))) # bind (env macros) (let* ((macros (if form (pop form) (signal (quote wrong-number-of-arguments) (list (quote macrolet) (length form))))) (env byte-compile-macro-environment)) (block macrolet (cl-macroexpand-all (cons (quote progn) form) (nconc (loop for (name . details) in macros collect (list* name (quote lambda) (cdr (cl-transform-lambda details name)))) env)))) # bind (form) (lambda (&rest form) "Make temporary macro definitions.\nThis is like `flet', but for macros instead of functions.\n\narguments: ((&rest MACROS) &body FORM &environment ENV)\n" (let* ((macros (if form (pop form) (signal (quote wrong-number-of-arguments) (list (quote macrolet) (length form))))) (env byte-compile-macro-environment)) (block macrolet (cl-macroexpand-all (cons (quote progn) form) (nconc (loop for (name . details) in macros collect (list* name (quote lambda) (cdr (cl-transform-lambda details name)))) env)))))(((isearch-setf-state-places (cmd) (cons (quote setf) (loop for var being each element in isearch-state-places using (index index) nconc (list var (backquote (nth (\, index) (\, cmd))))))) (isearch-make-state-object nil (cons (quote list) isearch-state-places))) (defun isearch-push-state nil (push (isearch-make-state-object) isearch-cmds)) (defun isearch-top-state nil (isearch-setf-state-places (car isearch-cmds)))) # bind (byte-compile-macro-environment) macroexpand((macrolet ((isearch-setf-state-places (cmd) (cons (quote setf) (loop for var being each element in isearch-state-places using (index index) nconc (list var (backquote (nth (\, index) (\, cmd))))))) (isearch-make-state-object nil (cons (quote list) isearch-state-places))) (defun isearch-push-state nil (push (isearch-make-state-object) isearch-cmds)) (defun isearch-top-state nil (isearch-setf-state-places (car isearch-cmds)))) ((-203850872 (quote (isearch-string isearch-message (point) isearch-success isearch-forward isearch-other-end isearch-word isearch-invalid-regexp isearch-wrapped isearch-barrier isearch-within-brackets))))) (setq form (macroexpand form env)) (eq form (setq form (macroexpand form env))) (not (eq form (setq form (macroexpand form env)))) (or (not (eq form (setq form (macroexpand form env)))) (and cl-macroexpand-cmacs (not (eq form (setq form (compiler-macroexpand form)))))) (while (or (not (eq form (setq form (macroexpand form env)))) (and cl-macroexpand-cmacs (not (eq form (setq form (compiler-macroexpand form))))))) # bind (env form) cl-macroexpand-all((macrolet ((isearch-setf-state-places (cmd) (cons (quote setf) (loop for var being each element in isearch-state-places using (index index) nconc (list var (backquote (nth (\, index) (\, cmd))))))) (isearch-make-state-object nil (cons (quote list) isearch-state-places))) (defun isearch-push-state nil (push (isearch-make-state-object) isearch-cmds)) (defun isearch-top-state nil (isearch-setf-state-places (car isearch-cmds)))) ((-203850872 (quote (isearch-string isearch-message (point) isearch-success isearch-forward isearch-other-end isearch-word isearch-invalid-regexp isearch-wrapped isearch-barrier isearch-within-brackets))))) (cond ((not (consp form)) form) ((memq (car form) (quote (let let*))) (if (null (nth 1 form)) (cl-macroexpand-all (cons (quote progn) (cddr form)) env) (let ((letf nil) (res nil) (lets (cadr form)) (env env) shadows) (while lets (push (funcall (function (lambda (symbol valueform) (let* ((eq-hash (eq-hash symbol)) (acons (cdr (assoc* eq-hash env)))) (if (null acons) (cons symbol (cl-macroexpand-body valueform env)) (if (cadr acons) (if (eq (car form) (quote let*)) (push (backquote ((\, eq-hash))) env) (push (backquote ((\, eq-hash))) shadows)) (setq symbol (car acons)) (unless (symbolp symbol) (setq letf t))) (cons symbol (cl-macroexpand-body valueform env)))))) (if (consp (car lets)) (caar lets) (car lets)) (cdr-safe (car-safe lets))) res) (setq lets (cdr lets))) (list* (if letf (if (eq (car form) (quote let)) (quote letf) (quote letf*)) (car form)) (nreverse res) (cl-macroexpand-body (cddr form) (nconc shadows env)))))) ((eq (car form) (quote cond)) (cons (car form) (mapcar (function (lambda (x) (cl-macroexpand-body x env))) (cdr form)))) ((eq (car form) (quote condition-case)) (list* (car form) (nth 1 form) (cl-macroexpand-all (nth 2 form) env) (mapcar (function (lambda (x) (cons (car x) (cl-macroexpand-body (cdr x) env)))) (cdddr form)))) ((memq (car form) (quote (quote function))) (if (eq (car-safe (nth 1 form)) (quote lambda)) (let* ((env (reduce (function nconc) (nth 1 (nth 1 form)) :from-end t :key (function (lambda (symbol) (when (and (not (member symbol (quote (&optional &rest)))) (cdr (assoc* (setq symbol (eq-hash symbol)) env))) (backquote (((\, symbol))))))) :initial-value env)) (body (cl-macroexpand-body (cddadr form) env))) (if (and cl-closure-vars (eq (car form) (quote function)) (cl-expr-contains-any body cl-closure-vars)) (let* ((closed (remove* nil cl-closure-vars :key (function (lambda (y) (cl-expr-contains body y))))) (new (mapcar (quote gensym) closed)) (sub (pairlis closed new))) (put (car (last cl-closure-vars)) (quote used) t) (backquote (apply-partially (function (lambda (\, (append new (cadadr form))) (\,@ (sublis sub body)))) (\,@ closed)))) (list (car form) (list* (quote lambda) (cadadr form) body)))) (let ((found (cdr (assq (cadr form) env)))) (cond ((and (consp found) (eq (nth 1 (nth 1 found)) (quote cl-labels-args))) (cl-macroexpand-all (nth 1 (nth 2 (nth 2 found))) env)) ((and (consp found) (eq (nth 1 (nth 1 found)) (quote byte-compile-labels-args))) (unless (eq (quote function) (car form)) (byte-compile-warn "deprecated: '%s, use #'%s instead to quote it as a function" (cadr form) (cadr form))) (setq found (get (nth 1 (nth 1 (nth 3 found))) (quote byte-compile-data-placeholder))) (put found (quote byte-compile-label-calls) (1+ (get found (quote byte-compile-label-calls) 0))) (list (quote function) found)) (t form))))) ((memq (car form) (quote (defun defmacro))) (let ((env (reduce (function nconc) (nth 2 form) :from-end t :key (function (lambda (symbol) (when (and (not (member symbol (quote (&optional &rest)))) (cdr (assoc* (setq symbol (eq-hash symbol)) env))) (backquote (((\, symbol))))))) :initial-value env))) (list* (car form) (nth 1 form) (cl-macroexpand-body (cddr form) env)))) ((and (eq (car form) (quote progn)) (not (cddr form))) (cl-macroexpand-all (nth 1 form) env)) ((eq (car form) (quote setq)) (let* ((args (cl-macroexpand-body (cdr form) env)) (p args)) (while (and p (symbolp (car p))) (setq p (cddr p))) (if p (cl-macroexpand-all (cons (quote setf) args)) (cons (quote setq) args)))) (t (cons (car form) (cl-macroexpand-body (cdr form) env)))) # bind (env form) cl-macroexpand-all((progn (macrolet ((isearch-setf-state-places (cmd) (cons (quote setf) (loop for var being each element in isearch-state-places using (index index) nconc (list var (backquote (nth (\, index) (\, cmd))))))) (isearch-make-state-object nil (cons (quote list) isearch-state-places))) (defun isearch-push-state nil (push (isearch-make-state-object) isearch-cmds)) (defun isearch-top-state nil (isearch-setf-state-places (car isearch-cmds))))) ((-203850872 (quote (isearch-string isearch-message (point) isearch-success isearch-forward isearch-other-end isearch-word isearch-invalid-regexp isearch-wrapped isearch-barrier isearch-within-brackets))))) # (catch #:symbol-macrolet ...) (catch (quote #:symbol-macrolet) (cl-macroexpand-all (cons (quote progn) body) (nconc (catch (quote #:nil) (let* ((#:G36058 symbol-macros) (shadow nil) (name nil) (expansion nil) (#:G36059 nil)) (while (consp #:G36058) (setq shadow (car #:G36058) name (car-safe (prog1 shadow (setq shadow (cdr shadow)))) expansion (car-safe (prog1 shadow (setq shadow (cdr shadow))))) (progn (while (not (symbolp name)) (setq name (signal (quote wrong-type-argument) (list (quote symbol) name (quote name))))) nil) (setq #:G36059 (cons (list* (eq-hash name) expansion shadow) #:G36059)) (setq #:G36058 (cdr #:G36058))) (nreverse #:G36059))) env))) (block symbol-macrolet (cl-macroexpand-all (cons (quote progn) body) (nconc (loop for (name expansion . shadow) in symbol-macros do (check-type name symbol) collect (list* (eq-hash name) expansion shadow)) env))) # bind (env symbol-macros) (let* ((symbol-macros (if body (pop body) (signal (quote wrong-number-of-arguments) (list (quote symbol-macrolet) (length body))))) (env byte-compile-macro-environment)) (block symbol-macrolet (cl-macroexpand-all (cons (quote progn) body) (nconc (loop for (name expansion . shadow) in symbol-macros do (check-type name symbol) collect (list* (eq-hash name) expansion shadow)) env)))) # bind (body) (lambda (&rest body) "Make temporary symbol macro definitions.\n\nElements in SYMBOL-MACROS look like (NAME EXPANSION &optional SHADOW).\nWithin BODY, a series of Lisp forms, a reference to NAME is replaced with its\nEXPANSION, and (setq NAME ...) acts like (setf EXPANSION ...).\n\nIf NAME is encountered in a lambda argument list within BODY, then the\ncorresponding symbol macro will be shadowed within the lambda body, and NAME\nwill be treated as normal.\n\nIf NAME is encountered as a symbol within the VARLIST of a `let', `let*',\n`lexical-let', or `lexical-let*' form, then the binding acts as it would with\n`letf' or `letf*', depending on the specific form encountered. This is in\ncontravention of Common Lisp, where such bindings shadow any enclosing symbol\nmacros. To specify the Common Lisp behavior for an individual symbol macro,\nsupply a non-nil third SHADOW element.\n\narguments: ((&rest SYMBOL-MACROS) &body BODY &environment ENV)\n" (let* ((symbol-macros (if body (pop body) (signal (quote wrong-number-of-arguments) (list (quote symbol-macrolet) (length body))))) (env byte-compile-macro-environment)) (block symbol-macrolet (cl-macroexpand-all (cons (quote progn) body) (nconc (loop for (name expansion . shadow) in symbol-macros do (check-type name symbol) collect (list* (eq-hash name) expansion shadow)) env)))))(((isearch-state-places (quote (isearch-string isearch-message (point) isearch-success isearch-forward isearch-other-end isearch-word isearch-invalid-regexp isearch-wrapped isearch-barrier isearch-within-brackets)))) (macrolet ((isearch-setf-state-places (cmd) (cons (quote setf) (loop for var being each element in isearch-state-places using (index index) nconc (list var (backquote (nth (\, index) (\, cmd))))))) (isearch-make-state-object nil (cons (quote list) isearch-state-places))) (defun isearch-push-state nil (push (isearch-make-state-object) isearch-cmds)) (defun isearch-top-state nil (isearch-setf-state-places (car isearch-cmds))))) (symbol-macrolet ((isearch-state-places (quote (isearch-string isearch-message (point) isearch-success isearch-forward isearch-other-end isearch-word isearch-invalid-regexp isearch-wrapped isearch-barrier isearch-within-brackets)))) (macrolet ((isearch-setf-state-places (cmd) (cons (quote setf) (loop for var being each element in isearch-state-places using (index index) nconc (list var (backquote (nth (\, index) (\, cmd))))))) (isearch-make-state-object nil (cons (quote list) isearch-state-places))) (defun isearch-push-state nil (push (isearch-make-state-object) isearch-cmds)) (defun isearch-top-state nil (isearch-setf-state-places (car isearch-cmds))))) # (unwind-protect ...) # (unwind-protect ...) # (unwind-protect ...) # (unwind-protect ...) # (unwind-protect ...) # (unwind-protect ...) # (unwind-protect ...) # (unwind-protect ...) (load-internal "[internal]") load("/pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/lisp/isearch-mode.el") (prog1 (load full-path) (unless (memq (quote quick-build) internal-error-checking) (garbage-collect))) (if full-path (prog1 (load full-path) (unless (memq (quote quick-build) internal-error-checking) (garbage-collect))) (format-into (quote external-debugging-output) "\nLoad file %s: not found\n" file) nil) # bind (full-path) (let ((full-path (locate-file file load-path (if load-ignore-elc-files (quote (".el" "")) (quote (".elc" ".el" "")))))) (if full-path (prog1 (load full-path) (unless (memq (quote quick-build) internal-error-checking) (garbage-collect))) (format-into (quote external-debugging-output) "\nLoad file %s: not found\n" file) nil)) # bind (file) pureload("isearch-mode") (if (pureload file) nil (external-debugging-output "Fatal error during load, aborting") (kill-emacs 1)) (unless (pureload file) (external-debugging-output "Fatal error during load, aborting") (kill-emacs 1)) (while (setq file (car files)) (unless (pureload file) (external-debugging-output "Fatal error during load, aborting") (kill-emacs 1)) (setq files (cdr files))) # bind (file files) (let ((files preloaded-file-list) file) (while (setq file (car files)) (unless (pureload file) (external-debugging-output "Fatal error during load, aborting") (kill-emacs 1)) (setq files (cdr files))) (unless (featurep (quote toolbar)) (defun toolbar-button-p (obj) "No toolbar support." nil) (defun toolbar-specifier-p (obj) "No toolbar support." nil)) (fmakunbound (quote pureload))) (lambda nil (setq load-path (list source-lisp)) (setq module-load-path (list (expand-file-name "modules" build-directory))) (format-into (quote external-debugging-output) "\nUsing load-path %s" load-path) (format-into (quote external-debugging-output) "\nUsing module-load-path %s" module-load-path) (buffer-disable-undo (get-buffer "*scratch*")) (let ((temp-path (expand-file-name "." (car load-path)))) (setq load-path (nconc (mapcar (function (lambda (i) (concatenate (quote string) i "/"))) (directory-files temp-path t "^[^-.]" nil (quote dirs-only))) (cons (file-name-as-directory temp-path) load-path)))) (setq load-warn-when-source-only t) (defun pureload (file) (let ((full-path (locate-file file load-path (if load-ignore-elc-files (quote (".el" "")) (quote (".elc" ".el" "")))))) (if full-path (prog1 (load full-path) (unless (memq (quote quick-build) internal-error-checking) (garbage-collect))) (format-into (quote external-debugging-output) "\nLoad file %s: not found\n" file) nil))) (load (expand-file-name "dumped-lisp.el" source-lisp)) (let ((files preloaded-file-list) file) (while (setq file (car files)) (unless (pureload file) (external-debugging-output "Fatal error during load, aborting") (kill-emacs 1)) (setq files (cdr files))) (unless (featurep (quote toolbar)) (defun toolbar-button-p (obj) "No toolbar support." nil) (defun toolbar-specifier-p (obj) "No toolbar support." nil)) (fmakunbound (quote pureload))) (packages-load-package-dumped-lisps late-package-load-path))() # (unwind-protect ...) call-with-condition-handler(really-early-error-handler (lambda nil (setq load-path (list source-lisp)) (setq module-load-path (list (expand-file-name "modules" build-directory))) (format-into (quote external-debugging-output) "\nUsing load-path %s" load-path) (format-into (quote external-debugging-output) "\nUsing module-load-path %s" module-load-path) (buffer-disable-undo (get-buffer "*scratch*")) (let ((temp-path (expand-file-name "." (car load-path)))) (setq load-path (nconc (mapcar (function (lambda (i) (concatenate (quote string) i "/"))) (directory-files temp-path t "^[^-.]" nil (quote dirs-only))) (cons (file-name-as-directory temp-path) load-path)))) (setq load-warn-when-source-only t) (defun pureload (file) (let ((full-path (locate-file file load-path (if load-ignore-elc-files (quote (".el" "")) (quote (".elc" ".el" "")))))) (if full-path (prog1 (load full-path) (unless (memq (quote quick-build) internal-error-checking) (garbage-collect))) (format-into (quote external-debugging-output) "\nLoad file %s: not found\n" file) nil))) (load (expand-file-name "dumped-lisp.el" source-lisp)) (let ((files preloaded-file-list) file) (while (setq file (car files)) (unless (pureload file) (external-debugging-output "Fatal error during load, aborting") (kill-emacs 1)) (setq files (cdr files))) (unless (featurep (quote toolbar)) (defun toolbar-button-p (obj) "No toolbar support." nil) (defun toolbar-specifier-p (obj) "No toolbar support." nil)) (fmakunbound (quote pureload))) (packages-load-package-dumped-lisps late-package-load-path))) # bind (stack-trace-on-error) (let ((stack-trace-on-error nil)) (call-with-condition-handler (quote really-early-error-handler) (function (lambda nil (setq load-path (list source-lisp)) (setq module-load-path (list (expand-file-name "modules" build-directory))) (format-into (quote external-debugging-output) "\nUsing load-path %s" load-path) (format-into (quote external-debugging-output) "\nUsing module-load-path %s" module-load-path) (buffer-disable-undo (get-buffer "*scratch*")) (let ((temp-path (expand-file-name "." (car load-path)))) (setq load-path (nconc (mapcar (function (lambda (i) (concatenate (quote string) i "/"))) (directory-files temp-path t "^[^-.]" nil (quote dirs-only))) (cons (file-name-as-directory temp-path) load-path)))) (setq load-warn-when-source-only t) (defun pureload (file) (let ((full-path (locate-file file load-path (if load-ignore-elc-files (quote (".el" "")) (quote (".elc" ".el" "")))))) (if full-path (prog1 (load full-path) (unless (memq (quote quick-build) internal-error-checking) (garbage-collect))) (format-into (quote external-debugging-output) "\nLoad file %s: not found\n" file) nil))) (load (expand-file-name "dumped-lisp.el" source-lisp)) (let ((files preloaded-file-list) file) (while (setq file (car files)) (unless (pureload file) (external-debugging-output "Fatal error during load, aborting") (kill-emacs 1)) (setq files (cdr files))) (unless (featurep (quote toolbar)) (defun toolbar-button-p (obj) "No toolbar support." nil) (defun toolbar-specifier-p (obj) "No toolbar support." nil)) (fmakunbound (quote pureload))) (packages-load-package-dumped-lisps late-package-load-path))))) # bind (gc-cons-threshold) (let ((gc-cons-threshold (if (and purify-flag (eq (memq (quote quick-build) internal-error-checking) nil)) 30000 3000000))) (let ((stack-trace-on-error nil)) (call-with-condition-handler (quote really-early-error-handler) (function (lambda nil (setq load-path (list source-lisp)) (setq module-load-path (list (expand-file-name "modules" build-directory))) (format-into (quote external-debugging-output) "\nUsing load-path %s" load-path) (format-into (quote external-debugging-output) "\nUsing module-load-path %s" module-load-path) (buffer-disable-undo (get-buffer "*scratch*")) (let ((temp-path (expand-file-name "." (car load-path)))) (setq load-path (nconc (mapcar (function (lambda (i) (concatenate (quote string) i "/"))) (directory-files temp-path t "^[^-.]" nil (quote dirs-only))) (cons (file-name-as-directory temp-path) load-path)))) (setq load-warn-when-source-only t) (defun pureload (file) (let ((full-path (locate-file file load-path (if load-ignore-elc-files (quote (".el" "")) (quote (".elc" ".el" "")))))) (if full-path (prog1 (load full-path) (unless (memq (quote quick-build) internal-error-checking) (garbage-collect))) (format-into (quote external-debugging-output) "\nLoad file %s: not found\n" file) nil))) (load (expand-file-name "dumped-lisp.el" source-lisp)) (let ((files preloaded-file-list) file) (while (setq file (car files)) (unless (pureload file) (external-debugging-output "Fatal error during load, aborting") (kill-emacs 1)) (setq files (cdr files))) (unless (featurep (quote toolbar)) (defun toolbar-button-p (obj) "No toolbar support." nil) (defun toolbar-specifier-p (obj) "No toolbar support." nil)) (fmakunbound (quote pureload))) (packages-load-package-dumped-lisps late-package-load-path))))) (setq preloaded-file-list (mapcar (function file-name-sans-extension) preloaded-file-list)) (setq load-warn-when-source-only nil) (setq debugger (quote debug)) (when (member "no-site-file" command-line-args) (setq site-start-file nil)) (when (load "site-load" t) (garbage-collect)) (when purify-flag (message "Finding pointers to doc strings...") (Snarf-documentation "DOC") (message "Finding pointers to doc strings...done") (Verify-documentation)) (when (stringp site-start-file) (load "site-init" t)) (setq load-history (cons (nreverse current-load-list) (delete* nil (mapc (function (lambda (element) (delete* (quote defun) element :key (function car-safe)) (delete-if (function (lambda (elt) (and (symbolp elt) (get elt (quote variable-documentation))))) element))) load-history) :key (function cdr))) current-load-list nil) (setcar (car load-history) (file-truename (caar load-history))) (store-match-data (list (let ((extent (make-extent 0 6 "string"))) (set-extent-property extent (quote search) (quote discard)) extent))) (garbage-collect) (buffer-enable-undo "*scratch*")) # (unwind-protect ...) # (unwind-protect ...) # (unwind-protect ...) # (unwind-protect ...) # (unwind-protect ...) # (unwind-protect ...) # (unwind-protect ...) # (unwind-protect ...) (load-internal "[internal]") load("loadup.el") # bind (purify-flag load-ignore-elc-files) (let ((load-ignore-elc-files t) (purify-flag nil)) (load "loadup.el")) # (unwind-protect ...) # (unwind-protect ...) # (unwind-protect ...) # (unwind-protect ...) # (unwind-protect ...) # (unwind-protect ...) # (unwind-protect ...) # (unwind-protect ...) (load-internal "[internal]") load("loadup-el.el") (cond ((and (eq update-elc-files-to-compile nil) (eq need-to-rebuild-autoloads nil) (eq need-to-rebuild-mule-autoloads nil) (eq need-to-recompile-autoloads nil) (eq need-to-recompile-mule-autoloads nil))) ((eq update-elc-files-to-compile nil) (setq command-line-args (append (quote ("-l" "loadup-el.el" "run-temacs" "-batch" "-no-packages" "-no-autoloads" "-eval" "(setq stack-trace-on-error t)" "-eval" "(setq load-always-display-messages t)" "-l" "bytecomp.elc" "-l" "autoload.elc")) do-autoload-commands)) (write-sequence "\nNeed to regenerate auto-autoload files... " (quote external-debugging-output)) (let ((load-ignore-elc-files nil) (purify-flag nil)) (load "loadup.el"))) (t (let ((bc-bootstrap (mapcar (function (lambda (arg) (concatenate (quote string) (update-elc-chop-extension (locate-library arg)) ".el"))) lisp-files-needed-for-byte-compilation)) (bootstrap-other (mapcar (function (lambda (arg) (concatenate (quote string) (update-elc-chop-extension (locate-library arg)) ".el"))) lisp-files-needing-early-byte-compilation))) (setq inhibit-autoloads t) (let ((recompile-bc-bootstrap (mapcan (function (lambda (arg) (when (member arg update-elc-files-to-compile) (append (quote ("-f" "batch-byte-compile-one-file")) (list arg))))) bc-bootstrap)) (recompile-bootstrap-other (mapcan (function (lambda (arg) (when (member arg update-elc-files-to-compile) (append (quote ("-f" "batch-byte-compile-one-file")) (list arg))))) bootstrap-other))) (mapc (function (lambda (arg) (setq update-elc-files-to-compile (delete* arg update-elc-files-to-compile :test (if default-file-system-ignore-case (function equalp) (function equal)))))) (append bc-bootstrap bootstrap-other)) (setq command-line-args (append (quote ("-l" "loadup-el.el" "run-temacs" "-batch" "-no-packages" "-no-autoloads" "-eval" "(setq stack-trace-on-error t)" "-eval" "(setq load-always-display-messages t)")) (when recompile-bc-bootstrap (append (quote ("-eval" "(setq load-ignore-elc-files t)" "-l" "bytecomp")) recompile-bc-bootstrap (quote ("-eval" "(setq load-ignore-elc-files nil)")))) (quote ("-l" "bytecomp")) recompile-bootstrap-other do-autoload-commands (quote ("-eval" "(setq inhibit-autoloads nil)" "-f" "startup-load-autoloads" "-f" "batch-byte-compile")) update-elc-files-to-compile)))) (load "loadup-el.el"))) # bind (do-autoload-commands) (let ((do-autoload-commands (append (if (or need-to-rebuild-autoloads need-to-rebuild-mule-autoloads need-to-rebuild-module-autoloads) (list "-l" "autoload")) (if need-to-rebuild-autoloads (list "-f" "batch-update-directory-autoloads" "auto" source-lisp)) (if need-to-rebuild-mule-autoloads (list "-f" "batch-update-directory-autoloads" "mule" source-lisp-mule)) (if need-to-rebuild-module-autoloads (list "-eval" (concatenate (quote string) "(update-autoload-files '(" (mapconcat (function prin1-to-string) (mapcan (function (lambda (full-dir) (unless (member* (file-name-nondirectory full-dir) (quote ("." "..")) :test (function equal)) (directory-files full-dir t "\\.c$" nil t)))) (directory-files source-modules t nil t (quote subdirs))) " ") ") \"modules\" " (prin1-to-string aa-modules) ")"))) (if need-to-recompile-autoloads (list "-f" "batch-byte-compile-one-file" aa-lisp)) (if need-to-recompile-mule-autoloads (list "-f" "batch-byte-compile-one-file" aa-lisp-mule)) (if need-to-recompile-module-autoloads (list "-f" "batch-byte-compile-one-file" aa-modules))))) (cond ((and (eq update-elc-files-to-compile nil) (eq need-to-rebuild-autoloads nil) (eq need-to-rebuild-mule-autoloads nil) (eq need-to-recompile-autoloads nil) (eq need-to-recompile-mule-autoloads nil))) ((eq update-elc-files-to-compile nil) (setq command-line-args (append (quote ("-l" "loadup-el.el" "run-temacs" "-batch" "-no-packages" "-no-autoloads" "-eval" "(setq stack-trace-on-error t)" "-eval" "(setq load-always-display-messages t)" "-l" "bytecomp.elc" "-l" "autoload.elc")) do-autoload-commands)) (write-sequence "\nNeed to regenerate auto-autoload files... " (quote external-debugging-output)) (let ((load-ignore-elc-files nil) (purify-flag nil)) (load "loadup.el"))) (t (let ((bc-bootstrap (mapcar (function (lambda (arg) (concatenate (quote string) (update-elc-chop-extension (locate-library arg)) ".el"))) lisp-files-needed-for-byte-compilation)) (bootstrap-other (mapcar (function (lambda (arg) (concatenate (quote string) (update-elc-chop-extension (locate-library arg)) ".el"))) lisp-files-needing-early-byte-compilation))) (setq inhibit-autoloads t) (let ((recompile-bc-bootstrap (mapcan (function (lambda (arg) (when (member arg update-elc-files-to-compile) (append (quote ("-f" "batch-byte-compile-one-file")) (list arg))))) bc-bootstrap)) (recompile-bootstrap-other (mapcan (function (lambda (arg) (when (member arg update-elc-files-to-compile) (append (quote ("-f" "batch-byte-compile-one-file")) (list arg))))) bootstrap-other))) (mapc (function (lambda (arg) (setq update-elc-files-to-compile (delete* arg update-elc-files-to-compile :test (if default-file-system-ignore-case (function equalp) (function equal)))))) (append bc-bootstrap bootstrap-other)) (setq command-line-args (append (quote ("-l" "loadup-el.el" "run-temacs" "-batch" "-no-packages" "-no-autoloads" "-eval" "(setq stack-trace-on-error t)" "-eval" "(setq load-always-display-messages t)")) (when recompile-bc-bootstrap (append (quote ("-eval" "(setq load-ignore-elc-files t)" "-l" "bytecomp")) recompile-bc-bootstrap (quote ("-eval" "(setq load-ignore-elc-files nil)")))) (quote ("-l" "bytecomp")) recompile-bootstrap-other do-autoload-commands (quote ("-eval" "(setq inhibit-autoloads nil)" "-f" "startup-load-autoloads" "-f" "batch-byte-compile")) update-elc-files-to-compile)))) (load "loadup-el.el")))) # (unwind-protect ...) # (unwind-protect ...) # (unwind-protect ...) # (unwind-protect ...) # (unwind-protect ...) # (unwind-protect ...) # (unwind-protect ...) # (unwind-protect ...) (load-internal "[internal]") load("../lisp/update-elc.el") # (condition-case ... . error) # (catch top-level ...) *** Killing XEmacs XEmacs exiting to debugger. Fatal error: assertion failed, file emacs.c, line 3397, ABORT() [1] Abort trap (core dumped) ./temacs -nd -no-packages -no-configured-paths... *** [./NEEDTODUMP] Error code 134 make[1]: stopped in /pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/src 1 error make[1]: stopped in /pbulk/work/editors/xemacs-current/work/xemacs-21.5.35/src *** [src] Error code 2 make: stopped in /pbulk/work/editors/xemacs-current/work/xemacs-21.5.35 1 error make: stopped in /pbulk/work/editors/xemacs-current/work/xemacs-21.5.35 *** Error code 2 Stop. make[1]: stopped in /usr/pkgsrc/editors/xemacs-current *** Error code 1 Stop. make: stopped in /usr/pkgsrc/editors/xemacs-current