summaryrefslogtreecommitdiff
path: root/lisp/test/regex.lsp
diff options
context:
space:
mode:
Diffstat (limited to 'lisp/test/regex.lsp')
-rw-r--r--lisp/test/regex.lsp440
1 files changed, 440 insertions, 0 deletions
diff --git a/lisp/test/regex.lsp b/lisp/test/regex.lsp
new file mode 100644
index 0000000..64ba572
--- /dev/null
+++ b/lisp/test/regex.lsp
@@ -0,0 +1,440 @@
+;;
+;; Copyright (c) 2002 by The XFree86 Project, Inc.
+;;
+;; Permission is hereby granted, free of charge, to any person obtaining a
+;; copy of this software and associated documentation files (the "Software"),
+;; to deal in the Software without restriction, including without limitation
+;; the rights to use, copy, modify, merge, publish, distribute, sublicense,
+;; and/or sell copies of the Software, and to permit persons to whom the
+;; Software is furnished to do so, subject to the following conditions:
+;;
+;; The above copyright notice and this permission notice shall be included in
+;; all copies or substantial portions of the Software.
+;;
+;; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+;; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+;; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+;; THE XFREE86 PROJECT BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+;; WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
+;; OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+;; SOFTWARE.
+;;
+;; Except as contained in this notice, the name of the XFree86 Project shall
+;; not be used in advertising or otherwise to promote the sale, use or other
+;; dealings in this Software without prior written authorization from the
+;; XFree86 Project.
+;;
+;; Author: Paulo César Pereira de Andrade
+;;
+;;
+;; $XFree86: xc/programs/xedit/lisp/test/regex.lsp,v 1.2 2002/12/11 04:44:28 paulo Exp $
+;;
+
+;; Basic regex tests. This file is only for xedit lisp and for its regex
+;; library. Note that the regex library used by xedit lisp is not meant
+;; to be fully compatible with most regexes, but to be as fast as possible.
+;; This means that some patterns that look basic may never be matched,
+;; but it is expected that almost any pattern can be rewritten to be
+;; matched, or in the worst case, it may be required to search in the
+;; regions matched by a previous regex.
+
+(defun re-test (expect &rest arguments &aux result (error t) unused error-value)  ; check (re-exec ARGUMENTS...) against EXPECT
+ (multiple-value-setq
+ (unused error-value)  ; error-value receives the second value of ignore-errors
+ (ignore-errors
+ (setq result (apply #'re-exec arguments))
+ (setq error nil)))  ; reached only when re-exec returned without signalling
+ (if error
+ (format t "ERROR: (re-exec~{ ~S~}) => ~S~%" arguments error-value)  ; report the call and error-value
+ (or (equal result expect)  ; prints nothing when the result is as expected
+ (format t "(re-exec~{ ~S~}) => should be ~S not ~S~%"
+ arguments expect result))))
+
+;; Errors are only generated for regex compilation (or incorrect arguments).
+(defun re-error (&rest arguments &aux (error t) unused error-value)  ; reports when (re-comp ARGUMENTS...) signals no error
+ (multiple-value-setq
+ (unused error-value)  ; values of ignore-errors; neither is reported here
+ (ignore-errors
+ (apply #'re-comp arguments)  ; expected to signal an error
+ (setq error nil)))  ; reached only when re-comp returned normally
+ (or error
+ (format t "ERROR: no error for (re-comp~{ ~S~})~%" arguments)))  ; ~% ends the line, as in re-test
+
+(re-error "")  ; patterns that re-comp is expected to reject; this one is empty
+(re-error "a**")  ; repetition operator applied twice
+(re-error "[a")  ; unterminated bracket expression
+(re-error "a{")  ; unterminated interval
+(re-error "a(")  ; unterminated group
+(re-error "a||b")  ; empty alternative in the middle
+(re-error "|b|c")  ; empty alternative at the start
+(re-error "a|b|")  ; empty alternative at the end
+
+(setq re (re-comp "abc"))  ; plain literal; expected results are (start . end) offsets into the whole string
+(re-test '((0 . 3)) re "abc")
+(re-test '((0 . 3)) re "abc" :notbol t)
+(re-test '((0 . 3)) re "abc" :noteol t)
+(re-test '((0 . 3)) re "abc" :notbol t :noteol t)
+(re-test '((14 . 17)) re "aaaaaaaaaaaaaaabc")
+(re-test '((14 . 17)) re "aaaaaaaaaaaaaaabc" :start 12 :end 17)  ; offsets are not relative to :start
+(re-test '((30 . 33)) re "xxxxxxxxxxxxxxaaaaaaaaaaaaaaaaabcxx")
+(re-test '((30 . 33)) re "xxxxxxxxxxxxxxaaaaaaaaaaaaaaaaabcxx" :start 28 :end 34)
+
+(setq re (re-comp "^abc"))  ; beginning-of-line anchor
+(re-test '((0 . 3)) re "abc")
+(re-test :nomatch re "xabc")
+(re-test '((1 . 4)) re "xabc" :start 1)  ; ^ is expected to match at :start ...
+(re-test :nomatch re "xabc" :start 1 :notbol t)  ; ... unless :notbol is given
+
+(setq re (re-comp "abc$"))  ; end-of-line anchor
+(re-test '((0 . 3)) re "abc")
+(re-test :nomatch re "xabcx")
+(re-test '((1 . 4)) re "xabcx" :end 4)  ; $ is expected to match at :end ...
+(re-test :nomatch re "xabc" :end 4 :noteol t)  ; ... unless :noteol is given
+
+(setq re (re-comp "^abc$"))  ; both anchors
+(re-test '((0 . 3)) re "abc")
+(re-test :nomatch re "xabcx")
+(re-test '((1 . 4)) re "xabcx" :start 1 :end 4)
+(re-test :nomatch re "xabcx" :start 1 :end 4 :notbol t)
+(re-test :nomatch re "xabcx" :start 1 :end 4 :noteol t)
+(re-test :nomatch re "xabcx" :start 1 :end 4 :notbol t :noteol t)
+(re-test nil re "abc" :count 0)  ; with :count 0 the expected result is nil, not a list of offsets
+
+(setq re (re-comp "abc|bcd|cde"))  ; alternation of literals
+(re-test '((0 . 3)) re "abc")
+(re-test '((1 . 4)) re "aabc")
+(re-test '((3 . 6)) re "xxxbcdef")
+(re-test '((8 . 11)) re "abdzzzcdabcde")
+(re-test '((13 . 16)) re "xxxxabdecdabdcde")
+
+(setq re (re-comp "^abc|bcd$|cde"))  ; anchors inside alternatives: ^ belongs to abc only, $ to bcd only
+(re-test '((0 . 3)) re "abcde")
+(re-test '((3 . 6)) re "xabcde")  ; abc is not at the start, so cde is the expected match
+(re-test '((1 . 4)) re "xabcde" :start 1)
+(re-test '((3 . 6)) re "xabcde" :start 1 :notbol t)
+(re-test '((2 . 5)) re "xabcd")
+(re-test :nomatch re "xabcd" :noteol t)
+(re-test nil re "xabcd" :count 0)
+(re-test :nomatch re "abcdx" :notbol t)
+
+(setq re (re-comp "a?bc|ab?c|abc?"))  ; optional (?) element in each alternative
+(re-test '((0 . 3)) re "abc")
+(re-test :nomatch re "xxxb")
+(re-test '((3 . 5)) re "xxxbc")
+(re-test '((5 . 7)) re "sssssab")
+(re-test '((0 . 3)) re "abcd")
+(re-test '((1 . 4)) re "aabcdef")
+(re-test '((1 . 3)) re "aabbccdef") ;; ab matches abc?
+
+(setq re (re-comp "a?bc"))  ; the same alternatives, one at a time
+(re-test '((2 . 4)) re "acbcd")
+(re-test '((2 . 5)) re "acabcd")
+
+(setq re (re-comp "ab?c"))
+(re-test '((1 . 3)) re "xacc")
+(re-test '((2 . 5)) re "xxabcc")
+
+(setq re (re-comp "abc?"))
+(re-test '((1 . 3)) re "xababc")
+(re-test '((2 . 5)) re "xxabccabc")
+
+(setq re (re-comp "a*bc|ab*c|abc*"))  ; zero-or-more (*) element in each alternative
+(re-test '((0 . 9)) re "aaaaaaabc")
+(re-test '((1 . 10)) re "xaaaaaaabc")
+(re-test '((3 . 12)) re "xyzaaaaaaabc")
+(re-test '((0 . 4)) re "abbc")
+(re-test '((2 . 9)) re "xxabbbbbc")
+(re-test '((0 . 12)) re "abcccccccccc")
+(re-test '((0 . 12)) re "abccccccccccd")
+(re-test '((16 . 29)) re "xxxxxxxaaaaaaaaaabbbbbbbbbbbccccccccccc")
+(re-test '((11 . 13)) re "xxxbbbbbbbbbc")
+(re-test '((8 . 10)) re "aaaaazbxacd")
+
+(setq re (re-comp "a*bc"))  ; the same alternatives, one at a time
+(re-test '((2 . 4)) re "acbcd")
+(re-test '((2 . 5)) re "acabcd")
+(re-test '((2 . 8)) re "acaaaabcd")
+
+(setq re (re-comp "ab*c"))
+(re-test '((1 . 3)) re "xacc")
+(re-test '((2 . 5)) re "xxabcc")
+(re-test '((3 . 8)) re "xxaabbbcc")
+
+(setq re (re-comp "abc*"))
+(re-test '((1 . 3)) re "xababc")
+(re-test '((2 . 5)) re "xxabcbabccc")
+(re-test '((3 . 7)) re "axxabccabc")
+
+(setq re (re-comp "a+bc|ab+c|abc+"))  ; one-or-more (+) element in each alternative
+(re-test :nomatch re "xxxbc")  ; no a in the string, and every alternative requires one
+(re-test '((1 . 6)) re "xaaabc")
+(re-test '((8 . 12)) re "zzzzaaaaabbc")
+(re-test '((7 . 15)) re "zzzzaaaabbbbbbcccc")
+
+(setq re (re-comp "a.c"))  ; `.' between two literals
+(re-test '((0 . 3)) re "abc")
+(re-test '((1 . 4)) re "aaac")
+(re-test :nomatch re "xac")  ; `.' has to consume one character
+(re-test '((3 . 6)) re "xaxaac")
+(re-test '((2 . 5)) re "xxabc")
+(re-test '((3 . 6)) re "acxaxc")
+
+(setq re (re-comp "a*c"))  ; a* may match no a at all, so a lone c is a match
+(re-test '((0 . 1)) re "c")
+(re-test '((5 . 6)) re "xxxxxc")
+(re-test '((8 . 9)) re "xxxxxxxxc")
+(re-test '((7 . 8)) re "xxxxxxxcc")  ; the first c is the expected match
+(re-test '((0 . 2)) re "ac")
+(re-test '((0 . 5)) re "aaaac")
+(re-test '((1 . 3)) re "xac")
+(re-test '((3 . 6)) re "xxxaac")
+(re-test '((2 . 4)) re "xxac")
+(re-test '((4 . 6)) re "xxxxac")
+
+(setq re (re-comp "a+c"))  ; at least one a is required before c
+(re-test '((2 . 5)) re "xxaac")
+(re-test '((3 . 8)) re "xxxaaaac")
+(re-test '((6 . 8)) re "xaaaabac")
+(re-test :nomatch re "xxxc")
+(re-test '((4 . 9)) re "xxxxaaaaccc")
+
+(setq re (re-comp "a{4}b"))  ; exactly four a
+(re-test '((19 . 24)) re "xabxxaabxxxaaabxxxxaaaab")
+(re-test '((4 . 9)) re "aaabaaaab")
+
+(setq re (re-comp "a{4,}b"))  ; four or more a
+(re-test '((3 . 8)) re "xxxaaaab")
+(re-test '((8 . 25)) re "zaaabzzzaaaaaaaaaaaaaaaab")
+
+(setq re (re-comp "a{,4}b"))  ; up to four a, including none
+(re-test '((0 . 1)) re "b")
+(re-test '((8 . 9)) re "xxxxxxxxb")
+(re-test '((6 . 11)) re "xaaaaaaaaab")  ; only the last four a are part of the expected match
+(re-test '((3 . 5)) re "xxxab")
+(re-test '((6 . 10)) re "aaaaaxaaab")
+
+(setq re (re-comp "a{2,4}b"))  ; between two and four a
+(re-test :nomatch re "xab")
+(re-test '((1 . 4)) re "xaab")
+(re-test '((1 . 5)) re "xaaab")
+(re-test '((2 . 7)) re "xxaaaab")
+(re-test '((4 . 9)) re "xxxaaaaab")  ; only the last four a are part of the expected match
+
+(setq re (re-comp "foo(bar|baz)fee"))  ; group: with :count > 1 the group offsets follow the whole match
+(re-test '((9 . 18)) re "feebarbazfoobarfee")
+(re-test '((9 . 18) (12 . 15)) re "feebarbazfoobarfee" :count 2)
+(re-test '((13 . 22)) re "foofooobazfeefoobazfee")
+(re-test '((13 . 22) (16 . 19)) re "foofooobazfeefoobazfee" :count 3)  ; :count 3 with one group still expects two pairs
+
+(setq re (re-comp "foo(bar|baz)fee" :nosub t))  ; :nosub t: only the whole match is expected, whatever :count is
+(re-test '((9 . 18)) re "feebarbazfoobarfee")
+(re-test '((9 . 18)) re "feebarbazfoobarfee" :count 2)
+(re-test '((13 . 22)) re "foofooobazfeefoobazfee")
+(re-test '((13 . 22)) re "foofooobazfeefoobazfee" :count 3)
+
+(setq re (re-comp "f(oo|ee)ba[rz]"))  ; group followed by a bracket expression
+(re-test :nomatch re "barfoebaz")
+(re-test '((3 . 9) (4 . 6)) re "bazfoobar" :count 2)
+(re-test '((3 . 9) (4 . 6)) re "barfeebaz" :count 2)
+
+(setq re (re-comp "f(oo|ee)ba[rz]" :nosub t))  ; same pattern without group reporting
+(re-test :nomatch re "barfoebaz")
+(re-test '((3 . 9)) re "bazfoobar" :count 2)
+(re-test '((3 . 9)) re "barfeebaz" :count 2)
+
+(setq re (re-comp "\\<(int|char)\\>"))  ; \< and \> : "aint" and "character" are not expected to match
+(re-test '((15 . 18)) re "aint character int foo")
+(re-test '((15 . 18) (15 . 18)) re "aint character int foo" :count 2)
+
+(setq re (re-comp "\\<(int|char)\\>" :nosub t))
+(re-test '((15 . 18)) re "aint character int foo" :count 2)
+
+(setq re (re-comp "foo.*bar"))  ; the expected match ends at the first bar after foo, not the last one
+(re-test '((11 . 17)) re "barfoblaboofoobarfoobarfoobar")
+
+(setq re (re-comp "foo.+bar"))  ; at least one character between foo and bar
+(re-test :nomatch re "foobar")
+(re-test '((6 . 13)) re "fobbarfooxbarfooybar")
+
+(setq re (re-comp "foo.?bar"))  ; at most one character between foo and bar
+(re-test '((1 . 7)) re "xfoobar")
+(re-test :nomatch re "xxfooxxbar")
+(re-test '((3 . 10)) re "yyyfootbar")
+
+(setq re (re-comp "a.*b.*c"))  ; two wildcard repetitions; the expected match ends at the first c
+(re-test '((0 . 3)) re "abc")
+(re-test '((9 . 18)) re "xxxxxxxxxabbbbbbbccaaaaabbbc")
+
+(setq re (re-comp "a.+b.*c"))
+(re-test :nomatch re "xxxabc")
+(re-test '((2 . 7)) re "xxaxbbc")
+
+(setq re (re-comp "a.+b.?c"))
+(re-test '((1 . 5)) re "xaabc")
+(re-test '((2 . 7)) re "xxaabbc")
+
+(setq re (re-comp "(foo.*|bar)fee"))  ; wildcard repetition inside a group alternative
+(re-test '((3 . 9) (3 . 6)) re "barfoofee" :count 2)
+(re-test '((0 . 9) (0 . 6)) re "foobarfee" :count 2)
+(re-test '((4 . 10) (4 . 7)) re "xxfobarfee" :count 2)
+(re-test '((3 . 17) (3 . 14)) re "barfooooooobarfee" :count 2)
+(re-test '((4 . 10) (4 . 7)) re "xxfobarfeefoobar" :count 2)
+
+(setq re (re-comp "(foo.+|bar)fee"))
+(re-test :nomatch re "barfoofee" :count 2)
+(re-test '((3 . 10) (3 . 7)) re "barfooxfee" :count 2)
+
+(setq re (re-comp "(foo.?|bar)fee"))  ; also checks :count values above and below the number of pairs
+(re-test :nomatch re "foobar" :count 2)
+(re-test '((2 . 8) (2 . 5)) re "bafoofee" :count 2)
+(re-test '((2 . 9) (2 . 6)) re "bafooofeebarfee" :count 4)
+(re-test '((2 . 8) (2 . 5)) re "bafoofeebarfee" :count 2)
+(re-test nil re "bafoofeebarfee" :count 0)
+(re-test '((2 . 8)) re "bafoofeebarfee" :count 1)
+
+(setq re (re-comp "(a|b|c)\\1"))  ; back reference to a single group
+(re-test '((0 . 2) (0 . 1)) re "aa" :count 2)
+
+(setq re (re-comp "(a|b|c)(a|b|c)\\1\\2"))  ; two groups, each referenced once; :count limits the pairs expected
+(re-test '((0 . 4) (0 . 1) (1 . 2)) re "acac" :count 5)
+(re-test '((4 . 8) (4 . 5) (5 . 6)) re "xxxxacac" :count 4)
+(re-test '((24 . 28) (24 . 25) (25 . 26)) re "xxacabacbcacbbacbcaaccabcaca" :count 3)
+(re-test '((4 . 8) (4 . 5) (5 . 6)) re "xyabcccc" :count 3)
+(re-test '((4 . 8) (4 . 5)) re "xyabcccc" :count 2)
+(re-test '((4 . 8)) re "xyabcccc" :count 1)
+(re-test nil re "xyabcccc" :count 0)
+
+(setq re (re-comp "(a*b)\\1"))  ; back reference to a group containing a repetition
+(re-test '((3 . 15) (3 . 9)) re "xxxaaaaabaaaaab" :count 2)
+(re-test '((7 . 9) (7 . 8)) re "abaabaxbb" :count 2)
+
+(setq re (re-comp "(ab+c)\\1"))
+(re-test '((3 . 13) (3 . 8)) re "xaaabbbcabbbc" :count 3)
+
+(setq re (re-comp "(ab?c)\\1"))
+(re-test :nomatch re "abcac" :count 2)  ; abc followed by ac is not a repeat of the group text
+(re-test '((4 . 8) (4 . 6)) re "acabacac" :count 2)
+(re-test '((5 . 11) (5 . 8)) re "abcacabcabc" :count 2)
+(re-test '((3 . 7) (3 . 5)) re "abcacac" :count 2)
+
+(setq re (re-comp "a(.*)b\\1"))  ; the group may be empty, expected as a pair with equal start and end
+(re-test '((3 . 5) (4 . 4)) re "xxxab" :count 2)
+(re-test '((4 . 12) (5 . 8)) re "xxxxazzzbzzz" :count 2)
+
+(setq re (re-comp "abc" :icase t))  ; :icase t: letter case is ignored when matching
+(re-test '((0 . 3)) re "AbC")
+
+(setq re (re-comp "[0-9][a-z]+" :icase t))  ; the range a-z is expected to accept upper case too
+(re-test '((3 . 10)) re "xxx0aaZxYT9")
+
+(setq re (re-comp "a.b" :icase t))
+(re-test '((10 . 13)) re "aaaaaaaaaaaxB")
+
+(setq re (re-comp "a.*z" :icase t))
+(re-test '((3 . 9)) re "xxxAaaaaZ")
+(re-test '((2 . 6)) re "xxaaaZaaa")
+
+(setq re (re-comp "\\<(lambda|defun|defmacro)\\>" :icase t))  ; keywords delimited by \< and \>
+(re-test '((5 . 11)) re " (lambda")  ; NOTE(review): offset 5 implies five characters before "lambda"; only two appear here -- whitespace was possibly collapsed in this rendering, confirm against the original file
+(re-test '((5 . 11) (5 . 11)) re " (lambda" :count 2)
+(re-test :nomatch re "lamda defunn deffmacro")
+
+(setq re (re-comp "\\<(nil|t)\\>" :icase t))
+(re-test '((3 . 6)) re "it Nil")  ; the t ending "it" is not expected to match
+(re-test '((3 . 6) (3 . 6)) re "it Nil" :count 6)
+(re-test :nomatch re "nilo")
+
+(setq re (re-comp "\\<(begin|end)\\>" :icase t))
+(re-test '((21 . 24) (21 . 24)) re "beginning the ending EnD" :count 7)
+
+(setq re (re-comp "a.*" :newline t))  ; :newline t: `.' is not expected to match across the line break
+(re-test '((0 . 1)) re "a
+aaa")
+(re-test '((3 . 4)) re "xyza
+aa")
+
+(setq re (re-comp "a.+" :newline t))  ; needs a character after a, so the first line cannot match
+(re-test '((2 . 5)) re "a
+aaa")
+(re-test '((5 . 7)) re "xyza
+aa")
+
+(setq re (re-comp "a.?" :newline t))
+(re-test '((0 . 1)) re "a
+aaa")
+(re-test '((3 . 4)) re "xyza
+aa")
+
+(setq re (re-comp "a.*b.*c" :newline t))  ; the whole match is expected inside a single line
+(re-test '((11 . 14)) re "xxaa
+zyacb
+abc")
+(re-test '((6 . 9)) re "xxxab
+abc
+c")
+
+(setq re (re-comp "a.+b.*c" :newline t))
+(re-test '((6 . 10)) re "ab
+bc
+abbc")
+
+(setq re (re-comp "a.?b.*c" :newline t))
+(re-test '((4 . 8)) re "ab
+cabbc
+cc")
+
+(setq re (re-comp "^foo$" :newline t))  ; ^ and $ are expected to match at line breaks inside the string
+(re-test '((11 . 14)) re "bar
+foobar
+foo")
+(re-test '((0 . 3)) re "foo
+bar
+foo
+bar")
+(re-test '((8 . 11)) re "foo
+bar
+foo
+bar" :notbol t)
+(re-test '((8 . 11)) re "foo
+bar
+foo" :notbol t)
+(re-test :nomatch re "foo
+bar
+foo" :notbol t :noteol t)
+
+(setq re (re-comp "^\\s*#\\s*(define|include)\\s+.+" :newline t))  ; "#define" alone on its line is not expected to match
+(re-test '((8 . 18)) re "#define
+#include x")
+(re-test '((8 . 18) (9 . 16)) re "#define
+#include x" :count 2)
+
+(setq re (re-comp "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx"))
+(re-test '((3 . 259)) re "zzzxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxzzz")
+
+(setq re (re-comp "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz01234567890~ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz01234567890~ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz01234567890~ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz01234567890~ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz01234567890~"))
+(re-test '((13 . 333)) re "String here: ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz01234567890~ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz01234567890~ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz01234567890~ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz01234567890~ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz01234567890~/")
+
+(setq re (re-comp "(.*)\\D(\\d+)"))  ; \D and \d with two groups, at decreasing :count values
+(re-test '((0 . 6) (0 . 3) (4 . 6)) re "abcW12" :count 3)
+(re-test '((0 . 6) (0 . 3)) re "abcW12" :count 2)
+(re-test '((0 . 6)) re "abcW12" :count 1)
+(re-test nil re "abcW12" :count 0)
+(re-test '((0 . 6) (0 . 3) (4 . 6)) re "abcW12abcW12" :count 3)  ; the first occurrence is the expected match
+(re-test '((0 . 6) (0 . 3) (4 . 6)) re "abcW12abcW12a" :count 3)
+
+(setq re (re-comp ".*\\d"))
+(re-test '((0 . 2)) re "a1a1a1aaaaaaa") ; minimal match only
+
+(setq re (re-comp "(.*)\\d"))
+(re-test '((0 . 2) (0 . 1)) re "a1a1a1aaaaaaa" :count 2); minimal match only
+
+(setq re (re-comp ".*(\\d)"))
+(re-test '((0 . 2) (1 . 2)) re "a1a1a1aaaaaaa" :count 2); minimal match only
+
+;; XXX this very simple pattern used to enter an infinite loop.
+;; Actually, this pattern is not supported; just test that it does not
+;; crash (it is not supported because it is not cheap to match variations
+;; of the pattern).
+(setq re (re-comp "(.*a)?"))
+(re-test '((0 . 1)) re "aaaa") ; expected, minimal match
+(re-test '((0 . 1) (0 . 1)) re "aaaa" :count 2)