summaryrefslogtreecommitdiff
path: root/gnu/usr.bin/perl/regcomp.sym
blob: bb5f8f8482bfe171503e69764db214d58e886e03 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
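# Format:
# NAME \t TYPE, arg-description [num-args] [longjump-len] \t DESCRIPTION
#
# The columns are separated by tabs (\t).  The bracketed num-args and
# longjump-len fields are optional.  Arg-descriptions used in this table:
# no, sv, node, num, off, evl.

# Empty rows and #-comment rows are ignored.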

# Exit points
# Both opcodes below share the node type END.
END		END,    no	End of program.
SUCCEED		END,    no	Return from a subroutine, basically.

# Anchors:
# The BOL/EOL/BOUND opcodes match the empty string ("") at a given
# position.  MBOL/SBOL and MEOL/SEOL are the multiline/singleline forms
# of BOL and EOL.
# NOTE(review): the UTF8, L and LUTF8 variants of BOUND/NBOUND carry the
# same description as their base opcode; judging by the ALNUM family
# further down, the suffixes presumably mean utf8, locale and
# locale+utf8 -- confirm.
BOL		BOL,    no	Match "" at beginning of line.
MBOL		BOL,    no	Same, assuming multiline.
SBOL		BOL,    no	Same, assuming singleline.
EOS		EOL,    no	Match "" at end of string.
EOL		EOL,    no	Match "" at end of line.
MEOL		EOL,    no	Same, assuming multiline.
SEOL		EOL,    no	Same, assuming singleline.
BOUND		BOUND,  no	Match "" at any word boundary
BOUNDUTF8	BOUND,  no	Match "" at any word boundary
BOUNDL		BOUND,  no	Match "" at any word boundary
BOUNDLUTF8	BOUND,  no	Match "" at any word boundary
NBOUND		NBOUND, no	Match "" at any word non-boundary
NBOUNDUTF8	NBOUND, no	Match "" at any word non-boundary
NBOUNDL		NBOUND, no	Match "" at any word non-boundary
NBOUNDLUTF8	NBOUND, no	Match "" at any word non-boundary
GPOS		GPOS,   no	Matches where last m//g left off.

# [Special] alternatives
# Naming convention visible in the descriptions below: a UTF8 suffix marks
# the utf8 variant, an L suffix the locale variant, LUTF8 the locale+utf8
# variant, and an N prefix the negated class (e.g. NALNUM matches any
# non-alphanumeric character).
REG_ANY		REG_ANY,    no	Match any one character (except newline).
ANYUTF8		REG_ANY,    no	Match any one Unicode character (except newline).
SANY		REG_ANY,    no	Match any one character.
SANYUTF8	REG_ANY,    no	Match any one Unicode character.
ANYOF		ANYOF,  sv	Match character in (or not in) this class.
ANYOFUTF8	ANYOF,  sv 1	Match character in (or not in) this class.
ALNUM		ALNUM,  no	Match any alphanumeric character
ALNUMUTF8	ALNUM,  no	Match any alphanumeric character in utf8
ALNUML		ALNUM,  no	Match any alphanumeric char in locale
ALNUMLUTF8	ALNUM,  no	Match any alphanumeric char in locale+utf8
NALNUM		NALNUM, no	Match any non-alphanumeric character
NALNUMUTF8	NALNUM, no	Match any non-alphanumeric character in utf8
NALNUML		NALNUM, no	Match any non-alphanumeric char in locale
NALNUMLUTF8	NALNUM, no	Match any non-alphanumeric char in locale+utf8
SPACE		SPACE,  no	Match any whitespace character
SPACEUTF8	SPACE,  no	Match any whitespace character in utf8
SPACEL		SPACE,  no	Match any whitespace char in locale
SPACELUTF8	SPACE,  no	Match any whitespace char in locale+utf8
NSPACE		NSPACE, no	Match any non-whitespace character
NSPACEUTF8	NSPACE, no	Match any non-whitespace character in utf8
NSPACEL		NSPACE, no	Match any non-whitespace char in locale
NSPACELUTF8	NSPACE, no	Match any non-whitespace char in locale+utf8
DIGIT		DIGIT,  no	Match any numeric character
DIGITUTF8	DIGIT,  no	Match any numeric character in utf8
DIGITL		DIGIT,  no	Match any numeric character in locale
DIGITLUTF8	DIGIT,  no	Match any numeric character in locale+utf8
NDIGIT		NDIGIT, no	Match any non-numeric character
NDIGITUTF8	NDIGIT, no	Match any non-numeric character in utf8
NDIGITL		NDIGIT, no	Match any non-numeric character in locale
NDIGITLUTF8	NDIGIT, no	Match any non-numeric character in locale+utf8
CLUMP		CLUMP,  no	Match any combining character sequence

# BRANCH	The set of branches constituting a single choice is hooked
#		together with their "next" pointers, since precedence prevents
#		anything being concatenated to any individual branch.  The
#		"next" pointer of the last BRANCH in a choice points to the
#		thing following the whole choice.  This is also where the
#		final "next" pointer of each individual branch points; each
#		branch starts with the operand node of a BRANCH node.
#
BRANCH		BRANCH, node	Match this alternative, or the next...

# BACK		Normal "next" pointers all implicitly point forward; BACK
#		exists to make loop structures possible.
#		BACK is currently not used.
BACK		BACK,   no	Match "", "next" ptr points backward.

# Literals
# All three opcodes share node type EXACT; per the descriptions, the F
# suffix marks the folded variant and FL the folded-in-locale variant.
EXACT		EXACT,  sv	Match this string (preceded by length).
EXACTF		EXACT,  sv	Match this string, folded (prec. by length).
EXACTFL		EXACT,  sv	Match this string, folded in locale (w/len).

# Do nothing
NOTHING		NOTHING,no	Match empty string.
# TAIL is a variant of NOTHING which delimits a group, and thus stops
# optimizations.
TAIL		NOTHING,no	Match empty string. Can jump here from outside.

# STAR,PLUS	'?', and complex '*' and '+', are implemented as circular
#		BRANCH structures using BACK.  Simple cases (one character
#		per match) are implemented with STAR and PLUS for speed
#		and to minimize recursive plunges.
#		NOTE(review): BACK is marked "not used" in this table, so
#		the BRANCH/BACK remark above may be historical -- confirm.
#
STAR		STAR,   node	Match this (simple) thing 0 or more times.
PLUS		PLUS,   node	Match this (simple) thing 1 or more times.

# CURLY family: all four opcodes share node type CURLY and declare a
# num-args value of 2.
CURLY		CURLY,  sv 2	Match this simple thing {n,m} times.
# The description of CURLYN is continued on the comment line that follows
# its row.
CURLYN		CURLY,  no 2	Match next-after-this simple thing 
#				{n,m} times, set parentheses.
CURLYM		CURLY,  no 2	Match this medium-complex thing {n,m} times.
CURLYX		CURLY,  sv 2	Match this complex thing {n,m} times.

# This terminator creates a loop structure for CURLYX (the "complex thing
# {n,m} times" opcode).
WHILEM		WHILEM, no	Do curly processing and see if rest matches.

# OPEN,CLOSE,GROUPP	...are numbered at compile time.
# (The GROUPP row itself is listed under "grouping assertions".)
OPEN		OPEN,   num 1	Mark this point in input as start of #n.
CLOSE		CLOSE,  num 1	Analogous to OPEN.

# References to an already matched string.  REFF and REFFL are the folded
# and folded-in-locale variants of REF; all three are declared "num 1".
REF		REF,    num 1	Match some already matched string
REFF		REF,    num 1	Match already matched string, folded
REFFL		REF,    num 1	Match already matched string, folded in loc.

# grouping assertions
# All rows except GROUPP share node type BRANCHJ and use the "off"
# arg-description with num-args 1; IFMATCH and UNLESSM declare a
# longjump-len of 2, the others 1.
IFMATCH		BRANCHJ,off 1 2	Succeeds if the following matches.
UNLESSM		BRANCHJ,off 1 2	Fails if the following matches.
SUSPEND		BRANCHJ,off 1 1	"Independent" sub-RE.
IFTHEN		BRANCHJ,off 1 1	Switch, should be preceded by switcher.
GROUPP		GROUPP, num 1	Whether the group matched.

# Support for long RE
# Both rows use the "off" arg-description with num-args 1 and
# longjump-len 1.
LONGJMP		LONGJMP,off 1 1	Jump far away.
BRANCHJ		BRANCHJ,off 1 1	BRANCH with long offset.

# The heavy worker
# EVAL is the only row in this table that uses the "evl" arg-description.
EVAL		EVAL,   evl 1	Execute some Perl code.

# Modifiers
# Per their descriptions, each of these affects the operator/opcode that
# comes next.
MINMOD		MINMOD, no	Next operator is not greedy.
LOGICAL		LOGICAL,no	Next opcode should set the flag only.

# RENUM is not used yet.
RENUM		BRANCHJ,off 1 1	Group with independently numbered parens.

# OPTIMIZED is not really a node, but an optimized-away piece of a "long"
# node.  To simplify debugging output, we mark it as if it were a node.
OPTIMIZED	NOTHING,off	Placeholder for dump.