summaryrefslogtreecommitdiff
path: root/gnu/usr.bin/perl/toke.c
diff options
context:
space:
mode:
Diffstat (limited to 'gnu/usr.bin/perl/toke.c')
-rw-r--r--gnu/usr.bin/perl/toke.c179
1 files changed, 114 insertions, 65 deletions
diff --git a/gnu/usr.bin/perl/toke.c b/gnu/usr.bin/perl/toke.c
index 9ecea185712..8525419b1d8 100644
--- a/gnu/usr.bin/perl/toke.c
+++ b/gnu/usr.bin/perl/toke.c
@@ -1,7 +1,7 @@
/* toke.c
*
* Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999,
- * 2000, 2001, 2002, 2003, by Larry Wall and others
+ * 2000, 2001, 2002, 2003, 2004, by Larry Wall and others
*
* You may distribute under the terms of either the GNU General Public
* License or the Artistic License, as specified in the README file.
@@ -250,18 +250,23 @@ S_no_op(pTHX_ char *what, char *s)
else
PL_bufptr = s;
yywarn(Perl_form(aTHX_ "%s found where operator expected", what));
- if (is_first)
- Perl_warn(aTHX_ "\t(Missing semicolon on previous line?)\n");
- else if (PL_oldoldbufptr && isIDFIRST_lazy_if(PL_oldoldbufptr,UTF)) {
- char *t;
- for (t = PL_oldoldbufptr; *t && (isALNUM_lazy_if(t,UTF) || *t == ':'); t++) ;
- if (t < PL_bufptr && isSPACE(*t))
- Perl_warn(aTHX_ "\t(Do you need to predeclare %.*s?)\n",
- t - PL_oldoldbufptr, PL_oldoldbufptr);
- }
- else {
- assert(s >= oldbp);
- Perl_warn(aTHX_ "\t(Missing operator before %.*s?)\n", s - oldbp, oldbp);
+ if (ckWARN_d(WARN_SYNTAX)) {
+ if (is_first)
+ Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
+ "\t(Missing semicolon on previous line?)\n");
+ else if (PL_oldoldbufptr && isIDFIRST_lazy_if(PL_oldoldbufptr,UTF)) {
+ char *t;
+ for (t = PL_oldoldbufptr; *t && (isALNUM_lazy_if(t,UTF) || *t == ':'); t++) ;
+ if (t < PL_bufptr && isSPACE(*t))
+ Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
+ "\t(Do you need to predeclare %.*s?)\n",
+ t - PL_oldoldbufptr, PL_oldoldbufptr);
+ }
+ else {
+ assert(s >= oldbp);
+ Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
+ "\t(Missing operator before %.*s?)\n", s - oldbp, oldbp);
+ }
}
PL_bufptr = oldbp;
}
@@ -1222,7 +1227,7 @@ S_scan_const(pTHX_ char *start)
const char *leaveit = /* set of acceptably-backslashed characters */
PL_lex_inpat
- ? "\\.^$@AGZdDwWsSbBpPXC+*?|()-nrtfeaxcz0123456789[{]} \t\n\r\f\v#"
+ ? "\\.^$@AGZdDwWsSbBpPXC+*?|()-nrtfeaxz0123456789[{]} \t\n\r\f\v#"
: "";
if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
@@ -2424,8 +2429,12 @@ Perl_yylex(pTHX)
if (!PL_rsfp) {
PL_last_uni = 0;
PL_last_lop = 0;
- if (PL_lex_brackets)
- yyerror("Missing right curly or square bracket");
+ if (PL_lex_brackets) {
+ if (PL_lex_formbrack)
+ yyerror("Format not terminated");
+ else
+ yyerror("Missing right curly or square bracket");
+ }
DEBUG_T( { PerlIO_printf(Perl_debug_log,
"### Tokener got EOF\n");
} );
@@ -2521,8 +2530,13 @@ Perl_yylex(pTHX)
sv_setpv(PL_linestr,"");
TOKEN(';'); /* not infinite loop because rsfp is NULL now */
}
- /* if it looks like the start of a BOM, check if it in fact is */
- else if (bof && (!*s || *(U8*)s == 0xEF || *(U8*)s >= 0xFE)) {
+ /* If it looks like the start of a BOM or raw UTF-16,
+ * check if it in fact is. */
+ else if (bof &&
+ (*s == 0 ||
+ *(U8*)s == 0xEF ||
+ *(U8*)s >= 0xFE ||
+ s[1] == 0)) {
#ifdef PERLIO_IS_STDIO
# ifdef __GNU_LIBRARY__
# if __GNU_LIBRARY__ == 1 /* Linux glibc5 */
@@ -3022,21 +3036,25 @@ Perl_yylex(pTHX)
PL_lex_stuff = Nullsv;
}
else {
+ if (len == 6 && strnEQ(s, "unique", len)) {
+ if (PL_in_my == KEY_our)
+#ifdef USE_ITHREADS
+ GvUNIQUE_on(cGVOPx_gv(yylval.opval));
+#else
+ ; /* skip to avoid loading attributes.pm */
+#endif
+ else
+ Perl_croak(aTHX_ "The 'unique' attribute may only be applied to 'our' variables");
+ }
+
/* NOTE: any CV attrs applied here need to be part of
the CVf_BUILTIN_ATTRS define in cv.h! */
- if (!PL_in_my && len == 6 && strnEQ(s, "lvalue", len))
+ else if (!PL_in_my && len == 6 && strnEQ(s, "lvalue", len))
CvLVALUE_on(PL_compcv);
else if (!PL_in_my && len == 6 && strnEQ(s, "locked", len))
CvLOCKED_on(PL_compcv);
else if (!PL_in_my && len == 6 && strnEQ(s, "method", len))
CvMETHOD_on(PL_compcv);
- else if (PL_in_my == KEY_our && len == 6 &&
- strnEQ(s, "unique", len))
-#ifdef USE_ITHREADS
- GvUNIQUE_on(cGVOPx_gv(yylval.opval));
-#else
- ; /* skip that case to avoid loading attributes.pm */
-#endif
/* After we've set the flags, it could be argued that
we don't need to do the attributes.pm-based setting
process, and shouldn't bother appending recognized
@@ -4656,8 +4674,8 @@ Perl_yylex(pTHX)
if (isIDFIRST_lazy_if(s,UTF)) {
char *t;
for (d = s; isALNUM_lazy_if(d,UTF); d++) ;
- t = skipspace(d);
- if (strchr("|&*+-=!?:.", *t) && ckWARN_d(WARN_PRECEDENCE)
+ for (t=d; *t && isSPACE(*t); t++) ;
+ if ( *t && strchr("|&*+-=!?:.", *t) && ckWARN_d(WARN_PRECEDENCE)
/* [perl #16184] */
&& !(t[0] == '=' && t[1] == '>')
) {
@@ -5065,8 +5083,12 @@ Perl_yylex(pTHX)
if (*s == ':' && s[1] != ':')
PL_expect = attrful;
- else if (!have_name && *s != '{' && key == KEY_sub)
- Perl_croak(aTHX_ "Illegal declaration of anonymous subroutine");
+ else if (*s != '{' && key == KEY_sub) {
+ if (!have_name)
+ Perl_croak(aTHX_ "Illegal declaration of anonymous subroutine");
+ else if (*s != ';')
+ Perl_croak(aTHX_ "Illegal declaration of subroutine %"SVf, PL_subname);
+ }
if (have_proto) {
PL_nextval[PL_nexttoke].opval =
@@ -5502,7 +5524,9 @@ Perl_keyword(pTHX_ register char *d, I32 len)
break;
case 6:
if (strEQ(d,"exists")) return KEY_exists;
- if (strEQ(d,"elseif")) Perl_warn(aTHX_ "elseif should be elsif");
+ if (strEQ(d,"elseif") && ckWARN_d(WARN_SYNTAX))
+ Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
+ "elseif should be elsif");
break;
case 8:
if (strEQ(d,"endgrent")) return -KEY_endgrent;
@@ -7577,20 +7601,23 @@ S_scan_formline(pTHX_ register char *s)
register char *t;
SV *stuff = newSVpvn("",0);
bool needargs = FALSE;
+ bool eofmt = FALSE;
while (!needargs) {
- if (*s == '.' || *s == /*{*/'}') {
+ if (*s == '.') {
/*SUPPRESS 530*/
#ifdef PERL_STRICT_CR
for (t = s+1;SPACE_OR_TAB(*t); t++) ;
#else
for (t = s+1;SPACE_OR_TAB(*t) || *t == '\r'; t++) ;
#endif
- if (*t == '\n' || t == PL_bufend)
+ if (*t == '\n' || t == PL_bufend) {
+ eofmt = TRUE;
break;
+ }
}
if (PL_in_eval && !PL_rsfp) {
- eol = strchr(s,'\n');
+ eol = memchr(s,'\n',PL_bufend-s);
if (!eol++)
eol = PL_bufend;
}
@@ -7627,7 +7654,6 @@ S_scan_formline(pTHX_ register char *s)
PL_last_lop = PL_last_uni = Nullch;
if (!s) {
s = PL_bufptr;
- yyerror("Format not terminated");
break;
}
}
@@ -7656,7 +7682,8 @@ S_scan_formline(pTHX_ register char *s)
}
else {
SvREFCNT_dec(stuff);
- PL_lex_formbrack = 0;
+ if (eofmt)
+ PL_lex_formbrack = 0;
PL_bufptr = s;
}
return s;
@@ -7794,8 +7821,8 @@ Perl_yyerror(pTHX_ char *s)
(int)PL_multi_open,(int)PL_multi_close,(IV)PL_multi_start);
PL_multi_end = 0;
}
- if (PL_in_eval & EVAL_WARNONLY)
- Perl_warn(aTHX_ "%"SVf, msg);
+ if (PL_in_eval & EVAL_WARNONLY && ckWARN_d(WARN_SYNTAX))
+ Perl_warner(aTHX_ packWARN(WARN_SYNTAX), "%"SVf, msg);
else
qerror(msg);
if (PL_error_count >= 10) {
@@ -7819,72 +7846,94 @@ S_swallow_bom(pTHX_ U8 *s)
{
STRLEN slen;
slen = SvCUR(PL_linestr);
- switch (*s) {
+ switch (s[0]) {
case 0xFF:
if (s[1] == 0xFE) {
- /* UTF-16 little-endian */
+ /* UTF-16 little-endian? (or UTF32-LE?) */
if (s[2] == 0 && s[3] == 0) /* UTF-32 little-endian */
- Perl_croak(aTHX_ "Unsupported script encoding");
+ Perl_croak(aTHX_ "Unsupported script encoding UTF32-LE");
#ifndef PERL_NO_UTF16_FILTER
- DEBUG_p(PerlIO_printf(Perl_debug_log, "UTF-LE script encoding\n"));
+ if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF16-LE script encoding (BOM)\n");
s += 2;
+ utf16le:
if (PL_bufend > (char*)s) {
U8 *news;
I32 newlen;
filter_add(utf16rev_textfilter, NULL);
New(898, news, (PL_bufend - (char*)s) * 3 / 2 + 1, U8);
- PL_bufend = (char*)utf16_to_utf8_reversed(s, news,
- PL_bufend - (char*)s - 1,
- &newlen);
- Copy(news, s, newlen, U8);
- SvCUR_set(PL_linestr, newlen);
- PL_bufend = SvPVX(PL_linestr) + newlen;
- news[newlen++] = '\0';
+ PL_bufend =
+ (char*)utf16_to_utf8_reversed(s, news,
+ PL_bufend - (char*)s - 1,
+ &newlen);
+ sv_setpvn(PL_linestr, (const char*)news, newlen);
Safefree(news);
+ SvUTF8_on(PL_linestr);
+ s = (U8*)SvPVX(PL_linestr);
+ PL_bufend = SvPVX(PL_linestr) + newlen;
}
#else
- Perl_croak(aTHX_ "Unsupported script encoding");
+ Perl_croak(aTHX_ "Unsupported script encoding UTF16-LE");
#endif
}
break;
case 0xFE:
- if (s[1] == 0xFF) { /* UTF-16 big-endian */
+ if (s[1] == 0xFF) { /* UTF-16 big-endian? */
#ifndef PERL_NO_UTF16_FILTER
- DEBUG_p(PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding\n"));
+ if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (BOM)\n");
s += 2;
+ utf16be:
if (PL_bufend > (char *)s) {
U8 *news;
I32 newlen;
filter_add(utf16_textfilter, NULL);
New(898, news, (PL_bufend - (char*)s) * 3 / 2 + 1, U8);
- PL_bufend = (char*)utf16_to_utf8(s, news,
- PL_bufend - (char*)s,
- &newlen);
- Copy(news, s, newlen, U8);
- SvCUR_set(PL_linestr, newlen);
- PL_bufend = SvPVX(PL_linestr) + newlen;
- news[newlen++] = '\0';
+ PL_bufend =
+ (char*)utf16_to_utf8(s, news,
+ PL_bufend - (char*)s,
+ &newlen);
+ sv_setpvn(PL_linestr, (const char*)news, newlen);
Safefree(news);
+ SvUTF8_on(PL_linestr);
+ s = (U8*)SvPVX(PL_linestr);
+ PL_bufend = SvPVX(PL_linestr) + newlen;
}
#else
- Perl_croak(aTHX_ "Unsupported script encoding");
+ Perl_croak(aTHX_ "Unsupported script encoding UTF16-BE");
#endif
}
break;
case 0xEF:
if (slen > 2 && s[1] == 0xBB && s[2] == 0xBF) {
- DEBUG_p(PerlIO_printf(Perl_debug_log, "UTF-8 script encoding\n"));
+ if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-8 script encoding (BOM)\n");
s += 3; /* UTF-8 */
}
break;
case 0:
- if (slen > 3 && s[1] == 0 && /* UTF-32 big-endian */
- s[2] == 0xFE && s[3] == 0xFF)
- {
- Perl_croak(aTHX_ "Unsupported script encoding");
+ if (slen > 3) {
+ if (s[1] == 0) {
+ if (s[2] == 0xFE && s[3] == 0xFF) {
+ /* UTF-32 big-endian */
+ Perl_croak(aTHX_ "Unsupported script encoding UTF32-BE");
+ }
+ }
+ else if (s[2] == 0 && s[3] != 0) {
+ /* Leading bytes
+ * 00 xx 00 xx
+ * are a good indicator of UTF-16BE. */
+ if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (no BOM)\n");
+ goto utf16be;
+ }
}
+ default:
+ if (slen > 3 && s[1] == 0 && s[2] != 0 && s[3] == 0) {
+ /* Leading bytes
+ * xx 00 xx 00
+ * are a good indicator of UTF-16LE. */
+ if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16LE script encoding (no BOM)\n");
+ goto utf16le;
+ }
}
return (char*)s;
}