diff options
-rw-r--r-- | gnu/usr.bin/perl/dist/Devel-PPPort/parts/inc/misc | 2431 |
1 files changed, 121 insertions, 2310 deletions
diff --git a/gnu/usr.bin/perl/dist/Devel-PPPort/parts/inc/misc b/gnu/usr.bin/perl/dist/Devel-PPPort/parts/inc/misc index deb1fb87a63..949c481088e 100644 --- a/gnu/usr.bin/perl/dist/Devel-PPPort/parts/inc/misc +++ b/gnu/usr.bin/perl/dist/Devel-PPPort/parts/inc/misc @@ -12,27 +12,43 @@ =provides __UNDEFINED__ -END_EXTERN_C -EXTERN_C -INT2PTR -MUTABLE_PTR -NVTYPE -PERLIO_FUNCS_CAST -PERLIO_FUNCS_DECL +PERL_UNUSED_DECL PERL_UNUSED_ARG +PERL_UNUSED_VAR PERL_UNUSED_CONTEXT -PERL_UNUSED_DECL PERL_UNUSED_RESULT -PERL_UNUSED_VAR +PERL_GCC_BRACE_GROUPS_FORBIDDEN PERL_USE_GCC_BRACE_GROUPS -PTR2ul +PERLIO_FUNCS_DECL +PERLIO_FUNCS_CAST +NVTYPE +INT2PTR PTRV +NUM2PTR +PERL_HASH +PTR2IV +PTR2UV +PTR2NV +PTR2ul START_EXTERN_C -STMT_END +END_EXTERN_C +EXTERN_C STMT_START -SvRX +STMT_END +UTF8_MAXBYTES WIDEST_UTYPE XSRETURN +HeUTF8 +C_ARRAY_LENGTH +C_ARRAY_END +SvRX +SvRXOK +cBOOL +OpHAS_SIBLING +OpSIBLING +OpMORESIB_set +OpLASTSIB_set +OpMAYBESIB_set =implementation @@ -42,91 +58,27 @@ __UNDEFINED__ OpSIBLING(o) (0 + (o)->op_sibling) __UNDEFINED__ OpMORESIB_set(o, sib) ((o)->op_sibling = (sib)) __UNDEFINED__ OpLASTSIB_set(o, parent) ((o)->op_sibling = NULL) __UNDEFINED__ OpMAYBESIB_set(o, sib, parent) ((o)->op_sibling = (sib)) -__UNDEFINED__ HEf_SVKEY -2 -#if defined(DEBUGGING) && !defined(__COVERITY__) -__UNDEFINED__ __ASSERT_(statement) assert(statement), -#else -__UNDEFINED__ __ASSERT_(statement) +#ifndef SvRX +#if { NEED SvRX } + +void * +SvRX(pTHX_ SV *rv) +{ + if (SvROK(rv)) { + SV *sv = SvRV(rv); + if (SvMAGICAL(sv)) { + MAGIC *mg = mg_find(sv, PERL_MAGIC_qr); + if (mg && mg->mg_obj) { + return mg->mg_obj; + } + } + } + return 0; +} +#endif #endif -/* These could become provided if/when they become part of the public API */ -__UNDEF_NOT_PROVIDED__ withinCOUNT(c, l, n) \ - (((WIDEST_UTYPE) (((c)) - ((l) | 0))) <= (((WIDEST_UTYPE) ((n) | 0)))) -__UNDEF_NOT_PROVIDED__ inRANGE(c, l, u) \ - ( (sizeof(c) == sizeof(U8)) ? withinCOUNT(((U8) (c)), (l), ((u) - (l))) \ - : (sizeof(c) == sizeof(U16)) ? withinCOUNT(((U16) (c)), (l), ((u) - (l))) \ - : (sizeof(c) == sizeof(U32)) ? withinCOUNT(((U32) (c)), (l), ((u) - (l))) \ - : (withinCOUNT(((WIDEST_UTYPE) (c)), (l), ((u) - (l))))) - -/* The '| 0' part ensures a compiler error if c is not integer (like e.g., a - * pointer) */ -#undef FITS_IN_8_BITS /* handy.h version uses a core-only constant */ -__UNDEF_NOT_PROVIDED__ FITS_IN_8_BITS(c) ( (sizeof(c) == 1) \ - || !(((WIDEST_UTYPE)((c) | 0)) & ~0xFF)) - -/* Create the macro for "is'macro'_utf8_safe(s, e)". For code points below - * 256, it calls the equivalent _L1 macro by converting the UTF-8 to code - * point. That is so that it can automatically get the bug fixes done in this - * file. */ -#define D_PPP_IS_GENERIC_UTF8_SAFE(s, e, macro) \ - (((e) - (s)) <= 0 \ - ? 0 \ - : UTF8_IS_INVARIANT((s)[0]) \ - ? is ## macro ## _L1((s)[0]) \ - : (((e) - (s)) < UTF8SKIP(s)) \ - ? 0 \ - : UTF8_IS_DOWNGRADEABLE_START((s)[0]) \ - /* The cast in the line below is only to silence warnings */ \ - ? is ## macro ## _L1((WIDEST_UTYPE) LATIN1_TO_NATIVE( \ - UTF8_ACCUMULATE(NATIVE_UTF8_TO_I8((s)[0]) \ - & UTF_START_MASK(2), \ - (s)[1]))) \ - : is ## macro ## _utf8(s)) - -/* Create the macro for "is'macro'_LC_utf8_safe(s, e)". For code points below - * 256, it calls the equivalent _L1 macro by converting the UTF-8 to code - * point. That is so that it can automatically get the bug fixes done in this - * file. */ -#define D_PPP_IS_GENERIC_LC_UTF8_SAFE(s, e, macro) \ - (((e) - (s)) <= 0 \ - ? 0 \ - : UTF8_IS_INVARIANT((s)[0]) \ - ? is ## macro ## _LC((s)[0]) \ - : (((e) - (s)) < UTF8SKIP(s)) \ - ? 0 \ - : UTF8_IS_DOWNGRADEABLE_START((s)[0]) \ - /* The cast in the line below is only to silence warnings */ \ - ? is ## macro ## _LC((WIDEST_UTYPE) LATIN1_TO_NATIVE( \ - UTF8_ACCUMULATE(NATIVE_UTF8_TO_I8((s)[0]) \ - & UTF_START_MASK(2), \ - (s)[1]))) \ - : is ## macro ## _utf8(s)) - -/* A few of the early functions are broken. For these and the non-LC case, - * machine generated code is substituted. But that code doesn't work for - * locales. This is just like the above macro, but at the end, we call the - * macro we've generated for the above 255 case, which is correct since locale - * isn't involved. This will generate extra code to handle the 0-255 inputs, - * but hopefully it will be optimized out by the C compiler. But just in case - * it isn't, this macro is only used on the few versions that are broken */ - -#define D_PPP_IS_GENERIC_LC_UTF8_SAFE_BROKEN(s, e, macro) \ - (((e) - (s)) <= 0 \ - ? 0 \ - : UTF8_IS_INVARIANT((s)[0]) \ - ? is ## macro ## _LC((s)[0]) \ - : (((e) - (s)) < UTF8SKIP(s)) \ - ? 0 \ - : UTF8_IS_DOWNGRADEABLE_START((s)[0]) \ - /* The cast in the line below is only to silence warnings */ \ - ? is ## macro ## _LC((WIDEST_UTYPE) LATIN1_TO_NATIVE( \ - UTF8_ACCUMULATE(NATIVE_UTF8_TO_I8((s)[0]) \ - & UTF_START_MASK(2), \ - (s)[1]))) \ - : is ## macro ## _utf8_safe(s, e)) - -__UNDEFINED__ SvRX(rv) (SvROK((rv)) ? (SvMAGICAL(SvRV((rv))) ? (mg_find(SvRV((rv)), PERL_MAGIC_qr) ? mg_find(SvRV((rv)), PERL_MAGIC_qr)->mg_obj : NULL) : NULL) : NULL) __UNDEFINED__ SvRXOK(sv) (!!SvRX(sv)) #ifndef PERL_UNUSED_DECL @@ -223,9 +175,9 @@ __UNDEFINED__ PTR2NV(p) NUM2PTR(NV,p) # define EXTERN_C extern #endif -#if { VERSION < 5.004 } || defined(PERL_GCC_PEDANTIC) +#if defined(PERL_GCC_PEDANTIC) # ifndef PERL_GCC_BRACE_GROUPS_FORBIDDEN -__UNDEF_NOT_PROVIDED__ PERL_GCC_BRACE_GROUPS_FORBIDDEN +# define PERL_GCC_BRACE_GROUPS_FORBIDDEN # endif #endif @@ -260,9 +212,6 @@ __UNDEFINED__ DEFSV_set(sv) (DEFSV = (sv)) /* Older perls (<=5.003) lack AvFILLp */ __UNDEFINED__ AvFILLp AvFILL -__UNDEFINED__ av_tindex AvFILL -__UNDEFINED__ av_top_index AvFILL - __UNDEFINED__ ERRSV get_sv("@",FALSE) /* Hint: gv_stashpvn @@ -313,6 +262,8 @@ __UNDEFINED__ dVAR dNOOP __UNDEFINED__ SVf "_" +__UNDEFINED__ UTF8_MAXBYTES UTF8_MAXLEN + __UNDEFINED__ CPERLscope(x) x __UNDEFINED__ PERL_HASH(hash,str,len) \ @@ -348,6 +299,26 @@ typedef OP* (CPERLscope(*Perl_check_t)) (pTHX_ OP*); #endif +__UNDEFINED__ isPSXSPC(c) (isSPACE(c) || (c) == '\v') +__UNDEFINED__ isBLANK(c) ((c) == ' ' || (c) == '\t') +#ifdef EBCDIC +__UNDEFINED__ isALNUMC(c) isalnum(c) +__UNDEFINED__ isASCII(c) isascii(c) +__UNDEFINED__ isCNTRL(c) iscntrl(c) +__UNDEFINED__ isGRAPH(c) isgraph(c) +__UNDEFINED__ isPRINT(c) isprint(c) +__UNDEFINED__ isPUNCT(c) ispunct(c) +__UNDEFINED__ isXDIGIT(c) isxdigit(c) +#else +# if { VERSION < 5.10.0 } +/* Hint: isPRINT + * The implementation in older perl versions includes all of the + * isSPACE() characters, which is wrong. The version provided by + * Devel::PPPort always overrides a present buggy version. + */ +# undef isPRINT +# endif + #ifndef WIDEST_UTYPE # ifdef QUADKIND # ifdef U64TYPE @@ -360,696 +331,13 @@ typedef OP* (CPERLscope(*Perl_check_t)) (pTHX_ OP*); # endif #endif -/* On versions without NATIVE_TO_ASCII, only ASCII is supported */ -#if defined(EBCDIC) && defined(NATIVE_TO_ASCI) -__UNDEFINED__ NATIVE_TO_LATIN1(c) NATIVE_TO_ASCII(c) -__UNDEFINED__ LATIN1_TO_NATIVE(c) ASCII_TO_NATIVE(c) -__UNDEFINED__ NATIVE_TO_UNI(c) ((c) > 255 ? (c) : NATIVE_TO_LATIN1(c)) -__UNDEFINED__ UNI_TO_NATIVE(c) ((c) > 255 ? (c) : LATIN1_TO_NATIVE(c)) -#else -__UNDEFINED__ NATIVE_TO_LATIN1(c) (c) -__UNDEFINED__ LATIN1_TO_NATIVE(c) (c) -__UNDEFINED__ NATIVE_TO_UNI(c) (c) -__UNDEFINED__ UNI_TO_NATIVE(c) (c) -#endif - -/* Warning: LATIN1_TO_NATIVE, NATIVE_TO_LATIN1 NATIVE_TO_UNI UNI_TO_NATIVE - EBCDIC is not supported on versions earlier than 5.7.1 - */ - -/* The meaning of this changed; use the modern version */ -#undef isPSXSPC -#undef isPSXSPC_A -#undef isPSXSPC_L1 - -/* Hint: isPSXSPC, isPSXSPC_A, isPSXSPC_L1, isPSXSPC_utf8_safe - This is equivalent to the corresponding isSPACE-type macro. On perls - before 5.18, this matched a vertical tab and SPACE didn't. But the - ppport.h SPACE version does match VT in all perl releases. Since VT's are - extremely rarely found in real-life files, this difference effectively - doesn't matter */ - -/* Hint: isSPACE, isSPACE_A, isSPACE_L1, isSPACE_utf8_safe - Until Perl 5.18, this did not match the vertical tab (VT). The ppport.h - version does match it in all perl releases. Since VT's are extremely rarely - found in real-life files, this difference effectively doesn't matter */ - -#ifdef EBCDIC - -/* This is the first version where these macros are fully correct on EBCDIC - * platforms. Relying on * the C library functions, as earlier releases did, - * causes problems with * locales */ -# if { VERSION < 5.22.0 } -# undef isALNUM -# undef isALNUM_A -# undef isALNUM_L1 -# undef isALNUMC -# undef isALNUMC_A -# undef isALNUMC_L1 -# undef isALPHA -# undef isALPHA_A -# undef isALPHA_L1 -# undef isALPHANUMERIC -# undef isALPHANUMERIC_A -# undef isALPHANUMERIC_L1 -# undef isASCII -# undef isASCII_A -# undef isASCII_L1 -# undef isBLANK -# undef isBLANK_A -# undef isBLANK_L1 -# undef isCNTRL -# undef isCNTRL_A -# undef isCNTRL_L1 -# undef isDIGIT -# undef isDIGIT_A -# undef isDIGIT_L1 -# undef isGRAPH -# undef isGRAPH_A -# undef isGRAPH_L1 -# undef isIDCONT -# undef isIDCONT_A -# undef isIDCONT_L1 -# undef isIDFIRST -# undef isIDFIRST_A -# undef isIDFIRST_L1 -# undef isLOWER -# undef isLOWER_A -# undef isLOWER_L1 -# undef isOCTAL -# undef isOCTAL_A -# undef isOCTAL_L1 -# undef isPRINT -# undef isPRINT_A -# undef isPRINT_L1 -# undef isPUNCT -# undef isPUNCT_A -# undef isPUNCT_L1 -# undef isSPACE -# undef isSPACE_A -# undef isSPACE_L1 -# undef isUPPER -# undef isUPPER_A -# undef isUPPER_L1 -# undef isWORDCHAR -# undef isWORDCHAR_A -# undef isWORDCHAR_L1 -# undef isXDIGIT -# undef isXDIGIT_A -# undef isXDIGIT_L1 -# endif - -__UNDEFINED__ isASCII(c) (isCNTRL(c) || isPRINT(c)) - - /* The below is accurate for all EBCDIC code pages supported by - * all the versions of Perl overridden by this */ -__UNDEFINED__ isCNTRL(c) ( (c) == '\0' || (c) == '\a' || (c) == '\b' \ - || (c) == '\f' || (c) == '\n' || (c) == '\r' \ - || (c) == '\t' || (c) == '\v' \ - || ((c) <= 3 && (c) >= 1) /* SOH, STX, ETX */ \ - || (c) == 7 /* U+7F DEL */ \ - || ((c) <= 0x13 && (c) >= 0x0E) /* SO, SI */ \ - /* DLE, DC[1-3] */ \ - || (c) == 0x18 /* U+18 CAN */ \ - || (c) == 0x19 /* U+19 EOM */ \ - || ((c) <= 0x1F && (c) >= 0x1C) /* [FGRU]S */ \ - || (c) == 0x26 /* U+17 ETB */ \ - || (c) == 0x27 /* U+1B ESC */ \ - || (c) == 0x2D /* U+05 ENQ */ \ - || (c) == 0x2E /* U+06 ACK */ \ - || (c) == 0x32 /* U+16 SYN */ \ - || (c) == 0x37 /* U+04 EOT */ \ - || (c) == 0x3C /* U+14 DC4 */ \ - || (c) == 0x3D /* U+15 NAK */ \ - || (c) == 0x3F /* U+1A SUB */ \ - ) - -#if '^' == 106 /* EBCDIC POSIX-BC */ -# define D_PPP_OUTLIER_CONTROL 0x5F -#else /* EBCDIC 1047 037 */ -# define D_PPP_OUTLIER_CONTROL 0xFF -#endif - -/* The controls are everything below blank, plus one outlier */ -__UNDEFINED__ isCNTRL_L1(c) ((WIDEST_UTYPE) (c) < ' ' \ - || (WIDEST_UTYPE) (c) == D_PPP_OUTLIER_CONTROL) -/* The ordering of the tests in this and isUPPER are to exclude most characters - * early */ -__UNDEFINED__ isLOWER(c) ( (c) >= 'a' && (c) <= 'z' \ - && ( (c) <= 'i' \ - || ((c) >= 'j' && (c) <= 'r') \ - || (c) >= 's')) -__UNDEFINED__ isUPPER(c) ( (c) >= 'A' && (c) <= 'Z' \ - && ( (c) <= 'I' \ - || ((c) >= 'J' && (c) <= 'R') \ - || (c) >= 'S')) - -#else /* Above is EBCDIC; below is ASCII */ - -# if { VERSION < 5.4.0 } -/* The implementation of these in older perl versions can give wrong results if - * the C program locale is set to other than the C locale */ -# undef isALNUM -# undef isALNUM_A -# undef isALPHA -# undef isALPHA_A -# undef isDIGIT -# undef isDIGIT_A -# undef isIDFIRST -# undef isIDFIRST_A -# undef isLOWER -# undef isLOWER_A -# undef isUPPER -# undef isUPPER_A -# endif - -# if { VERSION == 5.7.0 } /* this perl made space GRAPH */ -# undef isGRAPH -# endif - -# if { VERSION < 5.8.0 } /* earlier perls omitted DEL */ -# undef isCNTRL -# endif - -# if { VERSION < 5.10.0 } -/* earlier perls included all of the isSPACE() characters, which is wrong. The - * version provided by Devel::PPPort always overrides an existing buggy - * version. */ -# undef isPRINT -# undef isPRINT_A -# endif - -# if { VERSION < 5.14.0 } -/* earlier perls always returned true if the parameter was a signed char */ -# undef isASCII -# undef isASCII_A -# endif - -# if { VERSION < 5.17.8 } /* earlier perls didn't include PILCROW, SECTION SIGN */ -# undef isPUNCT_L1 -# endif - -# if { VERSION < 5.13.7 } /* khw didn't investigate why this failed */ -# undef isALNUMC_L1 -#endif - -# if { VERSION < 5.20.0 } /* earlier perls didn't include \v */ -# undef isSPACE -# undef isSPACE_A -# undef isSPACE_L1 - -# endif - +__UNDEFINED__ isALNUMC(c) (isALPHA(c) || isDIGIT(c)) __UNDEFINED__ isASCII(c) ((WIDEST_UTYPE) (c) <= 127) __UNDEFINED__ isCNTRL(c) ((WIDEST_UTYPE) (c) < ' ' || (c) == 127) -__UNDEFINED__ isCNTRL_L1(c) (isCNTRL(c) || ( (WIDEST_UTYPE) (c) <= 0x9F \ - && (WIDEST_UTYPE) (c) >= 0x80)) -__UNDEFINED__ isLOWER(c) ((c) >= 'a' && (c) <= 'z') -__UNDEFINED__ isUPPER(c) ((c) <= 'Z' && (c) >= 'A') - -#endif /* Below are definitions common to EBCDIC and ASCII */ - -__UNDEFINED__ isASCII_L1(c) isASCII(c) -__UNDEFINED__ isASCII_LC(c) isASCII(c) -__UNDEFINED__ isALNUM(c) isWORDCHAR(c) -__UNDEFINED__ isALNUMC(c) isALPHANUMERIC(c) -__UNDEFINED__ isALNUMC_L1(c) isALPHANUMERIC_L1(c) -__UNDEFINED__ isALPHA(c) (isUPPER(c) || isLOWER(c)) -__UNDEFINED__ isALPHA_L1(c) (isUPPER_L1(c) || isLOWER_L1(c)) -__UNDEFINED__ isALPHANUMERIC(c) (isALPHA(c) || isDIGIT(c)) -__UNDEFINED__ isALPHANUMERIC_L1(c) (isALPHA_L1(c) || isDIGIT(c)) -__UNDEFINED__ isALPHANUMERIC_LC(c) (isALPHA_LC(c) || isDIGIT_LC(c)) -__UNDEFINED__ isBLANK(c) ((c) == ' ' || (c) == '\t') -__UNDEFINED__ isBLANK_L1(c) ( isBLANK(c) \ - || ( FITS_IN_8_BITS(c) \ - && NATIVE_TO_LATIN1((U8) c) == 0xA0)) -__UNDEFINED__ isBLANK_LC(c) isBLANK(c) -__UNDEFINED__ isDIGIT(c) inRANGE(c, '0', '9') -__UNDEFINED__ isDIGIT_L1(c) isDIGIT(c) -__UNDEFINED__ isGRAPH(c) (isWORDCHAR(c) || isPUNCT(c)) -__UNDEFINED__ isGRAPH_L1(c) ( isPRINT_L1(c) \ - && (c) != ' ' \ - && NATIVE_TO_LATIN1((U8) c) != 0xA0) -__UNDEFINED__ isIDCONT(c) isWORDCHAR(c) -__UNDEFINED__ isIDCONT_L1(c) isWORDCHAR_L1(c) -__UNDEFINED__ isIDCONT_LC(c) isWORDCHAR_LC(c) -__UNDEFINED__ isIDFIRST(c) (isALPHA(c) || (c) == '_') -__UNDEFINED__ isIDFIRST_L1(c) (isALPHA_L1(c) || (U8) (c) == '_') -__UNDEFINED__ isIDFIRST_LC(c) (isALPHA_LC(c) || (U8) (c) == '_') -__UNDEFINED__ isLOWER_L1(c) ( isLOWER(c) \ - || ( FITS_IN_8_BITS(c) \ - && ( ( NATIVE_TO_LATIN1((U8) c) >= 0xDF \ - && NATIVE_TO_LATIN1((U8) c) != 0xF7) \ - || NATIVE_TO_LATIN1((U8) c) == 0xAA \ - || NATIVE_TO_LATIN1((U8) c) == 0xBA \ - || NATIVE_TO_LATIN1((U8) c) == 0xB5))) -__UNDEFINED__ isOCTAL(c) (((WIDEST_UTYPE)((c)) & ~7) == '0') -__UNDEFINED__ isOCTAL_L1(c) isOCTAL(c) -__UNDEFINED__ isPRINT(c) (isGRAPH(c) || (c) == ' ') -__UNDEFINED__ isPRINT_L1(c) (FITS_IN_8_BITS(c) && ! isCNTRL_L1(c)) -__UNDEFINED__ isPSXSPC(c) isSPACE(c) -__UNDEFINED__ isPSXSPC_L1(c) isSPACE_L1(c) -__UNDEFINED__ isPUNCT(c) ( (c) == '-' || (c) == '!' || (c) == '"' \ - || (c) == '#' || (c) == '$' || (c) == '%' \ - || (c) == '&' || (c) == '\'' || (c) == '(' \ - || (c) == ')' || (c) == '*' || (c) == '+' \ - || (c) == ',' || (c) == '.' || (c) == '/' \ - || (c) == ':' || (c) == ';' || (c) == '<' \ - || (c) == '=' || (c) == '>' || (c) == '?' \ - || (c) == '@' || (c) == '[' || (c) == '\\' \ - || (c) == ']' || (c) == '^' || (c) == '_' \ - || (c) == '`' || (c) == '{' || (c) == '|' \ - || (c) == '}' || (c) == '~') -__UNDEFINED__ isPUNCT_L1(c) ( isPUNCT(c) \ - || ( FITS_IN_8_BITS(c) \ - && ( NATIVE_TO_LATIN1((U8) c) == 0xA1 \ - || NATIVE_TO_LATIN1((U8) c) == 0xA7 \ - || NATIVE_TO_LATIN1((U8) c) == 0xAB \ - || NATIVE_TO_LATIN1((U8) c) == 0xB6 \ - || NATIVE_TO_LATIN1((U8) c) == 0xB7 \ - || NATIVE_TO_LATIN1((U8) c) == 0xBB \ - || NATIVE_TO_LATIN1((U8) c) == 0xBF))) -__UNDEFINED__ isSPACE(c) ( isBLANK(c) || (c) == '\n' || (c) == '\r' \ - || (c) == '\v' || (c) == '\f') -__UNDEFINED__ isSPACE_L1(c) ( isSPACE(c) \ - || (FITS_IN_8_BITS(c) \ - && ( NATIVE_TO_LATIN1((U8) c) == 0x85 \ - || NATIVE_TO_LATIN1((U8) c) == 0xA0))) -__UNDEFINED__ isUPPER_L1(c) ( isUPPER(c) \ - || (FITS_IN_8_BITS(c) \ - && ( NATIVE_TO_LATIN1((U8) c) >= 0xC0 \ - && NATIVE_TO_LATIN1((U8) c) <= 0xDE \ - && NATIVE_TO_LATIN1((U8) c) != 0xD7))) -__UNDEFINED__ isWORDCHAR(c) (isALPHANUMERIC(c) || (c) == '_') -__UNDEFINED__ isWORDCHAR_L1(c) (isIDFIRST_L1(c) || isDIGIT(c)) -__UNDEFINED__ isWORDCHAR_LC(c) (isIDFIRST_LC(c) || isDIGIT_LC(c)) -__UNDEFINED__ isXDIGIT(c) ( isDIGIT(c) \ - || ((c) >= 'a' && (c) <= 'f') \ - || ((c) >= 'A' && (c) <= 'F')) -__UNDEFINED__ isXDIGIT_L1(c) isXDIGIT(c) -__UNDEFINED__ isXDIGIT_LC(c) isxdigit(c) - -__UNDEFINED__ isALNUM_A(c) isALNUM(c) -__UNDEFINED__ isALNUMC_A(c) isALNUMC(c) -__UNDEFINED__ isALPHA_A(c) isALPHA(c) -__UNDEFINED__ isALPHANUMERIC_A(c) isALPHANUMERIC(c) -__UNDEFINED__ isASCII_A(c) isASCII(c) -__UNDEFINED__ isBLANK_A(c) isBLANK(c) -__UNDEFINED__ isCNTRL_A(c) isCNTRL(c) -__UNDEFINED__ isDIGIT_A(c) isDIGIT(c) -__UNDEFINED__ isGRAPH_A(c) isGRAPH(c) -__UNDEFINED__ isIDCONT_A(c) isIDCONT(c) -__UNDEFINED__ isIDFIRST_A(c) isIDFIRST(c) -__UNDEFINED__ isLOWER_A(c) isLOWER(c) -__UNDEFINED__ isOCTAL_A(c) isOCTAL(c) -__UNDEFINED__ isPRINT_A(c) isPRINT(c) -__UNDEFINED__ isPSXSPC_A(c) isPSXSPC(c) -__UNDEFINED__ isPUNCT_A(c) isPUNCT(c) -__UNDEFINED__ isSPACE_A(c) isSPACE(c) -__UNDEFINED__ isUPPER_A(c) isUPPER(c) -__UNDEFINED__ isWORDCHAR_A(c) isWORDCHAR(c) -__UNDEFINED__ isXDIGIT_A(c) isXDIGIT(c) - -__UNDEFINED__ isASCII_utf8_safe(s,e) (((e) - (s)) <= 0 ? 0 : isASCII(*(s))) -__UNDEFINED__ isASCII_uvchr(c) (FITS_IN_8_BITS(c) ? isASCII_L1(c) : 0) - -#if { VERSION >= 5.006 } -# ifdef isALPHA_uni /* If one defined, all are; this is just an exemplar */ -# define D_PPP_is_ctype(upper, lower, c) \ - (FITS_IN_8_BITS(c) \ - ? is ## upper ## _L1(c) \ - : is ## upper ## _uni((UV) (c))) /* _uni is old synonym */ -# else -# define D_PPP_is_ctype(upper, lower, c) \ - (FITS_IN_8_BITS(c) \ - ? is ## upper ## _L1(c) \ - : is_uni_ ## lower((UV) (c))) /* is_uni_ is even older */ -# endif - -__UNDEFINED__ isALPHA_uvchr(c) D_PPP_is_ctype(ALPHA, alpha, c) -__UNDEFINED__ isALPHANUMERIC_uvchr(c) (isALPHA_uvchr(c) || isDIGIT_uvchr(c)) -# ifdef is_uni_blank -__UNDEFINED__ isBLANK_uvchr(c) D_PPP_is_ctype(BLANK, blank, c) -# else -__UNDEFINED__ isBLANK_uvchr(c) (FITS_IN_8_BITS(c) \ - ? isBLANK_L1(c) \ - : ( (UV) (c) == 0x1680 /* Unicode 3.0 */ \ - || inRANGE((UV) (c), 0x2000, 0x200A) \ - || (UV) (c) == 0x202F /* Unicode 3.0 */\ - || (UV) (c) == 0x205F /* Unicode 3.2 */\ - || (UV) (c) == 0x3000)) -# endif -__UNDEFINED__ isCNTRL_uvchr(c) D_PPP_is_ctype(CNTRL, cntrl, c) -__UNDEFINED__ isDIGIT_uvchr(c) D_PPP_is_ctype(DIGIT, digit, c) -__UNDEFINED__ isGRAPH_uvchr(c) D_PPP_is_ctype(GRAPH, graph, c) -__UNDEFINED__ isIDCONT_uvchr(c) isWORDCHAR_uvchr(c) -__UNDEFINED__ isIDFIRST_uvchr(c) D_PPP_is_ctype(IDFIRST, idfirst, c) -__UNDEFINED__ isLOWER_uvchr(c) D_PPP_is_ctype(LOWER, lower, c) -__UNDEFINED__ isPRINT_uvchr(c) D_PPP_is_ctype(PRINT, print, c) -__UNDEFINED__ isPSXSPC_uvchr(c) isSPACE_uvchr(c) -__UNDEFINED__ isPUNCT_uvchr(c) D_PPP_is_ctype(PUNCT, punct, c) -__UNDEFINED__ isSPACE_uvchr(c) D_PPP_is_ctype(SPACE, space, c) -__UNDEFINED__ isUPPER_uvchr(c) D_PPP_is_ctype(UPPER, upper, c) -__UNDEFINED__ isXDIGIT_uvchr(c) D_PPP_is_ctype(XDIGIT, xdigit, c) -__UNDEFINED__ isWORDCHAR_uvchr(c) (FITS_IN_8_BITS(c) \ - ? isWORDCHAR_L1(c) : isALPHANUMERIC_uvchr(c)) - -__UNDEFINED__ isALPHA_utf8_safe(s,e) D_PPP_IS_GENERIC_UTF8_SAFE(s, e, ALPHA) -# ifdef isALPHANUMERIC_utf8 -__UNDEFINED__ isALPHANUMERIC_utf8_safe(s,e) \ - D_PPP_IS_GENERIC_UTF8_SAFE(s, e, ALPHANUMERIC) -# else -__UNDEFINED__ isALPHANUMERIC_utf8_safe(s,e) \ - (isALPHA_utf8_safe(s,e) || isDIGIT_utf8_safe(s,e)) -# endif - -/* This was broken before 5.18, and just use this instead of worrying about - * which releases the official works on */ -# if 'A' == 65 -__UNDEFINED__ isBLANK_utf8_safe(s,e) \ -( ( LIKELY((e) > (s)) ) ? /* Machine generated */ \ - ( ( 0x09 == ((const U8*)s)[0] || 0x20 == ((const U8*)s)[0] ) ? 1 \ - : ( LIKELY(((e) - (s)) >= UTF8SKIP(s)) ) ? \ - ( ( 0xC2 == ((const U8*)s)[0] ) ? \ - ( ( 0xA0 == ((const U8*)s)[1] ) ? 2 : 0 ) \ - : ( 0xE1 == ((const U8*)s)[0] ) ? \ - ( ( ( 0x9A == ((const U8*)s)[1] ) && ( 0x80 == ((const U8*)s)[2] ) ) ? 3 : 0 )\ - : ( 0xE2 == ((const U8*)s)[0] ) ? \ - ( ( 0x80 == ((const U8*)s)[1] ) ? \ - ( ( inRANGE(((const U8*)s)[2], 0x80, 0x8A ) || 0xAF == ((const U8*)s)[2] ) ? 3 : 0 )\ - : ( ( 0x81 == ((const U8*)s)[1] ) && ( 0x9F == ((const U8*)s)[2] ) ) ? 3 : 0 )\ - : ( ( ( 0xE3 == ((const U8*)s)[0] ) && ( 0x80 == ((const U8*)s)[1] ) ) && ( 0x80 == ((const U8*)s)[2] ) ) ? 3 : 0 )\ - : 0 ) \ - : 0 ) - -# elif 'A' == 193 && '^' == 95 /* EBCDIC 1047 */ - -__UNDEFINED__ isBLANK_utf8_safe(s,e) \ -( ( LIKELY((e) > (s)) ) ? \ - ( ( 0x05 == ((const U8*)s)[0] || 0x40 == ((const U8*)s)[0] ) ? 1 \ - : ( LIKELY(((e) - (s)) >= UTF8SKIP(s)) ) ? \ - ( ( 0x80 == ((const U8*)s)[0] ) ? \ - ( ( 0x41 == ((const U8*)s)[1] ) ? 2 : 0 ) \ - : ( 0xBC == ((const U8*)s)[0] ) ? \ - ( ( ( 0x63 == ((const U8*)s)[1] ) && ( 0x41 == ((const U8*)s)[2] ) ) ? 3 : 0 )\ - : ( 0xCA == ((const U8*)s)[0] ) ? \ - ( ( 0x41 == ((const U8*)s)[1] ) ? \ - ( ( inRANGE(((const U8*)s)[2], 0x41, 0x4A ) || 0x51 == ((const U8*)s)[2] ) ? 3 : 0 )\ - : ( 0x42 == ((const U8*)s)[1] ) ? \ - ( ( 0x56 == ((const U8*)s)[2] ) ? 3 : 0 ) \ - : ( ( 0x43 == ((const U8*)s)[1] ) && ( 0x73 == ((const U8*)s)[2] ) ) ? 3 : 0 )\ - : ( ( ( 0xCE == ((const U8*)s)[0] ) && ( 0x41 == ((const U8*)s)[1] ) ) && ( 0x41 == ((const U8*)s)[2] ) ) ? 3 : 0 )\ - : 0 ) \ -: 0 ) - -# elif 'A' == 193 && '^' == 176 /* EBCDIC 037 */ - -__UNDEFINED__ isBLANK_utf8_safe(s,e) \ -( ( LIKELY((e) > (s)) ) ? \ - ( ( 0x05 == ((const U8*)s)[0] || 0x40 == ((const U8*)s)[0] ) ? 1 \ - : ( LIKELY(((e) - (s)) >= UTF8SKIP(s)) ) ? \ - ( ( 0x78 == ((const U8*)s)[0] ) ? \ - ( ( 0x41 == ((const U8*)s)[1] ) ? 2 : 0 ) \ - : ( 0xBD == ((const U8*)s)[0] ) ? \ - ( ( ( 0x62 == ((const U8*)s)[1] ) && ( 0x41 == ((const U8*)s)[2] ) ) ? 3 : 0 )\ - : ( 0xCA == ((const U8*)s)[0] ) ? \ - ( ( 0x41 == ((const U8*)s)[1] ) ? \ - ( ( inRANGE(((const U8*)s)[2], 0x41, 0x4A ) || 0x51 == ((const U8*)s)[2] ) ? 3 : 0 )\ - : ( 0x42 == ((const U8*)s)[1] ) ? \ - ( ( 0x56 == ((const U8*)s)[2] ) ? 3 : 0 ) \ - : ( ( 0x43 == ((const U8*)s)[1] ) && ( 0x72 == ((const U8*)s)[2] ) ) ? 3 : 0 )\ - : ( ( ( 0xCE == ((const U8*)s)[0] ) && ( 0x41 == ((const U8*)s)[1] ) ) && ( 0x41 == ((const U8*)s)[2] ) ) ? 3 : 0 )\ - : 0 ) \ -: 0 ) - -# else -# error Unknown character set -# endif - -__UNDEFINED__ isCNTRL_utf8_safe(s,e) D_PPP_IS_GENERIC_UTF8_SAFE(s, e, CNTRL) -__UNDEFINED__ isDIGIT_utf8_safe(s,e) D_PPP_IS_GENERIC_UTF8_SAFE(s, e, DIGIT) -__UNDEFINED__ isGRAPH_utf8_safe(s,e) D_PPP_IS_GENERIC_UTF8_SAFE(s, e, GRAPH) -# ifdef isIDCONT_utf8 -__UNDEFINED__ isIDCONT_utf8_safe(s,e) D_PPP_IS_GENERIC_UTF8_SAFE(s, e, IDCONT) -# else -__UNDEFINED__ isIDCONT_utf8_safe(s,e) isWORDCHAR_utf8_safe(s,e) -# endif - -__UNDEFINED__ isIDFIRST_utf8_safe(s,e) D_PPP_IS_GENERIC_UTF8_SAFE(s, e, IDFIRST) -__UNDEFINED__ isLOWER_utf8_safe(s,e) D_PPP_IS_GENERIC_UTF8_SAFE(s, e, LOWER) -__UNDEFINED__ isPRINT_utf8_safe(s,e) D_PPP_IS_GENERIC_UTF8_SAFE(s, e, PRINT) - -# undef isPSXSPC_utf8_safe /* Use the modern definition */ -__UNDEFINED__ isPSXSPC_utf8_safe(s,e) isSPACE_utf8_safe(s,e) - -__UNDEFINED__ isPUNCT_utf8_safe(s,e) D_PPP_IS_GENERIC_UTF8_SAFE(s, e, PUNCT) -__UNDEFINED__ isSPACE_utf8_safe(s,e) D_PPP_IS_GENERIC_UTF8_SAFE(s, e, SPACE) -__UNDEFINED__ isUPPER_utf8_safe(s,e) D_PPP_IS_GENERIC_UTF8_SAFE(s, e, UPPER) - -# ifdef isWORDCHAR_utf8 -__UNDEFINED__ isWORDCHAR_utf8_safe(s,e) D_PPP_IS_GENERIC_UTF8_SAFE(s, e, WORDCHAR) -# else -__UNDEFINED__ isWORDCHAR_utf8_safe(s,e) \ - (isALPHANUMERIC_utf8_safe(s,e) || (*(s)) == '_') -# endif - -/* This was broken before 5.12, and just use this instead of worrying about - * which releases the official works on */ -# if 'A' == 65 -__UNDEFINED__ isXDIGIT_utf8_safe(s,e) \ -( ( LIKELY((e) > (s)) ) ? \ - ( ( inRANGE(((const U8*)s)[0], 0x30, 0x39 ) || inRANGE(((const U8*)s)[0], 0x41, 0x46 ) || inRANGE(((const U8*)s)[0], 0x61, 0x66 ) ) ? 1\ - : ( ( LIKELY(((e) - (s)) >= UTF8SKIP(s)) ) && ( 0xEF == ((const U8*)s)[0] ) ) ? ( ( 0xBC == ((const U8*)s)[1] ) ?\ - ( ( inRANGE(((const U8*)s)[2], 0x90, 0x99 ) || inRANGE(((const U8*)s)[2], 0xA1, 0xA6 ) ) ? 3 : 0 )\ - : ( ( 0xBD == ((const U8*)s)[1] ) && ( inRANGE(((const U8*)s)[2], 0x81, 0x86 ) ) ) ? 3 : 0 ) : 0 )\ -: 0 ) - -# elif 'A' == 193 && '^' == 95 /* EBCDIC 1047 */ - -__UNDEFINED__ isXDIGIT_utf8_safe(s,e) \ -( ( LIKELY((e) > (s)) ) ? \ - ( ( inRANGE(((const U8*)s)[0], 0x81, 0x86 ) || inRANGE(((const U8*)s)[0], 0xC1, 0xC6 ) || inRANGE(((const U8*)s)[0], 0xF0, 0xF9 ) ) ? 1\ - : ( ( ( LIKELY(((e) - (s)) >= UTF8SKIP(s)) ) && ( 0xDD == ((const U8*)s)[0] ) ) && ( 0x73 == ((const U8*)s)[1] ) ) ? ( ( 0x67 == ((const U8*)s)[2] ) ?\ - ( ( inRANGE(((const U8*)s)[3], 0x57, 0x59 ) || inRANGE(((const U8*)s)[3], 0x62, 0x68 ) ) ? 4 : 0 )\ - : ( ( inRANGE(((const U8*)s)[2], 0x68, 0x69 ) ) && ( inRANGE(((const U8*)s)[3], 0x42, 0x47 ) ) ) ? 4 : 0 ) : 0 )\ -: 0 ) - -# elif 'A' == 193 && '^' == 176 /* EBCDIC 037 */ - -__UNDEFINED__ isXDIGIT_utf8_safe(s,e) \ -( ( LIKELY((e) > (s)) ) ? \ - ( ( inRANGE(((const U8*)s)[0], 0x81, 0x86 ) || inRANGE(((const U8*)s)[0], 0xC1, 0xC6 ) || inRANGE(((const U8*)s)[0], 0xF0, 0xF9 ) ) ? 1\ - : ( ( ( LIKELY(((e) - (s)) >= UTF8SKIP(s)) ) && ( 0xDD == ((const U8*)s)[0] ) ) && ( 0x72 == ((const U8*)s)[1] ) ) ? ( ( 0x66 == ((const U8*)s)[2] ) ?\ - ( ( inRANGE(((const U8*)s)[3], 0x57, 0x59 ) || 0x5F == ((const U8*)s)[3] || inRANGE(((const U8*)s)[3], 0x62, 0x67 ) ) ? 4 : 0 )\ - : ( ( inRANGE(((const U8*)s)[2], 0x67, 0x68 ) ) && ( inRANGE(((const U8*)s)[3], 0x42, 0x47 ) ) ) ? 4 : 0 ) : 0 )\ -: 0 ) - -# else -# error Unknown character set -# endif - -__UNDEFINED__ isALPHA_LC_utf8_safe(s,e) D_PPP_IS_GENERIC_LC_UTF8_SAFE(s, e, ALPHA) -# ifdef isALPHANUMERIC_utf8 -__UNDEFINED__ isALPHANUMERIC_LC_utf8_safe(s,e) \ - D_PPP_IS_GENERIC_LC_UTF8_SAFE(s, e, ALPHANUMERIC) -# else -__UNDEFINED__ isALPHANUMERIC_LC_utf8_safe(s,e) \ - (isALPHA_LC_utf8_safe(s,e) || isDIGIT_LC_utf8_safe(s,e)) -# endif - -__UNDEFINED__ isBLANK_LC_utf8_safe(s,e) \ - D_PPP_IS_GENERIC_LC_UTF8_SAFE_BROKEN(s, e, BLANK) -__UNDEFINED__ isCNTRL_LC_utf8_safe(s,e) D_PPP_IS_GENERIC_LC_UTF8_SAFE(s, e, CNTRL) -__UNDEFINED__ isDIGIT_LC_utf8_safe(s,e) D_PPP_IS_GENERIC_LC_UTF8_SAFE(s, e, DIGIT) -__UNDEFINED__ isGRAPH_LC_utf8_safe(s,e) D_PPP_IS_GENERIC_LC_UTF8_SAFE(s, e, GRAPH) -# ifdef isIDCONT_utf8 -__UNDEFINED__ isIDCONT_LC_utf8_safe(s,e) D_PPP_IS_GENERIC_LC_UTF8_SAFE(s, e, IDCONT) -# else -__UNDEFINED__ isIDCONT_LC_utf8_safe(s,e) isWORDCHAR_LC_utf8_safe(s,e) -# endif - -__UNDEFINED__ isIDFIRST_LC_utf8_safe(s,e) D_PPP_IS_GENERIC_LC_UTF8_SAFE(s, e, IDFIRST) -__UNDEFINED__ isLOWER_LC_utf8_safe(s,e) D_PPP_IS_GENERIC_LC_UTF8_SAFE(s, e, LOWER) -__UNDEFINED__ isPRINT_LC_utf8_safe(s,e) D_PPP_IS_GENERIC_LC_UTF8_SAFE(s, e, PRINT) - -# undef isPSXSPC_LC_utf8_safe /* Use the modern definition */ -__UNDEFINED__ isPSXSPC_LC_utf8_safe(s,e) isSPACE_LC_utf8_safe(s,e) - -__UNDEFINED__ isPUNCT_LC_utf8_safe(s,e) D_PPP_IS_GENERIC_LC_UTF8_SAFE(s, e, PUNCT) -__UNDEFINED__ isSPACE_LC_utf8_safe(s,e) D_PPP_IS_GENERIC_LC_UTF8_SAFE(s, e, SPACE) -__UNDEFINED__ isUPPER_LC_utf8_safe(s,e) D_PPP_IS_GENERIC_LC_UTF8_SAFE(s, e, UPPER) - -# ifdef isWORDCHAR_utf8 -__UNDEFINED__ isWORDCHAR_LC_utf8_safe(s,e) D_PPP_IS_GENERIC_LC_UTF8_SAFE(s, e, WORDCHAR) -# else -__UNDEFINED__ isWORDCHAR_LC_utf8_safe(s,e) \ - (isALPHANUMERIC_LC_utf8_safe(s,e) || (*(s)) == '_') -# endif - -__UNDEFINED__ isXDIGIT_LC_utf8_safe(s,e) \ - D_PPP_IS_GENERIC_LC_UTF8_SAFE_BROKEN(s, e, XDIGIT) - -/* Warning: isALPHANUMERIC_utf8_safe, isALPHA_utf8_safe, isASCII_utf8_safe, - * isBLANK_utf8_safe, isCNTRL_utf8_safe, isDIGIT_utf8_safe, isGRAPH_utf8_safe, - * isIDCONT_utf8_safe, isIDFIRST_utf8_safe, isLOWER_utf8_safe, - * isPRINT_utf8_safe, isPSXSPC_utf8_safe, isPUNCT_utf8_safe, isSPACE_utf8_safe, - * isUPPER_utf8_safe, isWORDCHAR_utf8_safe, isWORDCHAR_utf8_safe, - * isXDIGIT_utf8_safe, - * isALPHANUMERIC_LC_utf8_safe, isALPHA_LC_utf8_safe, isASCII_LC_utf8_safe, - * isBLANK_LC_utf8_safe, isCNTRL_LC_utf8_safe, isDIGIT_LC_utf8_safe, - * isGRAPH_LC_utf8_safe, isIDCONT_LC_utf8_safe, isIDFIRST_LC_utf8_safe, - * isLOWER_LC_utf8_safe, isPRINT_LC_utf8_safe, isPSXSPC_LC_utf8_safe, - * isPUNCT_LC_utf8_safe, isSPACE_LC_utf8_safe, isUPPER_LC_utf8_safe, - * isWORDCHAR_LC_utf8_safe, isWORDCHAR_LC_utf8_safe, isXDIGIT_LC_utf8_safe, - * isALPHANUMERIC_uvchr, isALPHA_uvchr, isASCII_uvchr, isBLANK_uvchr, - * isCNTRL_uvchr, isDIGIT_uvchr, isGRAPH_uvchr, isIDCONT_uvchr, - * isIDFIRST_uvchr, isLOWER_uvchr, isPRINT_uvchr, isPSXSPC_uvchr, - * isPUNCT_uvchr, isSPACE_uvchr, isUPPER_uvchr, isWORDCHAR_uvchr, - * isWORDCHAR_uvchr, isXDIGIT_uvchr - * - * The UTF-8 handling is buggy in early Perls, and this can give inaccurate - * results for code points above 0xFF, until the implementation started - * settling down in 5.12 and 5.14 */ - -#endif - -#define D_PPP_TOO_SHORT_MSG "Malformed UTF-8 character starting with:" \ - " \\x%02x (too short; %d bytes available, need" \ - " %d)\n" -/* Perls starting here had a new API which handled multi-character results */ -#if { VERSION >= 5.7.3 } - -__UNDEFINED__ toLOWER_uvchr(c, s, l) UNI_TO_NATIVE(to_uni_lower(NATIVE_TO_UNI(c), s, l)) -__UNDEFINED__ toUPPER_uvchr(c, s, l) UNI_TO_NATIVE(to_uni_upper(NATIVE_TO_UNI(c), s, l)) -__UNDEFINED__ toTITLE_uvchr(c, s, l) UNI_TO_NATIVE(to_uni_title(NATIVE_TO_UNI(c), s, l)) -__UNDEFINED__ toFOLD_uvchr(c, s, l) UNI_TO_NATIVE(to_uni_fold( NATIVE_TO_UNI(c), s, l)) - -# if { VERSION != 5.15.6 } /* Just this version is broken */ - - /* Prefer the macro to the function */ -# if defined toLOWER_utf8 -# define D_PPP_TO_LOWER_CALLEE(s,r,l) toLOWER_utf8(s,r,l) -# else -# define D_PPP_TO_LOWER_CALLEE(s,r,l) to_utf8_lower(s,r,l) -# endif -# if defined toTITLE_utf8 -# define D_PPP_TO_TITLE_CALLEE(s,r,l) toTITLE_utf8(s,r,l) -# else -# define D_PPP_TO_TITLE_CALLEE(s,r,l) to_utf8_title(s,r,l) -# endif -# if defined toUPPER_utf8 -# define D_PPP_TO_UPPER_CALLEE(s,r,l) toUPPER_utf8(s,r,l) -# else -# define D_PPP_TO_UPPER_CALLEE(s,r,l) to_utf8_upper(s,r,l) -# endif -# if defined toFOLD_utf8 -# define D_PPP_TO_FOLD_CALLEE(s,r,l) toFOLD_utf8(s,r,l) -# else -# define D_PPP_TO_FOLD_CALLEE(s,r,l) to_utf8_fold(s,r,l) -# endif -# else /* Below is 5.15.6, which failed to make the macros available -# outside of core, so we have to use the 'Perl_' form. khw -# decided it was easier to just handle this case than have to -# document the exception, and make an exception in the tests below -# */ -# define D_PPP_TO_LOWER_CALLEE(s,r,l) \ - Perl__to_utf8_lower_flags(aTHX_ s, r, l, 0, NULL) -# define D_PPP_TO_TITLE_CALLEE(s,r,l) \ - Perl__to_utf8_title_flags(aTHX_ s, r, l, 0, NULL) -# define D_PPP_TO_UPPER_CALLEE(s,r,l) \ - Perl__to_utf8_upper_flags(aTHX_ s, r, l, 0, NULL) -# define D_PPP_TO_FOLD_CALLEE(s,r,l) \ - Perl__to_utf8_fold_flags(aTHX_ s, r, l, FOLD_FLAGS_FULL, NULL) -# endif - -/* The actual implementation of the backported macros. If too short, croak, - * otherwise call the original that doesn't have an upper limit parameter */ -# define D_PPP_GENERIC_MULTI_ARG_TO(name, s, e,r,l) \ - (((((e) - (s)) <= 0) \ - /* We could just do nothing, but modern perls croak */ \ - ? (croak("Attempting case change on zero length string"), \ - 0) /* So looks like it returns something, and will compile */ \ - : ((e) - (s)) < UTF8SKIP(s)) \ - ? (croak(D_PPP_TOO_SHORT_MSG, \ - s[0], (int) ((e) - (s)), (int) UTF8SKIP(s)), \ - 0) \ - : D_PPP_TO_ ## name ## _CALLEE(s,r,l)) - -__UNDEFINED__ toUPPER_utf8_safe(s,e,r,l) \ - D_PPP_GENERIC_MULTI_ARG_TO(UPPER,s,e,r,l) -__UNDEFINED__ toLOWER_utf8_safe(s,e,r,l) \ - D_PPP_GENERIC_MULTI_ARG_TO(LOWER,s,e,r,l) -__UNDEFINED__ toTITLE_utf8_safe(s,e,r,l) \ - D_PPP_GENERIC_MULTI_ARG_TO(TITLE,s,e,r,l) -__UNDEFINED__ toFOLD_utf8_safe(s,e,r,l) \ - D_PPP_GENERIC_MULTI_ARG_TO(FOLD,s,e,r,l) - -#elif { VERSION >= 5.006 } - -/* Here we have UTF-8 support, but using the original API where the case - * changing functions merely returned the changed code point; hence they - * couldn't handle multi-character results. */ - -# ifdef uvchr_to_utf8 -# define D_PPP_UV_TO_UTF8 uvchr_to_utf8 -# else -# define D_PPP_UV_TO_UTF8 uv_to_utf8 -# endif - - /* Get the utf8 of the case changed value, and store its length; then have - * to re-calculate the changed case value in order to return it */ -# define D_PPP_GENERIC_SINGLE_ARG_TO_UVCHR(name, c, s, l) \ - (*(l) = (D_PPP_UV_TO_UTF8(s, \ - UNI_TO_NATIVE(to_uni_ ## name(NATIVE_TO_UNI(c)))) - (s)), \ - UNI_TO_NATIVE(to_uni_ ## name(NATIVE_TO_UNI(c)))) - -__UNDEFINED__ toLOWER_uvchr(c, s, l) \ - D_PPP_GENERIC_SINGLE_ARG_TO_UVCHR(lower, c, s, l) -__UNDEFINED__ toUPPER_uvchr(c, s, l) \ - D_PPP_GENERIC_SINGLE_ARG_TO_UVCHR(upper, c, s, l) -__UNDEFINED__ toTITLE_uvchr(c, s, l) \ - D_PPP_GENERIC_SINGLE_ARG_TO_UVCHR(title, c, s, l) -__UNDEFINED__ toFOLD_uvchr(c, s, l) toLOWER_uvchr(c, s, l) - -# define D_PPP_GENERIC_SINGLE_ARG_TO_UTF8(name, s, e, r, l) \ - (((((e) - (s)) <= 0) \ - ? (croak("Attempting case change on zero length string"), \ - 0) /* So looks like it returns something, and will compile */ \ - : ((e) - (s)) < UTF8SKIP(s)) \ - ? (croak(D_PPP_TOO_SHORT_MSG, \ - s[0], (int) ((e) - (s)), (int) UTF8SKIP(s)), \ - 0) \ - /* Get the changed code point and store its UTF-8 */ \ - : D_PPP_UV_TO_UTF8(r, to_utf8_ ## name(s)), \ - /* Then store its length, and re-get code point for return */ \ - *(l) = UTF8SKIP(r), to_utf8_ ## name(r)) - -/* Warning: toUPPER_utf8_safe, toLOWER_utf8_safe, toTITLE_utf8_safe, - * toUPPER_uvchr, toLOWER_uvchr, toTITLE_uvchr - The UTF-8 case changing operations had bugs before around 5.12 or 5.14; - this backport does not correct them. - - In perls before 7.3, multi-character case changing is not implemented; this - backport uses the simple case changes available in those perls. */ - -__UNDEFINED__ toUPPER_utf8_safe(s,e,r,l) \ - D_PPP_GENERIC_SINGLE_ARG_TO_UTF8(upper, s, e, r, l) -__UNDEFINED__ toLOWER_utf8_safe(s,e,r,l) \ - D_PPP_GENERIC_SINGLE_ARG_TO_UTF8(lower, s, e, r, l) -__UNDEFINED__ toTITLE_utf8_safe(s,e,r,l) \ - D_PPP_GENERIC_SINGLE_ARG_TO_UTF8(title, s, e, r, l) - - /* Warning: toFOLD_utf8_safe, toFOLD_uvchr - The UTF-8 case changing operations had bugs before around 5.12 or 5.14; - this backport does not correct them. - - In perls before 7.3, case folding is not implemented; instead, this - backport substitutes simple (not multi-character, which isn't available) - lowercasing. This gives the correct result in most, but not all, instances - */ - -__UNDEFINED__ toFOLD_utf8_safe(s,e,r,l) toLOWER_utf8_safe(s,e,r,l) - +__UNDEFINED__ isGRAPH(c) (isALNUM(c) || isPUNCT(c)) +__UNDEFINED__ isPRINT(c) (((c) >= 32 && (c) < 127)) +__UNDEFINED__ isPUNCT(c) (((c) >= 33 && (c) <= 47) || ((c) >= 58 && (c) <= 64) || ((c) >= 91 && (c) <= 96) || ((c) >= 123 && (c) <= 126)) +__UNDEFINED__ isXDIGIT(c) (isDIGIT(c) || ((c) >= 'a' && (c) <= 'f') || ((c) >= 'A' && (c) <= 'F')) #endif /* Until we figure out how to support this in older perls... */ @@ -1064,19 +352,6 @@ __UNDEFINED__ HeUTF8(he) ((HeKLEN(he) == HEf_SVKEY) ? \ __UNDEFINED__ C_ARRAY_LENGTH(a) (sizeof(a)/sizeof((a)[0])) __UNDEFINED__ C_ARRAY_END(a) ((a) + C_ARRAY_LENGTH(a)) -__UNDEFINED__ LIKELY(x) (x) -__UNDEFINED__ UNLIKELY(x) (x) - -#ifndef MUTABLE_PTR -#if defined(__GNUC__) && !defined(PERL_GCC_BRACE_GROUPS_FORBIDDEN) -# define MUTABLE_PTR(p) ({ void *_p = (p); _p; }) -#else -# define MUTABLE_PTR(p) ((void *) (p)) -#endif -#endif - -__UNDEFINED__ MUTABLE_SV(p) ((SV *)MUTABLE_PTR(p)) - =xsmisc typedef XSPROTO(XSPROTO_test_t); @@ -1111,6 +386,10 @@ XS(XS_Devel__PPPort_dAXMARK) XSRETURN(1); } +=xsinit + +#define NEED_SvRX + =xsboot { @@ -1126,7 +405,6 @@ OpSIBLING_tests() PREINIT: OP *x; OP *kid; - OP *middlekid; OP *lastkid; int count = 0; int failures = 0; @@ -1150,7 +428,6 @@ OpSIBLING_tests() kid = OpSIBLING(kid); lastkid = kid; } - middlekid = OpSIBLING(x); /* Should now have a sibling */ if (! OpHAS_SIBLING(x) || ! OpSIBLING(x) ) { @@ -1194,9 +471,6 @@ OpSIBLING_tests() failures++; warn("Op should have had a sib after maybesibset"); } - op_free(lastkid); - op_free(middlekid); - op_free(x); RETVAL = failures; OUTPUT: RETVAL @@ -1316,7 +590,7 @@ DEFSV_modify() int ERRSV() CODE: - RETVAL = SvTRUEx(ERRSV); + RETVAL = SvTRUE(ERRSV); OUTPUT: RETVAL @@ -1402,1169 +676,44 @@ check_c_array() mXPUSHi(C_ARRAY_LENGTH(x)); /* 4 */ mXPUSHi(*(C_ARRAY_END(x)-1)); /* 13 */ -bool -isBLANK(ord) - UV ord - CODE: - RETVAL = isBLANK(ord); - OUTPUT: - RETVAL - -bool -isBLANK_A(ord) - UV ord - CODE: - RETVAL = isBLANK_A(ord); - OUTPUT: - RETVAL - -bool -isBLANK_L1(ord) - UV ord - CODE: - RETVAL = isBLANK_L1(ord); - OUTPUT: - RETVAL - -bool -isUPPER(ord) - UV ord - CODE: - RETVAL = isUPPER(ord); - OUTPUT: - RETVAL - -bool -isUPPER_A(ord) - UV ord - CODE: - RETVAL = isUPPER_A(ord); - OUTPUT: - RETVAL - -bool -isUPPER_L1(ord) - UV ord - CODE: - RETVAL = isUPPER_L1(ord); - OUTPUT: - RETVAL - -bool -isLOWER(ord) - UV ord - CODE: - RETVAL = isLOWER(ord); - OUTPUT: - RETVAL - -bool -isLOWER_A(ord) - UV ord - CODE: - RETVAL = isLOWER_A(ord); - OUTPUT: - RETVAL - -bool -isLOWER_L1(ord) - UV ord - CODE: - RETVAL = isLOWER_L1(ord); - OUTPUT: - RETVAL - -bool -isALPHA(ord) - UV ord - CODE: - RETVAL = isALPHA(ord); - OUTPUT: - RETVAL - -bool -isALPHA_A(ord) - UV ord - CODE: - RETVAL = isALPHA_A(ord); - OUTPUT: - RETVAL - -bool -isALPHA_L1(ord) - UV ord - CODE: - RETVAL = isALPHA_L1(ord); - OUTPUT: - RETVAL - -bool -isWORDCHAR(ord) - UV ord - CODE: - RETVAL = isWORDCHAR(ord); - OUTPUT: - RETVAL - -bool -isWORDCHAR_A(ord) - UV ord - CODE: - RETVAL = isWORDCHAR_A(ord); - OUTPUT: - RETVAL - -bool -isWORDCHAR_L1(ord) - UV ord - CODE: - RETVAL = isWORDCHAR_L1(ord); - OUTPUT: - RETVAL - -bool -isALPHANUMERIC(ord) - UV ord - CODE: - RETVAL = isALPHANUMERIC(ord); - OUTPUT: - RETVAL - -bool -isALPHANUMERIC_A(ord) - UV ord - CODE: - RETVAL = isALPHANUMERIC_A(ord); - OUTPUT: - RETVAL - -bool -isALNUM(ord) - UV ord - CODE: - RETVAL = isALNUM(ord); - OUTPUT: - RETVAL - -bool -isALNUM_A(ord) - UV ord - CODE: - RETVAL = isALNUM_A(ord); - OUTPUT: - RETVAL - -bool -isDIGIT(ord) - UV ord - CODE: - RETVAL = isDIGIT(ord); - OUTPUT: - RETVAL - -bool -isDIGIT_A(ord) - UV ord - CODE: - RETVAL = isDIGIT_A(ord); - OUTPUT: - RETVAL - -bool -isOCTAL(ord) - UV ord - CODE: - RETVAL = isOCTAL(ord); - OUTPUT: - RETVAL - -bool -isOCTAL_A(ord) - UV ord - CODE: - RETVAL = isOCTAL_A(ord); - OUTPUT: - RETVAL - -bool -isIDFIRST(ord) - UV ord - CODE: - RETVAL = isIDFIRST(ord); - OUTPUT: - RETVAL - -bool -isIDFIRST_A(ord) - UV ord - CODE: - RETVAL = isIDFIRST_A(ord); - OUTPUT: - RETVAL - -bool -isIDCONT(ord) - UV ord - CODE: - RETVAL = isIDCONT(ord); - OUTPUT: - RETVAL - -bool -isIDCONT_A(ord) - UV ord - CODE: - RETVAL = isIDCONT_A(ord); - OUTPUT: - RETVAL - -bool -isSPACE(ord) - UV ord - CODE: - RETVAL = isSPACE(ord); - OUTPUT: - RETVAL - -bool -isSPACE_A(ord) - UV ord - CODE: - RETVAL = isSPACE_A(ord); - OUTPUT: - RETVAL - -bool -isASCII(ord) - UV ord - CODE: - RETVAL = isASCII(ord); - OUTPUT: - RETVAL - -bool -isASCII_A(ord) - UV ord - CODE: - RETVAL = isASCII_A(ord); - OUTPUT: - RETVAL - -bool -isCNTRL(ord) - UV ord - CODE: - RETVAL = isCNTRL(ord); - OUTPUT: - RETVAL - -bool -isCNTRL_A(ord) - UV ord - CODE: - RETVAL = isCNTRL_A(ord); - OUTPUT: - RETVAL - -bool -isPRINT(ord) - UV ord - CODE: - RETVAL = isPRINT(ord); - OUTPUT: - RETVAL - -bool -isPRINT_A(ord) - UV ord - CODE: - RETVAL = isPRINT_A(ord); - OUTPUT: - RETVAL - -bool -isGRAPH(ord) - UV ord - CODE: - RETVAL = isGRAPH(ord); - OUTPUT: - RETVAL - -bool -isGRAPH_A(ord) - UV ord - CODE: - RETVAL = isGRAPH_A(ord); - OUTPUT: - RETVAL - -bool -isPUNCT(ord) - UV ord - CODE: - RETVAL = isPUNCT(ord); - OUTPUT: - RETVAL - -bool -isPUNCT_A(ord) - UV ord - CODE: - RETVAL = isPUNCT_A(ord); - OUTPUT: - RETVAL - -bool -isXDIGIT(ord) - UV ord - CODE: - RETVAL = isXDIGIT(ord); - OUTPUT: - RETVAL - -bool -isXDIGIT_A(ord) - UV ord - CODE: - RETVAL = isXDIGIT_A(ord); - OUTPUT: - RETVAL - -bool -isPSXSPC(ord) - UV ord - CODE: - RETVAL = isPSXSPC(ord); - OUTPUT: - RETVAL - -bool -isPSXSPC_A(ord) - UV ord - CODE: - RETVAL = isPSXSPC_A(ord); - OUTPUT: - RETVAL - -bool -isALPHANUMERIC_L1(ord) - UV ord - CODE: - RETVAL = isALPHANUMERIC_L1(ord); - OUTPUT: - RETVAL - -bool -isALNUMC_L1(ord) - UV ord - CODE: - RETVAL = isALNUMC_L1(ord); - OUTPUT: - RETVAL - -bool -isDIGIT_L1(ord) - UV ord - CODE: - RETVAL = isDIGIT_L1(ord); - OUTPUT: - RETVAL - -bool -isOCTAL_L1(ord) - UV ord - CODE: - RETVAL = isOCTAL_L1(ord); - OUTPUT: - RETVAL - -bool -isIDFIRST_L1(ord) - UV ord - CODE: - RETVAL = isIDFIRST_L1(ord); - OUTPUT: - RETVAL - -bool -isIDCONT_L1(ord) - UV ord - CODE: - RETVAL = isIDCONT_L1(ord); - OUTPUT: - RETVAL - -bool -isSPACE_L1(ord) - UV ord - CODE: - RETVAL = isSPACE_L1(ord); - OUTPUT: - RETVAL - -bool -isASCII_L1(ord) - UV ord - CODE: - RETVAL = isASCII_L1(ord); - OUTPUT: - RETVAL - -bool -isCNTRL_L1(ord) - UV ord - CODE: - RETVAL = isCNTRL_L1(ord); - OUTPUT: - RETVAL - -bool -isPRINT_L1(ord) - UV ord - CODE: - RETVAL = isPRINT_L1(ord); - OUTPUT: - RETVAL - -bool -isGRAPH_L1(ord) - UV ord - CODE: - RETVAL = isGRAPH_L1(ord); - OUTPUT: - RETVAL - -bool -isPUNCT_L1(ord) - UV ord - CODE: - RETVAL = isPUNCT_L1(ord); - OUTPUT: - RETVAL - -bool -isXDIGIT_L1(ord) - UV ord - CODE: - RETVAL = isXDIGIT_L1(ord); - OUTPUT: - RETVAL - -bool -isPSXSPC_L1(ord) - UV ord - CODE: - RETVAL = isPSXSPC_L1(ord); - OUTPUT: - RETVAL - -bool -isASCII_uvchr(ord) - UV ord - CODE: - RETVAL = isASCII_uvchr(ord); - OUTPUT: - RETVAL - -bool -isASCII_utf8_safe(s, offset) - unsigned char * s - int offset - CODE: - PERL_UNUSED_ARG(offset); - RETVAL = isASCII_utf8_safe(s, s + 1 + offset); - OUTPUT: - RETVAL - -#if { VERSION >= 5.006 } - -bool -isBLANK_uvchr(ord) - UV ord - CODE: - RETVAL = isBLANK_uvchr(ord); - OUTPUT: - RETVAL - -bool -isALPHA_uvchr(ord) - UV ord - CODE: - RETVAL = isALPHA_uvchr(ord); - OUTPUT: - RETVAL - -bool -isALPHANUMERIC_uvchr(ord) - UV ord - CODE: - RETVAL = isALPHANUMERIC_uvchr(ord); - OUTPUT: - RETVAL - -bool -isCNTRL_uvchr(ord) - UV ord - CODE: - RETVAL = isCNTRL_uvchr(ord); - OUTPUT: - RETVAL - -bool -isDIGIT_uvchr(ord) - UV ord - CODE: - RETVAL = isDIGIT_uvchr(ord); - OUTPUT: - RETVAL - -bool -isIDFIRST_uvchr(ord) - UV ord - CODE: - RETVAL = isIDFIRST_uvchr(ord); - OUTPUT: - RETVAL - -bool -isIDCONT_uvchr(ord) - UV ord - CODE: - RETVAL = isIDCONT_uvchr(ord); - OUTPUT: - RETVAL - -bool -isGRAPH_uvchr(ord) - UV ord - CODE: - RETVAL = isGRAPH_uvchr(ord); - OUTPUT: - RETVAL - -bool -isLOWER_uvchr(ord) - UV ord - CODE: - RETVAL = isLOWER_uvchr(ord); - OUTPUT: - RETVAL - -bool -isPRINT_uvchr(ord) - UV ord - CODE: - RETVAL = isPRINT_uvchr(ord); - OUTPUT: - RETVAL - -bool -isPSXSPC_uvchr(ord) - UV ord - CODE: - RETVAL = isPSXSPC_uvchr(ord); - OUTPUT: - RETVAL - -bool -isPUNCT_uvchr(ord) - UV ord - CODE: - RETVAL = isPUNCT_uvchr(ord); - OUTPUT: - RETVAL - -bool -isSPACE_uvchr(ord) - UV ord - CODE: - RETVAL = isSPACE_uvchr(ord); - OUTPUT: - RETVAL - -bool -isUPPER_uvchr(ord) - UV ord - CODE: - RETVAL = isUPPER_uvchr(ord); - OUTPUT: - RETVAL - -bool -isWORDCHAR_uvchr(ord) - UV ord - CODE: - RETVAL = isWORDCHAR_uvchr(ord); - OUTPUT: - RETVAL - -bool -isXDIGIT_uvchr(ord) - UV ord - CODE: - RETVAL = isXDIGIT_uvchr(ord); - OUTPUT: - RETVAL - -bool -isALPHA_utf8_safe(s, offset) - unsigned char * s - int offset - CODE: - RETVAL = isALPHA_utf8_safe(s, s + UTF8SKIP(s) + offset); - OUTPUT: - RETVAL - -bool -isALPHANUMERIC_utf8_safe(s, offset) - unsigned char * s - int offset - CODE: - RETVAL = isALPHANUMERIC_utf8_safe(s, s + UTF8SKIP(s) + offset); - OUTPUT: - RETVAL - -bool -isBLANK_utf8_safe(s, offset) - unsigned char * s - int offset - CODE: - RETVAL = isBLANK_utf8_safe(s, s + UTF8SKIP(s) + offset); - OUTPUT: - RETVAL - -bool -isCNTRL_utf8_safe(s, offset) - unsigned char * s - int offset - CODE: - RETVAL = isCNTRL_utf8_safe(s, s + UTF8SKIP(s) + offset); - OUTPUT: - RETVAL - -bool -isDIGIT_utf8_safe(s, offset) - unsigned char * s - int offset - CODE: - RETVAL = isDIGIT_utf8_safe(s, s + UTF8SKIP(s) + offset); - OUTPUT: - RETVAL - -bool -isGRAPH_utf8_safe(s, offset) - unsigned char * s - int offset - CODE: - RETVAL = isGRAPH_utf8_safe(s, s + UTF8SKIP(s) + offset); - OUTPUT: - RETVAL - -bool -isIDCONT_utf8_safe(s, offset) - unsigned char * s - int offset - CODE: - RETVAL = isIDCONT_utf8_safe(s, s + UTF8SKIP(s) + offset); - OUTPUT: - RETVAL - -bool -isIDFIRST_utf8_safe(s, offset) - unsigned char * s - int offset - CODE: - RETVAL = isIDFIRST_utf8_safe(s, s + UTF8SKIP(s) + offset); - OUTPUT: - RETVAL - -bool -isLOWER_utf8_safe(s, offset) - unsigned char * s - int offset - CODE: - RETVAL = isLOWER_utf8_safe(s, s + UTF8SKIP(s) + offset); - OUTPUT: - RETVAL - -bool -isPRINT_utf8_safe(s, offset) - unsigned char * s - int offset - CODE: - RETVAL = isPRINT_utf8_safe(s, s + UTF8SKIP(s) + offset); - OUTPUT: - RETVAL - -bool -isPSXSPC_utf8_safe(s, offset) - unsigned char * s - int offset - CODE: - RETVAL = isPSXSPC_utf8_safe(s, s + UTF8SKIP(s) + offset); - OUTPUT: - RETVAL - -bool -isPUNCT_utf8_safe(s, offset) - unsigned char * s - int offset - CODE: - RETVAL = isPUNCT_utf8_safe(s, s + UTF8SKIP(s) + offset); - OUTPUT: - RETVAL - -bool -isSPACE_utf8_safe(s, offset) - unsigned char * s - int offset - CODE: - RETVAL = isSPACE_utf8_safe(s, s + UTF8SKIP(s) + offset); - OUTPUT: - RETVAL - -bool -isUPPER_utf8_safe(s, offset) - unsigned char * s - int offset - CODE: - RETVAL = isUPPER_utf8_safe(s, s + UTF8SKIP(s) + offset); - OUTPUT: - RETVAL - -bool -isWORDCHAR_utf8_safe(s, offset) - unsigned char * s - int offset - CODE: - RETVAL = isWORDCHAR_utf8_safe(s, s + UTF8SKIP(s) + offset); - OUTPUT: - RETVAL - -bool -isXDIGIT_utf8_safe(s, offset) - unsigned char * s - int offset - CODE: - RETVAL = isXDIGIT_utf8_safe(s, s + UTF8SKIP(s) + offset); - OUTPUT: - RETVAL - -bool -isALPHA_LC_utf8_safe(s, offset) - unsigned char * s - int offset - CODE: - RETVAL = isALPHA_LC_utf8_safe(s, s + UTF8SKIP(s) + offset); - OUTPUT: - RETVAL - -bool -isALPHANUMERIC_LC_utf8_safe(s, offset) - unsigned char * s - int offset - CODE: - RETVAL = isALPHANUMERIC_LC_utf8_safe(s, s + UTF8SKIP(s) + offset); - OUTPUT: - RETVAL - -bool -isASCII_LC_utf8_safe(s, offset) - unsigned char * s - int offset - CODE: - PERL_UNUSED_ARG(offset); - RETVAL = isASCII_utf8_safe(s, s + UTF8SKIP(s) + offset); - OUTPUT: - RETVAL - -bool -isBLANK_LC_utf8_safe(s, offset) - unsigned char * s - int offset - CODE: - RETVAL = isBLANK_LC_utf8_safe(s, s + UTF8SKIP(s) + offset); - OUTPUT: - RETVAL - -bool -isCNTRL_LC_utf8_safe(s, offset) - unsigned char * s - int offset - CODE: - RETVAL = isCNTRL_LC_utf8_safe(s, s + UTF8SKIP(s) + offset); - OUTPUT: - RETVAL - -bool -isDIGIT_LC_utf8_safe(s, offset) - unsigned char * s - int offset - CODE: - RETVAL = isDIGIT_LC_utf8_safe(s, s + UTF8SKIP(s) + offset); - OUTPUT: - RETVAL - -bool -isGRAPH_LC_utf8_safe(s, offset) - unsigned char * s - int offset - CODE: - RETVAL = isGRAPH_LC_utf8_safe(s, s + UTF8SKIP(s) + offset); - OUTPUT: - RETVAL - -bool -isIDCONT_LC_utf8_safe(s, offset) - unsigned char * s - int offset - CODE: - RETVAL = isIDCONT_LC_utf8_safe(s, s + UTF8SKIP(s) + offset); - OUTPUT: - RETVAL - -bool -isIDFIRST_LC_utf8_safe(s, offset) - unsigned char * s - int offset - CODE: - RETVAL = isIDFIRST_LC_utf8_safe(s, s + UTF8SKIP(s) + offset); - OUTPUT: - RETVAL - -bool -isLOWER_LC_utf8_safe(s, offset) - unsigned char * s - int offset - CODE: - RETVAL = isLOWER_LC_utf8_safe(s, s + UTF8SKIP(s) + offset); - OUTPUT: - RETVAL - -bool -isPRINT_LC_utf8_safe(s, offset) - unsigned char * s - int offset - CODE: - RETVAL = isPRINT_LC_utf8_safe(s, s + UTF8SKIP(s) + offset); - OUTPUT: - RETVAL - -bool -isPSXSPC_LC_utf8_safe(s, offset) - unsigned char * s - int offset - CODE: - RETVAL = isPSXSPC_LC_utf8_safe(s, s + UTF8SKIP(s) + offset); - OUTPUT: - RETVAL - -bool -isPUNCT_LC_utf8_safe(s, offset) - unsigned char * s - int offset - CODE: - RETVAL = isPUNCT_LC_utf8_safe(s, s + UTF8SKIP(s) + offset); - OUTPUT: - RETVAL - -bool -isSPACE_LC_utf8_safe(s, offset) - unsigned char * s - int offset - CODE: - RETVAL = isSPACE_LC_utf8_safe(s, s + UTF8SKIP(s) + offset); - OUTPUT: - RETVAL - -bool -isUPPER_LC_utf8_safe(s, offset) - unsigned char * s - int offset - CODE: - RETVAL = isUPPER_LC_utf8_safe(s, s + UTF8SKIP(s) + offset); - OUTPUT: - RETVAL - -bool -isWORDCHAR_LC_utf8_safe(s, offset) - unsigned char * s - int offset - CODE: - RETVAL = isWORDCHAR_LC_utf8_safe(s, s + UTF8SKIP(s) + offset); - OUTPUT: - RETVAL - -bool -isXDIGIT_LC_utf8_safe(s, offset) - unsigned char * s - int offset - CODE: - RETVAL = isXDIGIT_LC_utf8_safe(s, s + UTF8SKIP(s) + offset); - OUTPUT: - RETVAL - -AV * -toLOWER_utf8_safe(s, offset) - unsigned char * s - int offset - PREINIT: - U8 u[UTF8_MAXBYTES+1]; - Size_t len; - UV ret; - SV* utf8; - AV * av; - CODE: - av = newAV(); - ret = toLOWER_utf8_safe(s, s + UTF8SKIP(s) + offset, u, &len); - av_push(av, newSVuv(ret)); - - utf8 = newSVpvn((char *) u, len); - SvUTF8_on(utf8); - av_push(av, utf8); - - av_push(av, newSVuv(len)); - RETVAL = av; - OUTPUT: - RETVAL - -AV * -toTITLE_utf8_safe(s, offset) - unsigned char * s - int offset - PREINIT: - U8 u[UTF8_MAXBYTES+1]; - Size_t len; - UV ret; - SV* utf8; - AV * av; - CODE: - av = newAV(); - ret = toTITLE_utf8_safe(s, s + UTF8SKIP(s) + offset, u, &len); - av_push(av, newSVuv(ret)); - - utf8 = newSVpvn((char *) u, len); - SvUTF8_on(utf8); - av_push(av, utf8); - - av_push(av, newSVuv(len)); - RETVAL = av; - OUTPUT: - RETVAL - -AV * -toUPPER_utf8_safe(s, offset) - unsigned char * s - int offset - PREINIT: - U8 u[UTF8_MAXBYTES+1]; - Size_t len; - UV ret; - SV* utf8; - AV * av; - CODE: - av = newAV(); - ret = toUPPER_utf8_safe(s, s + UTF8SKIP(s) + offset, u, &len); - av_push(av, newSVuv(ret)); - - utf8 = newSVpvn((char *) u, len); - SvUTF8_on(utf8); - av_push(av, utf8); - - av_push(av, newSVuv(len)); - RETVAL = av; - OUTPUT: - RETVAL - -AV * -toFOLD_utf8_safe(s, offset) - unsigned char * s - int offset - PREINIT: - U8 u[UTF8_MAXBYTES+1]; - Size_t len; - UV ret; - SV* utf8; - AV * av; - CODE: - av = newAV(); - ret = toFOLD_utf8_safe(s, s + UTF8SKIP(s) + offset, u, &len); - av_push(av, newSVuv(ret)); - - utf8 = newSVpvn((char *) u, len); - SvUTF8_on(utf8); - av_push(av, utf8); - - av_push(av, newSVuv(len)); - RETVAL = av; - OUTPUT: - RETVAL - -AV * -toLOWER_uvchr(c) - UV c - PREINIT: - U8 u[UTF8_MAXBYTES+1]; - Size_t len; - UV ret; - SV* utf8; - AV * av; - CODE: - av = newAV(); - ret = toLOWER_uvchr(c, u, &len); - av_push(av, newSVuv(ret)); - - utf8 = newSVpvn((char *) u, len); - SvUTF8_on(utf8); - av_push(av, utf8); - - av_push(av, newSVuv(len)); - RETVAL = av; - OUTPUT: - RETVAL - -AV * -toTITLE_uvchr(c) - UV c - PREINIT: - U8 u[UTF8_MAXBYTES+1]; - Size_t len; - UV ret; - SV* utf8; - AV * av; - CODE: - av = newAV(); - ret = toTITLE_uvchr(c, u, &len); - av_push(av, newSVuv(ret)); - - utf8 = newSVpvn((char *) u, len); - SvUTF8_on(utf8); - av_push(av, utf8); - - av_push(av, newSVuv(len)); - RETVAL = av; - OUTPUT: - RETVAL - -AV * -toUPPER_uvchr(c) - UV c - PREINIT: - U8 u[UTF8_MAXBYTES+1]; - Size_t len; - UV ret; - SV* utf8; - AV * av; - CODE: - av = newAV(); - ret = toUPPER_uvchr(c, u, &len); - av_push(av, newSVuv(ret)); - - utf8 = newSVpvn((char *) u, len); - SvUTF8_on(utf8); - av_push(av, utf8); - - av_push(av, newSVuv(len)); - RETVAL = av; - OUTPUT: - RETVAL - -AV * -toFOLD_uvchr(c) - UV c - PREINIT: - U8 u[UTF8_MAXBYTES+1]; - Size_t len; - UV ret; - SV* utf8; - AV * av; - CODE: - av = newAV(); - ret = toFOLD_uvchr(c, u, &len); - av_push(av, newSVuv(ret)); - - utf8 = newSVpvn((char *) u, len); - SvUTF8_on(utf8); - av_push(av, utf8); - - av_push(av, newSVuv(len)); - RETVAL = av; - OUTPUT: - RETVAL - -#endif - -UV -LATIN1_TO_NATIVE(cp) - UV cp - CODE: - if (cp > 255) RETVAL= cp; - else RETVAL= LATIN1_TO_NATIVE(cp); - OUTPUT: - RETVAL - -UV -NATIVE_TO_LATIN1(cp) - UV cp - CODE: - RETVAL= NATIVE_TO_LATIN1(cp); - OUTPUT: - RETVAL - -STRLEN -av_tindex(av) - SV *av - CODE: - RETVAL = av_tindex((AV*)SvRV(av)); - OUTPUT: - RETVAL - -STRLEN -av_top_index(av) - SV *av - CODE: - RETVAL = av_top_index((AV*)SvRV(av)); - OUTPUT: - RETVAL - -=tests plan => 26826 +=tests plan => 48 use vars qw($my_sv @my_av %my_hv); -ok(&Devel::PPPort::boolSV(1), "Verify boolSV(1) is true"); -ok(!&Devel::PPPort::boolSV(0), "Verify boolSV(0) is false"); +ok(&Devel::PPPort::boolSV(1)); +ok(!&Devel::PPPort::boolSV(0)); $_ = "Fred"; -is(&Devel::PPPort::DEFSV(), "Fred", '$_ is FRED; Verify DEFSV is FRED'); -is(&Devel::PPPort::UNDERBAR(), "Fred", 'And verify UNDERBAR is FRED'); +ok(&Devel::PPPort::DEFSV(), "Fred"); +ok(&Devel::PPPort::UNDERBAR(), "Fred"); -if (ivers($]) >= ivers(5.9.2) && ivers($]) < ivers(5.23)) { +if ($] >= 5.009002 && $] < 5.023 && $] < 5.023004) { eval q{ no warnings "deprecated"; - no if $^V >= v5.17.9, warnings => "experimental::lexical_topic"; + no if $^V > v5.17.9, warnings => "experimental::lexical_topic"; my $_ = "Tony"; - is(&Devel::PPPort::DEFSV(), "Fred", 'lexical_topic eval: $_ is Tony; Verify DEFSV is Fred'); - is(&Devel::PPPort::UNDERBAR(), "Tony", 'And verify UNDERBAR is Tony'); + ok(&Devel::PPPort::DEFSV(), "Fred"); + ok(&Devel::PPPort::UNDERBAR(), "Tony"); }; - die __FILE__ . __LINE__ . ": $@" if $@; } else { - skip("perl version outside testing range of lexical_topic", 2); + ok(1); + ok(1); } my @r = &Devel::PPPort::DEFSV_modify(); -ok(@r == 3, "Verify got 3 elements"); -is($r[0], 'Fred'); -is($r[1], 'DEFSV'); -is($r[2], 'Fred'); +ok(@r == 3); +ok($r[0], 'Fred'); +ok($r[1], 'DEFSV'); +ok($r[2], 'Fred'); -is(&Devel::PPPort::DEFSV(), "Fred"); +ok(&Devel::PPPort::DEFSV(), "Fred"); eval { 1 }; -ok(!&Devel::PPPort::ERRSV(), "Verify ERRSV on true is false"); +ok(!&Devel::PPPort::ERRSV()); eval { cannot_call_this_one() }; -ok(&Devel::PPPort::ERRSV(), "Verify ERRSV on false is true"); +ok(&Devel::PPPort::ERRSV()); ok(&Devel::PPPort::gv_stashpvn('Devel::PPPort', 0)); ok(!&Devel::PPPort::gv_stashpvn('does::not::exist', 0)); @@ -2590,386 +739,48 @@ ok(&Devel::PPPort::get_cv('my_cv', 0)); ok(!&Devel::PPPort::get_cv('not_my_cv', 0)); ok(&Devel::PPPort::get_cv('not_my_cv', 1)); -is(Devel::PPPort::dXSTARG(42), 43); -is(Devel::PPPort::dAXMARK(4711), 4710); +ok(Devel::PPPort::dXSTARG(42), 43); +ok(Devel::PPPort::dAXMARK(4711), 4710); -is(Devel::PPPort::prepush(), 42); +ok(Devel::PPPort::prepush(), 42); -is(join(':', Devel::PPPort::xsreturn(0)), 'test1'); -is(join(':', Devel::PPPort::xsreturn(1)), 'test1:test2'); +ok(join(':', Devel::PPPort::xsreturn(0)), 'test1'); +ok(join(':', Devel::PPPort::xsreturn(1)), 'test1:test2'); -is(Devel::PPPort::PERL_ABS(42), 42, "Verify PERL_ABS(42) is 42"); -is(Devel::PPPort::PERL_ABS(-13), 13, "Verify PERL_ABS(-13) is 13"); +ok(Devel::PPPort::PERL_ABS(42), 42); +ok(Devel::PPPort::PERL_ABS(-13), 13); -is(Devel::PPPort::SVf(42), ivers($]) >= ivers(5.4) ? '[42]' : '42'); -is(Devel::PPPort::SVf('abc'), ivers($]) >= ivers(5.4) ? '[abc]' : 'abc'); +ok(Devel::PPPort::SVf(42), $] >= 5.004 ? '[42]' : '42'); +ok(Devel::PPPort::SVf('abc'), $] >= 5.004 ? '[abc]' : 'abc'); -is(&Devel::PPPort::Perl_ppaddr_t("FOO"), "foo"); +ok(&Devel::PPPort::Perl_ppaddr_t("FOO"), "foo"); -is(&Devel::PPPort::ptrtests(), 63); +ok(&Devel::PPPort::ptrtests(), 63); -is(&Devel::PPPort::OpSIBLING_tests(), 0); +ok(&Devel::PPPort::OpSIBLING_tests(), 0); -if (ivers($]) >= ivers(5.9)) { +if ($] >= 5.009000) { eval q{ - is(&Devel::PPPort::check_HeUTF8("hello"), "norm"); - is(&Devel::PPPort::check_HeUTF8("\N{U+263a}"), "utf8"); + ok(&Devel::PPPort::check_HeUTF8("hello"), "norm"); + ok(&Devel::PPPort::check_HeUTF8("\N{U+263a}"), "utf8"); }; } else { - skip("Too early perl version", 2); + ok(1, 1); + ok(1, 1); } @r = &Devel::PPPort::check_c_array(); -is($r[0], 4); -is($r[1], "13"); +ok($r[0], 4); +ok($r[1], "13"); ok(!Devel::PPPort::SvRXOK("")); ok(!Devel::PPPort::SvRXOK(bless [], "Regexp")); -if (ivers($]) < ivers(5.5)) { - skip 'no qr// objects in this perl', 2; +if ($] < 5.005) { + skip 'no qr// objects in this perl', 0; + skip 'no qr// objects in this perl', 0; } else { my $qr = eval 'qr/./'; - ok(Devel::PPPort::SvRXOK($qr), "SVRXOK(qr) is true"); + ok(Devel::PPPort::SvRXOK($qr)); ok(Devel::PPPort::SvRXOK(bless $qr, "Surprise")); } - -ok( Devel::PPPort::NATIVE_TO_LATIN1(0xB6) == 0xB6); -ok( Devel::PPPort::NATIVE_TO_LATIN1(0x1) == 0x1); -ok( Devel::PPPort::NATIVE_TO_LATIN1(ord("A")) == 0x41); -ok( Devel::PPPort::NATIVE_TO_LATIN1(ord("0")) == 0x30); - -ok( Devel::PPPort::LATIN1_TO_NATIVE(0xB6) == 0xB6, "Verify LATIN1_TO_NATIVE(0xB6) is 0xB6"); -if (ord("A") == 65) { - ok( Devel::PPPort::LATIN1_TO_NATIVE(0x41) == 0x41); - ok( Devel::PPPort::LATIN1_TO_NATIVE(0x30) == 0x30); -} -else { - ok( Devel::PPPort::LATIN1_TO_NATIVE(0x41) == 0xC1); - ok( Devel::PPPort::LATIN1_TO_NATIVE(0x30) == 0xF0); -} - -ok( Devel::PPPort::isALNUMC_L1(ord("5"))); -ok( Devel::PPPort::isALNUMC_L1(0xFC)); -ok(! Devel::PPPort::isALNUMC_L1(0xB6)); - -ok( Devel::PPPort::isOCTAL(ord("7")), "Verify '7' is OCTAL"); -ok(! Devel::PPPort::isOCTAL(ord("8")), "Verify '8' isn't OCTAL"); - -ok( Devel::PPPort::isOCTAL_A(ord("0")), "Verify '0' is OCTAL_A"); -ok(! Devel::PPPort::isOCTAL_A(ord("9")), "Verify '9' isn't OCTAL_A"); - -ok( Devel::PPPort::isOCTAL_L1(ord("2")), "Verify '2' is OCTAL_L1"); -ok(! Devel::PPPort::isOCTAL_L1(ord("8")), "Verify '8' isn't OCTAL_L1"); - -my $way_too_early_msg = 'UTF-8 not implemented on this perl'; - -# For the other properties, we test every code point from 0.255, and a -# smattering of higher ones. First populate a hash with keys like '65:ALPHA' -# to indicate that the code point there is alphabetic -my $i; -my %types; -for $i (0x41..0x5A, 0x61..0x7A, 0xAA, 0xB5, 0xBA, 0xC0..0xD6, 0xD8..0xF6, - 0xF8..0x101) -{ - my $native = Devel::PPPort::LATIN1_TO_NATIVE($i); - $types{"$native:ALPHA"} = 1; - $types{"$native:ALPHANUMERIC"} = 1; - $types{"$native:IDFIRST"} = 1; - $types{"$native:IDCONT"} = 1; - $types{"$native:PRINT"} = 1; - $types{"$native:WORDCHAR"} = 1; -} -for $i (0x30..0x39, 0x660, 0xFF19) { - my $native = Devel::PPPort::LATIN1_TO_NATIVE($i); - $types{"$native:ALPHANUMERIC"} = 1; - $types{"$native:DIGIT"} = 1; - $types{"$native:IDCONT"} = 1; - $types{"$native:WORDCHAR"} = 1; - $types{"$native:GRAPH"} = 1; - $types{"$native:PRINT"} = 1; - $types{"$native:XDIGIT"} = 1 if $i < 255 || ($i >= 0xFF10 && $i <= 0xFF19); -} - -for $i (0..0x7F) { - my $native = Devel::PPPort::LATIN1_TO_NATIVE($i); - $types{"$native:ASCII"} = 1; -} -for $i (0..0x1f, 0x7F..0x9F) { - my $native = Devel::PPPort::LATIN1_TO_NATIVE($i); - $types{"$native:CNTRL"} = 1; -} -for $i (0x21..0x7E, 0xA1..0x101, 0x660) { - my $native = Devel::PPPort::LATIN1_TO_NATIVE($i); - $types{"$native:GRAPH"} = 1; - $types{"$native:PRINT"} = 1; -} -for $i (0x09, 0x20, 0xA0) { - my $native = Devel::PPPort::LATIN1_TO_NATIVE($i); - $types{"$native:BLANK"} = 1; - $types{"$native:SPACE"} = 1; - $types{"$native:PSXSPC"} = 1; - $types{"$native:PRINT"} = 1 if $i > 0x09; -} -for $i (0x09..0x0D, 0x85, 0x2029) { - my $native = Devel::PPPort::LATIN1_TO_NATIVE($i); - $types{"$native:SPACE"} = 1; - $types{"$native:PSXSPC"} = 1; -} -for $i (0x41..0x5A, 0xC0..0xD6, 0xD8..0xDE, 0x100) { - my $native = Devel::PPPort::LATIN1_TO_NATIVE($i); - $types{"$native:UPPER"} = 1; - $types{"$native:XDIGIT"} = 1 if $i < 0x47; -} -for $i (0x61..0x7A, 0xAA, 0xB5, 0xBA, 0xDF..0xF6, 0xF8..0xFF, 0x101) { - my $native = Devel::PPPort::LATIN1_TO_NATIVE($i); - $types{"$native:LOWER"} = 1; - $types{"$native:XDIGIT"} = 1 if $i < 0x67; -} -for $i (0x21..0x2F, 0x3A..0x40, 0x5B..0x60, 0x7B..0x7E, 0xB6, 0xA1, 0xA7, 0xAB, - 0xB7, 0xBB, 0xBF, 0x5BE) -{ - my $native = Devel::PPPort::LATIN1_TO_NATIVE($i); - $types{"$native:PUNCT"} = 1; - $types{"$native:GRAPH"} = 1; - $types{"$native:PRINT"} = 1; -} - -$i = ord('_'); -$types{"$i:WORDCHAR"} = 1; -$types{"$i:IDFIRST"} = 1; -$types{"$i:IDCONT"} = 1; - -# Now find all the unique code points included above. -my %code_points_to_test; -my $key; -for $key (keys %types) { - $key =~ s/:.*//; - $code_points_to_test{$key} = 1; -} - -# And test each one -for $i (sort { $a <=> $b } keys %code_points_to_test) { - my $native = Devel::PPPort::LATIN1_TO_NATIVE($i); - my $hex = sprintf("0x%02X", $native); - - # And for each code point test each of the classes - my $class; - for $class (qw(ALPHA ALPHANUMERIC ASCII BLANK CNTRL DIGIT GRAPH IDCONT - IDFIRST LOWER PRINT PSXSPC PUNCT SPACE UPPER WORDCHAR - XDIGIT)) - { - if ($i < 256) { # For the ones that can fit in a byte, test each of - # three macros. - my $suffix; - for $suffix ("", "_A", "_L1", "_uvchr") { - my $should_be = ($i > 0x7F && $suffix !~ /_(uvchr|L1)/) - ? 0 # Fail on non-ASCII unless unicode - : ($types{"$native:$class"} || 0); - if (ivers($]) < ivers(5.6) && $suffix eq '_uvchr') { - skip("No UTF-8 on this perl", 1); - next; - } - - my $eval_string = "Devel::PPPort::is${class}$suffix($hex)"; - local $SIG{__WARN__} = sub {}; - my $is = eval $eval_string || 0; - die "eval 'For $i: $eval_string' gave $@" if $@; - is($is, $should_be, "'$eval_string'"); - } - } - - # For all code points, test the '_utf8' macros - my $sub_fcn; - for $sub_fcn ("", "_LC") { - my $skip = ""; - if (ivers($]) < ivers(5.6)) { - $skip = $way_too_early_msg; - } - elsif (ivers($]) < ivers(5.7) && $native > 255) { - $skip = "Perls earlier than 5.7 give wrong answers for above Latin1 code points"; - } - elsif (ivers($]) <= ivers(5.11.3) && $native == 0x2029 && ($class eq 'PRINT' || $class eq 'GRAPH')) { - $skip = "Perls earlier than 5.11.3 considered high space characters as isPRINT and isGRAPH"; - } - elsif ($sub_fcn eq '_LC' && $i < 256) { - $skip = "Testing of code points whose results depend on locale is skipped "; - } - my $fcn = "Devel::PPPort::is${class}${sub_fcn}_utf8_safe"; - my $utf8; - - if ($skip) { - skip $skip, 1; - } - else { - $utf8 = quotemeta Devel::PPPort::uvchr_to_utf8($native); - my $should_be = $types{"$native:$class"} || 0; - my $eval_string = "$fcn(\"$utf8\", 0)"; - local $SIG{__WARN__} = sub {}; - my $is = eval $eval_string || 0; - die "eval 'For $i, $eval_string' gave $@" if $@; - is($is, $should_be, sprintf("For U+%04X '%s'", $native, $eval_string)); - } - - # And for the high code points, test that a too short malformation (the - # -1) causes it to fail - if ($i > 255) { - if ($skip) { - skip $skip, 1; - } - elsif (ivers($]) >= ivers(5.25.9)) { - skip("Prints an annoying error message that khw doesn't know how to easily suppress", 1); - } - else { - my $eval_string = "$fcn(\"$utf8\", -1)"; - local $SIG{__WARN__} = sub {}; - my $is = eval "$eval_string" || 0; - die "eval '$eval_string' gave $@" if $@; - is($is, 0, sprintf("For U+%04X '%s'", $native, $eval_string)); - } - } - } - } -} - -my %case_changing = ( 'LOWER' => [ [ ord('A'), ord('a') ], - [ Devel::PPPort::LATIN1_TO_NATIVE(0xC0), - Devel::PPPort::LATIN1_TO_NATIVE(0xE0) ], - [ 0x100, 0x101 ], - ], - 'FOLD' => [ [ ord('C'), ord('c') ], - [ Devel::PPPort::LATIN1_TO_NATIVE(0xC0), - Devel::PPPort::LATIN1_TO_NATIVE(0xE0) ], - [ 0x104, 0x105 ], - [ Devel::PPPort::LATIN1_TO_NATIVE(0xDF), - 'ss' ], - ], - 'UPPER' => [ [ ord('a'), ord('A'), ], - [ Devel::PPPort::LATIN1_TO_NATIVE(0xE0), - Devel::PPPort::LATIN1_TO_NATIVE(0xC0) ], - [ 0x101, 0x100 ], - [ Devel::PPPort::LATIN1_TO_NATIVE(0xDF), - 'SS' ], - ], - 'TITLE' => [ [ ord('c'), ord('C'), ], - [ Devel::PPPort::LATIN1_TO_NATIVE(0xE2), - Devel::PPPort::LATIN1_TO_NATIVE(0xC2) ], - [ 0x103, 0x102 ], - [ Devel::PPPort::LATIN1_TO_NATIVE(0xDF), - 'Ss' ], - ], - ); - -my $name; -for $name (keys %case_changing) { - my @code_points_to_test = @{$case_changing{$name}}; - my $unchanged; - for $unchanged (@code_points_to_test) { - my @pair = @$unchanged; - my $original = $pair[0]; - my $changed = $pair[1]; - my $utf8_changed = $changed; - my $is_cp = $utf8_changed =~ /^\d+$/; - my $should_be_bytes; - if (ivers($]) >= ivers(5.6)) { - if ($is_cp) { - $utf8_changed = Devel::PPPort::uvchr_to_utf8($changed); - $should_be_bytes = Devel::PPPort::UTF8_SAFE_SKIP($utf8_changed, 0); - } - else { - die("Test currently doesn't work for non-ASCII multi-char case changes") if eval '$utf8_changed =~ /[[:^ascii:]]/'; - $should_be_bytes = length $utf8_changed; - } - } - - my $fcn = "to${name}_uvchr"; - my $skip = ""; - - if (ivers($]) < ivers(5.6)) { - $skip = $way_too_early_msg; - } - elsif (! $is_cp) { - $skip = "Can't do uvchr on a multi-char string"; - } - if ($skip) { - skip $skip, 4; - } - else { - if ($is_cp) { - $utf8_changed = Devel::PPPort::uvchr_to_utf8($changed); - $should_be_bytes = Devel::PPPort::UTF8_SAFE_SKIP($utf8_changed, 0); - } - else { - my $non_ascii_re = (ivers($]) >= ivers(5.6)) ? '[[:^ascii:]]' : '[^\x00-\x7F]'; - die("Test currently doesn't work for non-ASCII multi-char case changes") if eval '$utf8_changed =~ /$non_ascii_re/'; - $should_be_bytes = length $utf8_changed; - } - - my $ret = eval "Devel::PPPort::$fcn($original)"; - my $fail = $@; # Have to save $@, as it gets destroyed - is ($fail, "", "$fcn($original) didn't fail"); - my $first = (ivers($]) != ivers(5.6)) - ? substr($utf8_changed, 0, 1) - : $utf8_changed, 0, 1; - is($ret->[0], ord $first, - "ord of $fcn($original) is $changed"); - is($ret->[1], $utf8_changed, - "UTF-8 of of $fcn($original) is correct"); - is($ret->[2], $should_be_bytes, - "Length of $fcn($original) is $should_be_bytes"); - } - - my $truncate; - for $truncate (0..2) { - my $skip; - if (ivers($]) < ivers(5.6)) { - $skip = $way_too_early_msg; - } - elsif (! $is_cp && ivers($]) < ivers(5.7.3)) { - $skip = "Multi-character case change not implemented until 5.7.3"; - } - elsif ($truncate == 2 && ivers($]) > ivers(5.25.8)) { - $skip = "Zero length inputs cause assertion failure; test dies in modern perls"; - } - elsif ($truncate > 0 && length $changed > 1) { - $skip = "Don't test shortened multi-char case changes"; - } - elsif ($truncate > 0 && Devel::PPPort::UVCHR_IS_INVARIANT($original)) { - $skip = "Don't try to test shortened single bytes"; - } - if ($skip) { - skip $skip, 4; - } - else { - my $fcn = "to${name}_utf8_safe"; - my $utf8 = quotemeta Devel::PPPort::uvchr_to_utf8($original); - my $real_truncate = ($truncate < 2) - ? $truncate : $should_be_bytes; - my $eval_string = "Devel::PPPort::$fcn(\"$utf8\", $real_truncate)"; - my $ret = eval "no warnings; $eval_string" || 0; - my $fail = $@; # Have to save $@, as it gets destroyed - if ($truncate == 0) { - is ($fail, "", "Didn't fail on full length input"); - my $first = (ivers($]) != ivers(5.6)) - ? substr($utf8_changed, 0, 1) - : $utf8_changed, 0, 1; - is($ret->[0], ord $first, - "ord of $fcn($original) is $changed"); - is($ret->[1], $utf8_changed, - "UTF-8 of of $fcn($original) is correct"); - is($ret->[2], $should_be_bytes, - "Length of $fcn($original) is $should_be_bytes"); - } - else { - is ($fail, eval 'qr/Malformed UTF-8 character/', - "Gave appropriate error for short char: $original"); - skip("Expected failure means remaining tests for" - . " this aren't relevant", 3); - } - } - } - } -} - -is(&Devel::PPPort::av_top_index([1,2,3]), 2); -is(&Devel::PPPort::av_tindex([1,2,3,4]), 3); |