diff options
Diffstat (limited to 'sys/isofs/udf/osta.c')
-rw-r--r-- | sys/isofs/udf/osta.c | 514 |
1 files changed, 514 insertions, 0 deletions
diff --git a/sys/isofs/udf/osta.c b/sys/isofs/udf/osta.c new file mode 100644 index 00000000000..3fc07b6e0ba --- /dev/null +++ b/sys/isofs/udf/osta.c @@ -0,0 +1,514 @@ +/* $OpenBSD: osta.c,v 1.1 2005/03/29 17:24:52 pedro Exp $ */ + +/* + * Various routines from the OSTA 2.01 specs. Copyrights are included with + * each code segment. Slight whitespace modifications have been made for + * formatting purposes. Typos/bugs have been fixed. + * + * $FreeBSD: src/sys/fs/udf/osta.c,v 1.4 2005/01/06 18:10:41 imp Exp $ + */ + +#include <isofs/udf/osta.h> + +/*****************************************************************************/ +/*- + ********************************************************************** + * OSTA compliant Unicode compression, uncompression routines. + * Copyright 1995 Micro Design International, Inc. + * Written by Jason M. Rinn. + * Micro Design International gives permission for the free use of the + * following source code. + */ + +/*********************************************************************** + * Takes an OSTA CS0 compressed unicode name, and converts + * it to Unicode. + * The Unicode output will be in the byte order + * that the local compiler uses for 16-bit values. + * NOTE: This routine only performs error checking on the compID. + * It is up to the user to ensure that the unicode buffer is large + * enough, and that the compressed unicode name is correct. + * + * RETURN VALUE + * + * The number of unicode characters which were uncompressed. + * A -1 is returned if the compression ID is invalid. + */ +int +udf_UncompressUnicode( + int numberOfBytes, /* (Input) number of bytes read from media. */ + byte *UDFCompressed, /* (Input) bytes read from media. */ + unicode_t *unicode) /* (Output) uncompressed unicode characters. */ +{ + unsigned int compID; + int returnValue, unicodeIndex, byteIndex; + + /* Use UDFCompressed to store current byte being read. */ + compID = UDFCompressed[0]; + + /* First check for valid compID. */ + if (compID != 8 && compID != 16) { + returnValue = -1; + } else { + unicodeIndex = 0; + byteIndex = 1; + + /* Loop through all the bytes. */ + while (byteIndex < numberOfBytes) { + if (compID == 16) { + /* Move the first byte to the high bits of the + * unicode char. + */ + unicode[unicodeIndex] = + UDFCompressed[byteIndex++] << 8; + } else { + unicode[unicodeIndex] = 0; + } + if (byteIndex < numberOfBytes) { + /*Then the next byte to the low bits. */ + unicode[unicodeIndex] |= + UDFCompressed[byteIndex++]; + } + unicodeIndex++; + } + returnValue = unicodeIndex; + } + return(returnValue); +} + +/* + * Almost same as udf_UncompressUnicode(). The difference is that + * it keeps byte order of unicode string. + */ +int +udf_UncompressUnicodeByte( + int numberOfBytes, /* (Input) number of bytes read from media. */ + byte *UDFCompressed, /* (Input) bytes read from media. */ + byte *unicode) /* (Output) uncompressed unicode characters. */ +{ + unsigned int compID; + int returnValue, unicodeIndex, byteIndex; + + /* Use UDFCompressed to store current byte being read. */ + compID = UDFCompressed[0]; + + /* First check for valid compID. */ + if (compID != 8 && compID != 16) { + returnValue = -1; + } else { + unicodeIndex = 0; + byteIndex = 1; + + /* Loop through all the bytes. */ + while (byteIndex < numberOfBytes) { + if (compID == 16) { + /* Move the first byte to the high bits of the + * unicode char. + */ + unicode[unicodeIndex++] = + UDFCompressed[byteIndex++]; + } else { + unicode[unicodeIndex++] = 0; + } + if (byteIndex < numberOfBytes) { + /*Then the next byte to the low bits. */ + unicode[unicodeIndex++] = + UDFCompressed[byteIndex++]; + } + } + returnValue = unicodeIndex; + } + return(returnValue); +} + +/*********************************************************************** + * DESCRIPTION: + * Takes a string of unicode wide characters and returns an OSTA CS0 + * compressed unicode string. The unicode MUST be in the byte order of + * the compiler in order to obtain correct results. Returns an error + * if the compression ID is invalid. + * + * NOTE: This routine assumes the implementation already knows, by + * the local environment, how many bits are appropriate and + * therefore does no checking to test if the input characters fit + * into that number of bits or not. + * + * RETURN VALUE + * + * The total number of bytes in the compressed OSTA CS0 string, + * including the compression ID. + * A -1 is returned if the compression ID is invalid. + */ +int +udf_CompressUnicode( + int numberOfChars, /* (Input) number of unicode characters. */ + int compID, /* (Input) compression ID to be used. */ + unicode_t *unicode, /* (Input) unicode characters to compress. */ + byte *UDFCompressed) /* (Output) compressed string, as bytes. */ +{ + int byteIndex, unicodeIndex; + + if (compID != 8 && compID != 16) { + byteIndex = -1; /* Unsupported compression ID ! */ + } else { + /* Place compression code in first byte. */ + UDFCompressed[0] = compID; + + byteIndex = 1; + unicodeIndex = 0; + while (unicodeIndex < numberOfChars) { + if (compID == 16) { + /* First, place the high bits of the char + * into the byte stream. + */ + UDFCompressed[byteIndex++] = + (unicode[unicodeIndex] & 0xFF00) >> 8; + } + /*Then place the low bits into the stream. */ + UDFCompressed[byteIndex++] = + unicode[unicodeIndex] & 0x00FF; + unicodeIndex++; + } + } + return(byteIndex); +} + +/*****************************************************************************/ +/* + * CRC 010041 + */ +static unsigned short crc_table[256] = { + 0x0000, 0x1021, 0x2042, 0x3063, 0x4084, 0x50A5, 0x60C6, 0x70E7, + 0x8108, 0x9129, 0xA14A, 0xB16B, 0xC18C, 0xD1AD, 0xE1CE, 0xF1EF, + 0x1231, 0x0210, 0x3273, 0x2252, 0x52B5, 0x4294, 0x72F7, 0x62D6, + 0x9339, 0x8318, 0xB37B, 0xA35A, 0xD3BD, 0xC39C, 0xF3FF, 0xE3DE, + 0x2462, 0x3443, 0x0420, 0x1401, 0x64E6, 0x74C7, 0x44A4, 0x5485, + 0xA56A, 0xB54B, 0x8528, 0x9509, 0xE5EE, 0xF5CF, 0xC5AC, 0xD58D, + 0x3653, 0x2672, 0x1611, 0x0630, 0x76D7, 0x66F6, 0x5695, 0x46B4, + 0xB75B, 0xA77A, 0x9719, 0x8738, 0xF7DF, 0xE7FE, 0xD79D, 0xC7BC, + 0x48C4, 0x58E5, 0x6886, 0x78A7, 0x0840, 0x1861, 0x2802, 0x3823, + 0xC9CC, 0xD9ED, 0xE98E, 0xF9AF, 0x8948, 0x9969, 0xA90A, 0xB92B, + 0x5AF5, 0x4AD4, 0x7AB7, 0x6A96, 0x1A71, 0x0A50, 0x3A33, 0x2A12, + 0xDBFD, 0xCBDC, 0xFBBF, 0xEB9E, 0x9B79, 0x8B58, 0xBB3B, 0xAB1A, + 0x6CA6, 0x7C87, 0x4CE4, 0x5CC5, 0x2C22, 0x3C03, 0x0C60, 0x1C41, + 0xEDAE, 0xFD8F, 0xCDEC, 0xDDCD, 0xAD2A, 0xBD0B, 0x8D68, 0x9D49, + 0x7E97, 0x6EB6, 0x5ED5, 0x4EF4, 0x3E13, 0x2E32, 0x1E51, 0x0E70, + 0xFF9F, 0xEFBE, 0xDFDD, 0xCFFC, 0xBF1B, 0xAF3A, 0x9F59, 0x8F78, + 0x9188, 0x81A9, 0xB1CA, 0xA1EB, 0xD10C, 0xC12D, 0xF14E, 0xE16F, + 0x1080, 0x00A1, 0x30C2, 0x20E3, 0x5004, 0x4025, 0x7046, 0x6067, + 0x83B9, 0x9398, 0xA3FB, 0xB3DA, 0xC33D, 0xD31C, 0xE37F, 0xF35E, + 0x02B1, 0x1290, 0x22F3, 0x32D2, 0x4235, 0x5214, 0x6277, 0x7256, + 0xB5EA, 0xA5CB, 0x95A8, 0x8589, 0xF56E, 0xE54F, 0xD52C, 0xC50D, + 0x34E2, 0x24C3, 0x14A0, 0x0481, 0x7466, 0x6447, 0x5424, 0x4405, + 0xA7DB, 0xB7FA, 0x8799, 0x97B8, 0xE75F, 0xF77E, 0xC71D, 0xD73C, + 0x26D3, 0x36F2, 0x0691, 0x16B0, 0x6657, 0x7676, 0x4615, 0x5634, + 0xD94C, 0xC96D, 0xF90E, 0xE92F, 0x99C8, 0x89E9, 0xB98A, 0xA9AB, + 0x5844, 0x4865, 0x7806, 0x6827, 0x18C0, 0x08E1, 0x3882, 0x28A3, + 0xCB7D, 0xDB5C, 0xEB3F, 0xFB1E, 0x8BF9, 0x9BD8, 0xABBB, 0xBB9A, + 0x4A75, 0x5A54, 0x6A37, 0x7A16, 0x0AF1, 0x1AD0, 0x2AB3, 0x3A92, + 0xFD2E, 0xED0F, 0xDD6C, 0xCD4D, 0xBDAA, 0xAD8B, 0x9DE8, 0x8DC9, + 0x7C26, 0x6C07, 0x5C64, 0x4C45, 0x3CA2, 0x2C83, 0x1CE0, 0x0CC1, + 0xEF1F, 0xFF3E, 0xCF5D, 0xDF7C, 0xAF9B, 0xBFBA, 0x8FD9, 0x9FF8, + 0x6E17, 0x7E36, 0x4E55, 0x5E74, 0x2E93, 0x3EB2, 0x0ED1, 0x1EF0 +}; + +unsigned short +udf_cksum(s, n) + unsigned char *s; + int n; +{ + unsigned short crc=0; + + while (n-- > 0) + crc = crc_table[(crc>>8 ^ *s++) & 0xff] ^ (crc<<8); + return crc; +} + +/* UNICODE Checksum */ +unsigned short +udf_unicode_cksum(s, n) + unsigned short *s; + int n; +{ + unsigned short crc=0; + + while (n-- > 0) { + /* Take high order byte first--corresponds to a big endian + * byte stream. + */ + crc = crc_table[(crc>>8 ^ (*s>>8)) & 0xff] ^ (crc<<8); + crc = crc_table[(crc>>8 ^ (*s++ & 0xff)) & 0xff] ^ (crc<<8); + } + return crc; +} + +#ifdef MAIN +unsigned char bytes[] = { 0x70, 0x6A, 0x77 }; + +main() +{ + unsigned short x; + x = cksum(bytes, sizeof bytes); + printf("checksum: calculated=%4.4x, correct=%4.4x\en", x, 0x3299); + exit(0); +} +#endif + +/*****************************************************************************/ +#ifdef NEEDS_ISPRINT +/*- + ********************************************************************** + * OSTA UDF compliant file name translation routine for OS/2, + * Windows 95, Windows NT, Macintosh and UNIX. + * Copyright 1995 Micro Design International, Inc. + * Written by Jason M. Rinn. + * Micro Design International gives permission for the free use of the + * following source code. + */ + +/*********************************************************************** + * To use these routines with different operating systems. + * + * OS/2 + * Define OS2 + * Define MAXLEN = 254 + * + * Windows 95 + * Define WIN_95 + * Define MAXLEN = 255 + * + * Windows NT + * Define WIN_NT + * Define MAXLEN = 255 + * + * Macintosh: + * Define MAC. + * Define MAXLEN = 31. + * + * UNIX + * Define UNIX. + * Define MAXLEN as specified by unix version. + */ + +#define ILLEGAL_CHAR_MARK 0x005F +#define CRC_MARK 0x0023 +#define EXT_SIZE 5 +#define TRUE 1 +#define FALSE 0 +#define PERIOD 0x002E +#define SPACE 0x0020 + +/*** PROTOTYPES ***/ +int IsIllegal(unicode_t ch); + +/* Define a function or macro which determines if a Unicode character is + * printable under your implementation. + */ +int UnicodeIsPrint(unicode_t); + +/*********************************************************************** + * Translates a long file name to one using a MAXLEN and an illegal + * char set in accord with the OSTA requirements. Assumes the name has + * already been translated to Unicode. + * + * RETURN VALUE + * + * Number of unicode characters in translated name. + */ +int UDFTransName( + unicode_t *newName, /* (Output)Translated name. Must be of length + * MAXLEN */ + unicode_t *udfName, /* (Input) Name from UDF volume.*/ + int udfLen) /* (Input) Length of UDF Name. */ +{ + int index, newIndex = 0, needsCRC = FALSE; + int extIndex = 0, newExtIndex = 0, hasExt = FALSE; +#if defined OS2 || defined WIN_95 || defined WIN_NT + int trailIndex = 0; +#endif + unsigned short valueCRC; + unicode_t current; + const char hexChar[] = "0123456789ABCDEF"; + + for (index = 0; index < udfLen; index++) { + current = udfName[index]; + + if (IsIllegal(current) || !UnicodeIsPrint(current)) { + needsCRC = TRUE; + /* Replace Illegal and non-displayable chars with + * underscore. + */ + current = ILLEGAL_CHAR_MARK; + /* Skip any other illegal or non-displayable + * characters. + */ + while(index+1 < udfLen && (IsIllegal(udfName[index+1]) + || !UnicodeIsPrint(udfName[index+1]))) { + index++; + } + } + + /* Record position of extension, if one is found. */ + if (current == PERIOD && (udfLen - index -1) <= EXT_SIZE) { + if (udfLen == index + 1) { + /* A trailing period is NOT an extension. */ + hasExt = FALSE; + } else { + hasExt = TRUE; + extIndex = index; + newExtIndex = newIndex; + } + } + +#if defined OS2 || defined WIN_95 || defined WIN_NT + /* Record position of last char which is NOT period or space. */ + else if (current != PERIOD && current != SPACE) { + trailIndex = newIndex; + } +#endif + + if (newIndex < MAXLEN) { + newName[newIndex++] = current; + } else { + needsCRC = TRUE; + } + } + +#if defined OS2 || defined WIN_95 || defined WIN_NT + /* For OS2, 95 & NT, truncate any trailing periods and\or spaces. */ + if (trailIndex != newIndex - 1) { + newIndex = trailIndex + 1; + needsCRC = TRUE; + hasExt = FALSE; /* Trailing period does not make an + * extension. */ + } +#endif + + if (needsCRC) { + unicode_t ext[EXT_SIZE]; + int localExtIndex = 0; + if (hasExt) { + int maxFilenameLen; + /* Translate extension, and store it in ext. */ + for(index = 0; index<EXT_SIZE && + extIndex + index +1 < udfLen; index++ ) { + current = udfName[extIndex + index + 1]; + if (IsIllegal(current) || + !UnicodeIsPrint(current)) { + needsCRC = 1; + /* Replace Illegal and non-displayable + * chars with underscore. + */ + current = ILLEGAL_CHAR_MARK; + /* Skip any other illegal or + * non-displayable characters. + */ + while(index + 1 < EXT_SIZE + && (IsIllegal(udfName[extIndex + + index + 2]) || + !isprint(udfName[extIndex + + index + 2]))) { + index++; + } + } + ext[localExtIndex++] = current; + } + + /* Truncate filename to leave room for extension and + * CRC. + */ + maxFilenameLen = ((MAXLEN - 5) - localExtIndex - 1); + if (newIndex > maxFilenameLen) { + newIndex = maxFilenameLen; + } else { + newIndex = newExtIndex; + } + } else if (newIndex > MAXLEN - 5) { + /*If no extension, make sure to leave room for CRC. */ + newIndex = MAXLEN - 5; + } + newName[newIndex++] = CRC_MARK; /* Add mark for CRC. */ + + /*Calculate CRC from original filename from FileIdentifier. */ + valueCRC = udf_unicode_cksum(udfName, udfLen); + /* Convert 16-bits of CRC to hex characters. */ + newName[newIndex++] = hexChar[(valueCRC & 0xf000) >> 12]; + newName[newIndex++] = hexChar[(valueCRC & 0x0f00) >> 8]; + newName[newIndex++] = hexChar[(valueCRC & 0x00f0) >> 4]; + newName[newIndex++] = hexChar[(valueCRC & 0x000f)]; + + /* Place a translated extension at end, if found. */ + if (hasExt) { + newName[newIndex++] = PERIOD; + for (index = 0;index < localExtIndex ;index++ ) { + newName[newIndex++] = ext[index]; + } + } + } + return(newIndex); +} + +#if defined OS2 || defined WIN_95 || defined WIN_NT +/*********************************************************************** + * Decides if a Unicode character matches one of a list + * of ASCII characters. + * Used by OS2 version of IsIllegal for readability, since all of the + * illegal characters above 0x0020 are in the ASCII subset of Unicode. + * Works very similarly to the standard C function strchr(). + * + * RETURN VALUE + * + * Non-zero if the Unicode character is in the given ASCII string. + */ +int UnicodeInString( + unsigned char *string, /* (Input) String to search through. */ + unicode_t ch) /* (Input) Unicode char to search for. */ +{ + int found = FALSE; + while (*string != '\0' && found == FALSE) { + /* These types should compare, since both are unsigned + * numbers. */ + if (*string == ch) { + found = TRUE; + } + string++; + } + return(found); +} +#endif /* OS2 */ + +/*********************************************************************** + * Decides whether the given character is illegal for a given OS. + * + * RETURN VALUE + * + * Non-zero if char is illegal. + */ +int IsIllegal(unicode_t ch) +{ +#ifdef MAC + /* Only illegal character on the MAC is the colon. */ + if (ch == 0x003A) { + return(1); + } else { + return(0); + } + +#elif defined UNIX + /* Illegal UNIX characters are NULL and slash. */ + if (ch == 0x0000 || ch == 0x002F) { + return(1); + } else { + return(0); + } + +#elif defined OS2 || defined WIN_95 || defined WIN_NT + /* Illegal char's for OS/2 according to WARP toolkit. */ + if (ch < 0x0020 || UnicodeInString("\\/:*?\"<>|", ch)) { + return(1); + } else { + return(0); + } +#endif +} +#endif |