/*
* Various routines from the OSTA 2.01 specs. Copyrights are included with
* each code segment. Slight whitespace modifications have been made for
* formatting purposes. Typos/bugs have been fixed.
*
*/
#include "udf_osta.h"
#ifndef _KERNEL
#include <ctype.h>
#endif
/*****************************************************************************/
/***********************************************************************
* OSTA compliant Unicode compression, uncompression routines.
* Copyright 1995 Micro Design International, Inc.
* Written by Jason M. Rinn.
* Micro Design International gives permission for the free use of the
* following source code.
*/
/***********************************************************************
 * Expands an OSTA CS0 compressed unicode name into 16-bit Unicode
 * characters, stored in the host's native byte order.
 *
 * The first input byte is the compression ID: 8 means one byte per
 * character, 16 means two bytes per character (high byte first).
 * IDs 254 and 255, used for deleted entries, are handled as 8 and 16.
 *
 * NOTE: Only the compression ID is validated. The caller must supply
 * an output buffer that is large enough and a well-formed input name.
 *
 * RETURN VALUE
 *
 *	The number of unicode characters produced, or -1 if the
 *	compression ID is not recognised.
 */
int
udf_UncompressUnicode(
	int numberOfBytes,	/* (Input) number of bytes read from media. */
	byte *UDFCompressed,	/* (Input) bytes read from media. */
	unicode_t *unicode)	/* (Output) uncompressed unicode characters. */
{
	unsigned int id = UDFCompressed[0];
	int in = 1;		/* next input byte; byte 0 is the ID */
	int out = 0;		/* next output character */

	/* Map the "deleted entry" IDs onto their regular equivalents. */
	if (id == 254)
		id = 8;
	else if (id == 255)
		id = 16;

	if (id != 8 && id != 16)
		return(-1);

	for (; in < numberOfBytes; out++) {
		unicode_t ch = 0;

		/* 16-bit form: the first byte supplies the high bits. */
		if (id == 16)
			ch = UDFCompressed[in++] << 8;

		/*
		 * The low bits follow, unless the input ended on an odd
		 * byte in the 16-bit form.
		 */
		if (in < numberOfBytes)
			ch |= UDFCompressed[in++];

		unicode[out] = ch;
	}

	return(out);
}
/***********************************************************************
 * DESCRIPTION:
 * Packs a string of unicode wide characters (in the compiler's native
 * byte order) into an OSTA CS0 compressed unicode string.
 *
 * The compression ID is written as the first output byte. With ID 8
 * each character contributes its low byte only; with ID 16 each
 * character contributes its high byte followed by its low byte.
 *
 * NOTE: The caller is trusted to pick a compression ID that fits the
 * data; characters are not checked against the chosen width.
 *
 * RETURN VALUE
 *
 *	The total number of bytes in the compressed OSTA CS0 string,
 *	including the compression ID, or -1 if the compression ID is
 *	not 8 or 16.
 */
int
udf_CompressUnicode(
	int numberOfChars,	/* (Input) number of unicode characters. */
	int compID,		/* (Input) compression ID to be used. */
	unicode_t *unicode,	/* (Input) unicode characters to compress. */
	byte *UDFCompressed)	/* (Output) compressed string, as bytes. */
{
	byte *out = UDFCompressed;
	int i;

	/* Unsupported compression ID: nothing is written. */
	if (compID != 8 && compID != 16)
		return(-1);

	/* The compression code leads the stream. */
	*out++ = compID;

	for (i = 0; i < numberOfChars; i++) {
		/* 16-bit form emits the high bits ahead of the low bits. */
		if (compID == 16)
			*out++ = (unicode[i] & 0xFF00) >> 8;
		*out++ = unicode[i] & 0x00FF;
	}

	return((int)(out - UDFCompressed));
}
/*
 * UNICODE checksum: folds n 16-bit characters into a 16-bit value
 * using the crc_table lookup table, one byte at a time.
 * A count of zero or less yields 0.
 */
unsigned short
udf_unicode_cksum(unsigned short *s, int n)
{
	unsigned short crc = 0;
	int i;

	for (i = 0; i < n; i++) {
		unsigned short ch = s[i];

		/*
		 * Feed the high-order byte before the low-order byte, as
		 * if the characters were a big endian byte stream.
		 */
		crc = crc_table[(crc >> 8 ^ (ch >> 8)) & 0xff] ^ (crc << 8);
		crc = crc_table[(crc >> 8 ^ (ch & 0xff)) & 0xff] ^ (crc << 8);
	}

	return crc;
}
/*
 * Calculates a 16-bit checksum of the Implementation Use
 * Extended Attribute header or Application Use Extended Attribute
 * header. The fields AttributeType through ImplementationIdentifier
 * (or ApplicationIdentifier) inclusively represent the
 * data covered by the checksum (48 bytes).
 *
 * data must point to at least 48 readable bytes. The result is the
 * plain sum of those bytes, truncated to 16 bits.
 */
uint16_t udf_ea_cksum(uint8_t *data) {
	uint16_t checksum = 0;
	int count;

	/* Add up the 48 header bytes covered by the checksum. */
	for (count = 0; count < 48; count++) {
		checksum += *data++;
	}

	return checksum;
}

/*
 * Stand-alone self-test for the CRC routine; only built on request.
 * NOTE(review): the opening guard and the test vector were missing here
 * and have been restored after the OSTA sample code - confirm the guard
 * name. cksum() is not defined in the visible part of this file; confirm
 * which CRC routine the self-test is meant to call.
 */
#ifdef MAIN
unsigned char bytes[] = { 0x70, 0x6A, 0x77 };

int
main(void)
{
	unsigned short x;

	x = cksum(bytes, sizeof bytes);
	printf("checksum: calculated=%4.4x, correct=%4.4x\n", x, 0x3299);
	return 0;
}
#endif
/*****************************************************************************/
/* #ifdef NEEDS_ISPRINT */
/***********************************************************************
* OSTA UDF compliant file name translation routine for OS/2,
* Windows 95, Windows NT, Macintosh and UNIX.
* Copyright 1995 Micro Design International, Inc.
* Written by Jason M. Rinn.
* Micro Design International gives permission for the free use of the
* following source code.
*/
/***********************************************************************
* To use these routines with different operating systems, define the
* following for the target platform:
*
* OS/2
* Define OS2
* Define MAXLEN = 254
*
* Windows 95
* Define WIN_95
* Define MAXLEN = 255
*
* Windows NT
* Define WIN_NT
* Define MAXLEN = 255
*
* Macintosh:
* Define MAC.
* Define MAXLEN = 31.
*
* UNIX
* Define UNIX.
* Define MAXLEN as specified by unix version.
*/
#define ILLEGAL_CHAR_MARK 0x005F
#define CRC_MARK 0x0023
#define EXT_SIZE 5
#define PERIOD 0x002E
#define SPACE 0x0020
/*** PROTOTYPES ***/
int IsIllegal(unicode_t ch);
/* Define a function or macro which determines if a Unicode character is
* printable under your implementation.
*/
/*
 * Returns the number of characters that precede the terminating zero
 * character of a unicode string.
 */
int UnicodeLength(unicode_t *string) {
	unicode_t *end = string;

	/* Walk forward to the terminator, then measure the distance. */
	while (*end != 0)
		end++;

	return (int)(end - string);
}
#ifdef _KERNEL
/*
 * Minimal local isprint(): every value from space upwards counts as
 * printable, except 127 (DEL). Returns 1 for printable, 0 otherwise.
 */
static int isprint(int c) {
	if (c < ' ')
		return 0;
	return c != 127;
}
#endif
/***********************************************************************
 * Translates a long file name to one using a MAXLEN and an illegal
 * char set in accord with the OSTA requirements. Assumes the name has
 * already been translated to Unicode.
 *
 * Each run of illegal or non-printable characters is replaced by a
 * single ILLEGAL_CHAR_MARK. If anything had to be replaced or cut, the
 * result is rewritten as <name> CRC_MARK <4 hex digits> [. <ext>],
 * where the hex digits are udf_unicode_cksum() of the original name and
 * <ext> is the translated extension (at most EXT_SIZE characters).
 *
 * newName must have room for MAXLEN characters; it is not terminated.
 *
 * RETURN VALUE
 *
 *	Number of unicode characters in translated name.
 */
int UDFTransName(
	unicode_t *newName,	/* (Output)Translated name. Must be of length
				 * MAXLEN */
	unicode_t *udfName,	/* (Input) Name from UDF volume.*/
	int udfLen)		/* (Input) Length of UDF Name. */
{
	int Index, newIndex = 0, needsCRC = false;
	int extIndex = 0, newExtIndex = 0, hasExt = false;
#if defined OS2 || defined WIN_95 || defined WIN_NT
	int trailIndex = 0;
#endif
	unsigned short valueCRC;
	unicode_t current;
	const char hexChar[] = "0123456789ABCDEF";

	for (Index = 0; Index < udfLen; Index++) {
		current = udfName[Index];

		if (IsIllegal(current) || !UnicodeIsPrint(current)) {
			needsCRC = true;
			/* Replace Illegal and non-displayable chars with
			 * underscore.
			 */
			current = ILLEGAL_CHAR_MARK;
			/* Skip any other illegal or non-displayable
			 * characters.
			 */
			while (Index + 1 < udfLen &&
			    (IsIllegal(udfName[Index + 1]) ||
			    !UnicodeIsPrint(udfName[Index + 1]))) {
				Index++;
			}
		}

		/* Record position of extension, if one is found. */
		if (current == PERIOD && (udfLen - Index - 1) <= EXT_SIZE) {
			if (udfLen == Index + 1) {
				/* A trailing period is NOT an extension. */
				hasExt = false;
			} else {
				hasExt = true;
				extIndex = Index;
				newExtIndex = newIndex;
			}
		}
#if defined OS2 || defined WIN_95 || defined WIN_NT
		/* Record position of last char which is NOT period or space. */
		else if (current != PERIOD && current != SPACE) {
			trailIndex = newIndex;
		}
#endif

		/*
		 * Store the translated character. Once the output is full
		 * the name is being cut short, so it must carry a CRC.
		 */
		if (newIndex < MAXLEN) {
			newName[newIndex++] = current;
		} else {
			needsCRC = true;
		}
	}

#if defined OS2 || defined WIN_95 || defined WIN_NT
	/* For OS2, 95 & NT, truncate any trailing periods and/or spaces. */
	if (trailIndex != newIndex - 1) {
		newIndex = trailIndex + 1;
		needsCRC = true;
		hasExt = false;	/* Trailing period does not make an
				 * extension. */
	}
#endif

	if (needsCRC) {
		unicode_t ext[EXT_SIZE];
		int localExtIndex = 0;

		if (hasExt) {
			int maxFilenameLen;

			/* Translate extension, and store it in ext. */
			for (Index = 0; Index < EXT_SIZE &&
			    extIndex + Index + 1 < udfLen; Index++) {
				current = udfName[extIndex + Index + 1];

				if (IsIllegal(current) ||
				    !UnicodeIsPrint(current)) {
					needsCRC = true;
					/* Replace Illegal and non-displayable
					 * chars with underscore.
					 */
					current = ILLEGAL_CHAR_MARK;
					/* Skip any other illegal or
					 * non-displayable characters, without
					 * looking past the end of the name.
					 */
					while (Index + 1 < EXT_SIZE &&
					    extIndex + Index + 2 < udfLen &&
					    (IsIllegal(udfName[extIndex +
					    Index + 2]) ||
					    !UnicodeIsPrint(udfName[extIndex +
					    Index + 2]))) {
						Index++;
					}
				}
				ext[localExtIndex++] = current;
			}

			/* Truncate filename to leave room for extension and
			 * CRC.
			 */
			maxFilenameLen = ((MAXLEN - 5) - localExtIndex - 1);
			if (newIndex > maxFilenameLen) {
				newIndex = maxFilenameLen;
			} else {
				/* Back up to the period; the extension is
				 * appended again after the CRC. */
				newIndex = newExtIndex;
			}
		} else if (newIndex > MAXLEN - 5) {
			/*If no extension, make sure to leave room for CRC. */
			newIndex = MAXLEN - 5;
		}

		newName[newIndex++] = CRC_MARK;	/* Add mark for CRC. */

		/*Calculate CRC from original filename from FileIdentifier. */
		valueCRC = udf_unicode_cksum(udfName, udfLen);

		/* Convert 16-bits of CRC to hex characters. */
		newName[newIndex++] = hexChar[(valueCRC & 0xf000) >> 12];
		newName[newIndex++] = hexChar[(valueCRC & 0x0f00) >> 8];
		newName[newIndex++] = hexChar[(valueCRC & 0x00f0) >> 4];
		newName[newIndex++] = hexChar[(valueCRC & 0x000f)];

		/* Place a translated extension at end, if found. */
		if (hasExt) {
			newName[newIndex++] = PERIOD;
			for (Index = 0; Index < localExtIndex; Index++) {
				newName[newIndex++] = ext[Index];
			}
		}
	}

	return(newIndex);
}
#if defined OS2 || defined WIN_95 || defined WIN_NT
/***********************************************************************
 * Tests whether a Unicode character equals any character of a
 * zero-terminated ASCII string, much like the standard C function
 * strchr(). The OS2 version of IsIllegal uses it for readability, since
 * all of the illegal characters above 0x0020 are in the ASCII subset of
 * Unicode.
 *
 * RETURN VALUE
 *
 *	Non-zero if the Unicode character is in the given ASCII string.
 */
int UnicodeInString(
	unsigned char *string,	/* (Input) String to search through. */
	unicode_t ch)		/* (Input) Unicode char to search for. */
{
	unsigned char *cursor;

	for (cursor = string; *cursor != '\0'; cursor++) {
		/* Both operands are unsigned, so they compare directly. */
		if (*cursor == ch)
			return(true);
	}

	return(false);
}
#endif /* OS2 */
/***********************************************************************
 * Decides whether the given character is illegal for a given OS.
 * The rule set is chosen at compile time by MAC, UNIX, or one of
 * OS2 / WIN_95 / WIN_NT. If none of them is defined, no character is
 * reported as illegal.
 *
 * RETURN VALUE
 *
 *	Non-zero if char is illegal.
 */
int IsIllegal(unicode_t ch)
{
#ifdef MAC
	/* Only illegal character on the MAC is the colon. */
	return(ch == 0x003A);
#elif defined UNIX
	/* Illegal UNIX characters are NULL and slash. */
	return(ch == 0x0000 || ch == 0x002F);
#elif defined OS2 || defined WIN_95 || defined WIN_NT
	/* Illegal char's for OS/2 according to WARP toolkit. */
	return(ch < 0x0020 ||
	    UnicodeInString((unsigned char *)"\\/:*?\"<>|", ch) != 0);
#else
	/*
	 * No target OS selected. Return a defined value instead of
	 * running off the end of a non-void function.
	 */
	(void)ch;
	return(0);
#endif
}
/* #endif*/ /* NEEDS_ISPRINT */