|
Lhogho
0.0.027
|
Converters | |
Used to convert one string to another. In ASCII mode all conversions are like identities - i.e. no conversion is actually done. | |
| #define | TEXT(a) L##a |
| fix string constants | |
| #define | FILENAME(x) UTF16_to_ASCII(x) |
| fix file names | |
| #define | UNFILENAME(x) ASCII_to_UTF16(x) |
| unfix file names | |
Format strings | |
| #define | NULL_CHAR TEXT('\0') |
| #define | STR "%S" |
| #define | CHR "%C" |
String functions | |
They are used to maintain dual UNICODE/ASCII processing. This is needed because there are different names for multibyte and widechar functions. | |
| #define | NO_MORE WEOF |
| #define | PUTCHAR(x, y) putwc(x,y) |
| #define | GETCHAR(x) getwc(x) |
| #define | STRLEN(x) wcslen(x) |
| #define | STRNCPY(x, y, z) wcsncpy(x,y,z) |
| #define | STRCMP(x, y) wcscmp(x,y) |
| #define | STRTOD(x, y) wcstod(x,y) |
| #define | STRTOL(x, y) wcstoll(x,y,0) |
| #define | TOUPPER(x) towupper(x) |
| #define | TOLOWER(x) towlower(x) |
| #define | ISDIGIT(x) iswdigit(x) |
| #define | STRCHR(x, y) wcschr(x,y) |
| #define | PRINT(x,...) printf(x,__VA_ARGS__) |
| #define | STRFTIME(x, y, z, t) wcsftime(x,y,z,t) |
| #define | SPRINTF(x, n, y, z) swprintf(x,n,y,z) |
| #define | SPRINT(x, n, y,...) swprintf(x,n,y,__VA_ARGS__) |
Functions | |
| chars_t | UTF8_to_UTF16 (byte_t *source) |
| converts UTF-8 to UTF-16 | |
| byte_t * | UTF16_to_UTF8 (chars_t source) |
| char * | UTF16_to_ASCII (chars_t ws) |
| converts UTF-16 to ASCII | |
| chars_t | ASCII_to_UTF16 (const char *s) |
| converts ASCII to UTF-16 | |
| chars_t | ASCII_to_ASCII (const char *s) |
| converts ASCII to ASCII | |
| void * | load_file (chars_t wfilename, int *filesize) |
| load text file into a word atom | |
| #define TEXT | ( | a | ) | L##a |
| #define FILENAME | ( | x | ) | UTF16_to_ASCII(x) |
| #define UNFILENAME | ( | x | ) | ASCII_to_UTF16(x) |
| #define STR "%S" |
| #define CHR "%C" |
| #define NO_MORE WEOF |
| #define PUTCHAR | ( | x, | |
| y | |||
| ) | putwc(x,y) |
| #define GETCHAR | ( | x | ) | getwc(x) |
| #define STRLEN | ( | x | ) | wcslen(x) |
| #define STRNCPY | ( | x, | |
| y, | |||
| z | |||
| ) | wcsncpy(x,y,z) |
| #define STRCMP | ( | x, | |
| y | |||
| ) | wcscmp(x,y) |
| #define STRTOD | ( | x, | |
| y | |||
| ) | wcstod(x,y) |
| #define STRTOL | ( | x, | |
| y | |||
| ) | wcstoll(x,y,0) |
| #define TOUPPER | ( | x | ) | towupper(x) |
| #define TOLOWER | ( | x | ) | towlower(x) |
| #define ISDIGIT | ( | x | ) | iswdigit(x) |
| #define STRCHR | ( | x, | |
| y | |||
| ) | wcschr(x,y) |
| #define PRINT | ( | x, | |
| ... | |||
| ) | printf(x,__VA_ARGS__) |
| #define STRFTIME | ( | x, | |
| y, | |||
| z, | |||
| t | |||
| ) | wcsftime(x,y,z,t) |
| #define SPRINTF | ( | x, | |
| n, | |||
| y, | |||
| z | |||
| ) | swprintf(x,n,y,z) |
| #define SPRINT | ( | x, | |
| n, | |||
| y, | |||
| ... | |||
| ) | swprintf(x,n,y,__VA_ARGS__) |
| chars_t UTF8_to_UTF16 | ( | byte_t * | source | ) |
| source | characters to convert |
Converts string of multibyte UTF-8 encoding to widechar UTF-16LE encoding.
{
  // Decodes the NUL-terminated UTF-8 string `source` into a newly ALLOC'ed,
  // NUL-terminated wide string. Exactly one wide char is produced per
  // UTF-8 sequence; the input is not modified or freed.
  size_t len = strlen ((char*)source);

  // every sequence of 1..4 bytes yields one wide char, so len+1 slots suffice
  wchar_t *buffer = alloca( CHAR_SIZE*(len+1) );
  wchar_t *buf = buffer;
  wchar_t *result;
  size_t size;

  while( len>0 )
  {
    unsigned char lead = (unsigned char)*source;
    unsigned long wc;
    size_t need; // total number of bytes in the current sequence
    size_t i;

    if( (lead & 0x80)==0x00 )
    { // 00-7F [0zzz-zzzz]
      wc = lead;
      need = 1;
    }
    else if( (lead & 0xE0)==0xC0 )
    { // 080-7FF [110y-yyyy] [10zz-zzzz]
      wc = lead & 0x1F;
      need = 2;
    }
    else if( (lead & 0xF0)==0xE0 )
    { // 0800-FFFF [1110-xxxx] [10yy-yyyy] [10zz-zzzz]
      wc = lead & 0x0F;
      need = 3;
    }
    else if( (lead & 0xF8)==0xF0 )
    { // 010000-10FFFF [1111-0www] [10xx-xxxx] [10yy-yyyy] [10zz-zzzz]
      wc = lead & 0x07;
      need = 4;
    }
    else
    { // stray continuation byte or invalid lead byte:
      // consume only this byte and emit the replacement character
      wc = 0xFFFD;
      need = 1;
    }

    if( need>len )
    { // the sequence is cut off by the end of the string;
      // stop here instead of reading past the terminator
      *buf++ = (wchar_t)0xFFFD;
      break;
    }

    // fold the continuation bytes (6 payload bits each) into the code point
    for( i=1; i<need; i++ )
    {
      wc = (wc<<6) + (source[i] & 0x3F);
    }
    source += need;
    len -= need;

    // the result stores one 16-bit unit per character, so a code point
    // outside the BMP cannot be represented; emit U+FFFD rather than
    // silently keeping only its low 16 bits
    if( wc>0xFFFF ) wc = 0xFFFD;

    *buf++ = (unsigned short)wc;
  }
  *buf = L'\0';

  // copy the exact-size result (terminator included) out of the stack buffer
  size = CHAR_SIZE*(buf-buffer+1);
  result = ALLOC( size );
  memcpy( result, buffer, size );
  return result;
}
| byte_t* UTF16_to_UTF8 | ( | chars_t | source | ) |
{
  // Encodes the NUL-terminated wide string `source` as UTF-8 and returns a
  // newly ALLOC'ed, NUL-terminated byte string. The input is not freed.
  size_t len = STRLEN( source );

  // one input character can expand to as many as 4 bytes, so the scratch
  // buffer must be sized for the worst case (len+1 bytes is NOT enough)
  byte_t *buffer = alloca( 4*len+1 );
  byte_t *buf = buffer;
  byte_t *result;
  size_t size;

  while( len>0 )
  {
    char_t wc = *source;
    if( wc < 0x0080 )
    { // 0000-007F
      // from: [0xxxxxxx]
      // to: [0xxxxxxx]
      *buf++ = (byte_t)wc;
    }
    else if( wc < 0x0800 )
    { // 0080-07FF
      // from: [00000yyy yyxxxxxx]
      // to: [110yyyyy] [10xxxxxx]
      *buf++ = 0xC0 | (byte_t)(wc >> 6);
      *buf++ = 0x80 | (byte_t)(wc & 0x3F);
    }
    else if( (unsigned long)wc < 0x10000UL )
    { // 0800-FFFF
      // from: [zzzzyyyy yyxxxxxx]
      // to: [1110zzzz] [10yyyyyy] [10xxxxxx]
      *buf++ = 0xE0 | (byte_t)(wc >> 12);
      *buf++ = 0x80 | (byte_t)((wc >> 6) & 0x3F);
      *buf++ = 0x80 | (byte_t)(wc & 0x3F);
    }
    else
    { // 010000-10FFFF (only reachable when char_t is wider than 16 bits)
      // to: [11110www] [10zzzzzz] [10yyyyyy] [10xxxxxx]
      *buf++ = 0xF0 | (byte_t)((wc >> 18) & 0x07);
      *buf++ = 0x80 | (byte_t)((wc >> 12) & 0x3F);
      *buf++ = 0x80 | (byte_t)((wc >> 6) & 0x3F);
      *buf++ = 0x80 | (byte_t)(wc & 0x3F);
    }
    len--;
    source++;
  }
  *buf = '\0';

  // copy the exact-size result (terminator included) out of the stack buffer
  size = buf-buffer+1;
  result = ALLOC( size );
  memcpy( result, buffer, size );
  return result;
}
| char* UTF16_to_ASCII | ( | chars_t | ws | ) |
Converts string of widechar UTF-16LE encoding to ASCII encoding. The input string is not freed.
| ws | characters to convert |
{
  // Converts the wide string `ws` to a multibyte string with wcrtomb()
  // and returns a newly ALLOC'ed, NUL-terminated copy.
  // The input string is not freed.
  size_t len = wcslen( ws );
  char* buffer = alloca( 4*(len+1) ); // assume one utf16 can expand to 4 bytes max
  char* buf = buffer;
  char* result;
  mbstate_t state;
  size_t nbytes;
  size_t size;

  memset (&state, '\0', sizeof (state));
  while (len>0)
  {
    nbytes = wcrtomb (buf, *ws, &state);
    if( nbytes==(size_t)-1 )
    {
      // the character cannot be represented in the current locale;
      // (size_t)-1 must not be added to buf, so emit a placeholder and
      // restart from the initial conversion state (it is unspecified
      // after a failed conversion)
      *buf = '?';
      nbytes = 1;
      memset (&state, '\0', sizeof (state));
    }
    buf += nbytes;
    len -= 1;
    ws += 1;
  }
  *buf = '\0';

  // copy the exact-size result (terminator included) out of the stack buffer
  size = buf-buffer+1;
  result = ALLOC( size );
  memcpy( result, buffer, size );
  return result;
}
| chars_t ASCII_to_UTF16 | ( | const char * | s | ) |
Converts string of ASCII encoding to widechar UTF-16LE encoding.
| s | characters to convert |
{
  // Converts the multibyte string `s` to a wide string with mbrtowc() and
  // returns a newly ALLOC'ed, NUL-terminated copy. The input is not freed.
  size_t len = strlen(s);

  // each iteration consumes at least one byte and writes one wide char,
  // so len+1 slots are always enough for the scratch buffer
  wchar_t *buffer = ALLOC( CHAR_SIZE*(len+1) );
  wchar_t *buf = buffer;
  mbstate_t state;
  size_t nbytes;

  memset (&state, '\0', sizeof (state));
  while (len>0)
  {
    nbytes = mbrtowc (buf, s, len, &state);
    if( nbytes==(size_t)-1 || nbytes==(size_t)-2 || nbytes==0 )
    {
      // invalid or incomplete sequence (or no progress): these return
      // values are not byte counts, so pass the byte through unchanged,
      // advance by exactly one byte and restart from the initial state
      *buf = (wchar_t)(unsigned char)*s;
      nbytes = 1;
      memset (&state, '\0', sizeof (state));
    }
    buf++;
    len -= nbytes;
    s += nbytes;
  }
  *buf = L'\0';

  // hand back an exact-size copy and release the worst-case scratch buffer
  len = CHAR_SIZE*(buf-buffer+1);
  buf = ALLOC( len );
  memcpy( buf, buffer, len );
  DEALLOC( buffer );
  return buf;
}
| chars_t ASCII_to_ASCII | ( | const char * | s | ) |
| s | characters to convert |
Converts string of ASCII encoding to ASCII. Actually does not convert anything. This function is used because it uses the ALLOC() macro which helps tracing memory allocation.
| wfilename | file name |
| filesize | file size |
Loads a text file which can be ASCII, multibyte UTF-8 or widechar UTF-16LE encoding. The size of the file is returned in filesize so that the caller can append null character if needed.
{
  // Reads the whole file named by `wfilename` into a newly ALLOC'ed buffer
  // and stores the file size in *filesize. The buffer is one byte larger
  // than the file. Returns NULL if the file cannot be opened or examined;
  // the remaining failure checks are compiled in only under SAFEMODE.
  FILE* file; // file stream
  void* buffer; // file buffer
  struct stat st_info; // file attributes
  int saved_errno; // keeps the original error code across cleanup calls

  // convert filename to multibyte
  char* filename = FILENAME(wfilename);
  errno = 0;

  // open file
  file = fopen( filename, "rb" );

  // file name is not needed any more; release it before any early return
  // so it cannot leak, and keep fopen's error code intact for the caller
  #ifdef UNICODE_CHARS
  saved_errno = errno;
  DEALLOC( filename );
  errno = saved_errno;
  #endif

  // test the stream itself: fileno(NULL) below would be invalid
  if( !file ) return NULL;

  // get file size
  if( fstat( fileno(file), &st_info )!=0 )
  {
    saved_errno = errno;
    fclose( file );
    errno = saved_errno;
    return NULL;
  }
  *filesize = st_info.st_size;

  // allocate buffer
  buffer = ALLOC(*filesize+1);
  #ifdef SAFEMODE
  if( !buffer )
  {
    fclose( file );
    errno = ENOMEM;
    return NULL;
  }
  #endif //SAFEMODE

  // read file into the buffer
  if( *filesize && !fread(buffer,1,*filesize,file ) )
  {
    #ifdef SAFEMODE
    // close the stream on this path too, otherwise it leaks
    saved_errno = errno;
    fclose( file );
    DEALLOC( buffer );
    errno = saved_errno;
    return NULL;
    #endif //SAFEMODE
  }
  fclose( file );

  #ifdef SAFEMODE
  if( errno )
  {
    // the buffer came from ALLOC, so it must be released with DEALLOC
    DEALLOC( buffer );
    return NULL;
  }
  #endif //SAFEMODE

  return buffer;
}