|
Lhogho
0.0.027
|
Functions | |
| char * | UTF16_to_ASCII (chars_t ws) |
| converts UTF-16 to ASCII | |
| chars_t | ASCII_to_UTF16 (const char *s) |
| converts ASCII to UTF-16 | |
| chars_t | ASCII_to_ASCII (const char *s) |
| converts ASCII to ASCII | |
| byte_t * | UTF16_to_UTF8 (chars_t source) |
| chars_t | UTF8_to_UTF16 (byte_t *source) |
| converts UTF-8 to UTF-16 | |
| void * | load_file (chars_t wfilename, int *filesize) |
| load text file into a word atom | |
| char * UTF16_to_ASCII | ( | chars_t | ws | ) |
Converts string of widechar UTF-16LE encoding to ASCII encoding. The input string is not freed.
| ws | characters to convert |
{
    // Converts the wide string ws to a multibyte string using wcrtomb()
    // (i.e. the current locale's encoding) and returns a copy obtained
    // with ALLOC(). The input string is neither modified nor freed.
    size_t len = wcslen( ws );
    char* buffer = alloca( 4*(len+1) ); // assume one utf16 can expand to 4 bytes max
    char* buf = buffer;
    mbstate_t state;
    size_t nbytes;

    memset (&state, '\0', sizeof (state));
    while (len>0)
    {
        nbytes = wcrtomb (buf, *ws, &state);
        if (nbytes == (size_t)-1)
        {
            // The character cannot be represented in the target encoding.
            // Adding (size_t)-1 to buf would move the write pointer
            // backwards, so emit a placeholder and restart from the
            // initial shift state instead.
            *buf = '?';
            nbytes = 1;
            memset (&state, '\0', sizeof (state));
        }
        buf += nbytes;
        len -= 1;
        ws += 1;
    }
    *buf = '\0';

    // copy the result (including the terminator) into an exact-size block
    len = buf-buffer+1;
    buf = ALLOC( len );
    memcpy( buf, buffer, len );
    return buf;
}
| chars_t ASCII_to_UTF16 | ( | const char * | s | ) |
Converts string of ASCII encoding to widechar UTF-16LE encoding.
| s | characters to convert |
{
    // Converts the multibyte string s to wide characters using mbrtowc()
    // and returns an exact-size copy obtained with ALLOC().
    // The input string is neither modified nor freed.
    size_t len = strlen(s);
    wchar_t *buffer = ALLOC( CHAR_SIZE*(len+1) ); // at most one wide char per input byte
    wchar_t *buf = buffer;
    mbstate_t state;
    size_t nbytes;

    memset (&state, '\0', sizeof (state));
    while (len>0)
    {
        nbytes = mbrtowc (buf, s, len, &state);
        if (nbytes == (size_t)-1 || nbytes == (size_t)-2 || nbytes == 0)
        {
            // Invalid or incomplete sequence (or no progress at all).
            // Using these values in the arithmetic below would make the
            // loop run away, so pass the offending byte through unchanged,
            // consume exactly one byte and restart the conversion state.
            *buf = (wchar_t)(unsigned char)*s;
            nbytes = 1;
            memset (&state, '\0', sizeof (state));
        }
        buf++;
        len -= nbytes;
        s += nbytes;
    }
    *buf = L'\0';

    // shrink to the size actually used (including the terminator)
    len = CHAR_SIZE*(buf-buffer+1);
    buf = ALLOC( len );
    memcpy( buf, buffer, len );
    DEALLOC( buffer );
    return buf;
}
| chars_t ASCII_to_ASCII | ( | const char * | s | ) |
| s | characters to convert |
Converts a string of ASCII encoding to ASCII. It does not actually convert anything; the function exists because it uses the ALLOC() macro, which helps trace memory allocation.
| byte_t* UTF16_to_UTF8 | ( | chars_t | source | ) |
{
    // Encodes every code unit of source as UTF-8 and returns an exact-size,
    // NUL-terminated copy obtained with ALLOC(). Each code unit is encoded
    // on its own; surrogate pairs are not combined.
    int len = STRLEN( source );
    // One code unit may expand to three bytes, so the scratch buffer needs
    // 3*len bytes plus the terminator (len+1 overflows on non-ASCII text).
    byte_t *buffer = alloca( 3*len+1 );
    byte_t *buf = buffer;

    while( len>0 )
    {
        char_t wc = *source;
        if( wc < 0x0080 )
        {   // 0000-007F
            // from: [0xxxxxxx]
            // to:   [0xxxxxxx]
            *buf++ = (byte_t)wc;
        }
        else if( wc < 0x0800 )
        {   // 0080-07FF
            // from: [00000yyy yyxxxxxx]
            // to:   [110yyyyy] [10xxxxxx]
            *buf++ = 0xC0 | (byte_t)(wc >> 6);
            *buf++ = 0x80 | (byte_t)(wc & 0x3F);
        }
        else
        {   // 0800-FFFF
            // from: [zzzzyyyy yyxxxxxx]
            // to:   [1110zzzz] [10yyyyyy] [10xxxxxx]
            *buf++ = 0xE0 | (byte_t)(wc >> 12);
            *buf++ = 0x80 | (byte_t)((wc >> 6) & 0x3F);
            *buf++ = 0x80 | (byte_t)(wc & 0x3F);
        }
        len--;
        source++;
    }
    *buf = '\0';

    // copy the result (including the terminator) into an exact-size block
    len = buf-buffer+1;
    buf = ALLOC( len );
    memcpy( buf, buffer, len );
    return buf;
}
| chars_t UTF8_to_UTF16 | ( | byte_t * | source | ) |
| source | characters to convert |
Converts string of multibyte UTF-8 encoding to widechar UTF-16LE encoding.
{
    // Decodes the NUL-terminated UTF-8 string source into wide characters
    // and returns an exact-size copy obtained with ALLOC().
    // Each decoded value is truncated to 16 bits before it is stored, so
    // code points above FFFF are not represented faithfully.
    int len = strlen ((char*)source);
    wchar_t *buffer = alloca( CHAR_SIZE*(len+1) ); // at most one output char per input byte
    wchar_t *buf = buffer;
    unsigned long wc;
    int extra; // number of continuation bytes still expected

    while( len>0 )
    {
        byte_t lead = *source++;
        len--;

        if( (lead & 0x80)==0x00 )
        {   // 00-7F [0zzz-zzzz]
            wc = lead;
            extra = 0;
        }
        else if( (lead & 0xE0)==0xC0 )
        {   // 080-7FF [110y-yyyy] [10zz-zzzz]
            wc = lead & 0x1F;
            extra = 1;
        }
        else if( (lead & 0xF0)==0xE0 )
        {   // 0800-FFFF [1110-xxxx] [10yy-yyyy] [10zz-zzzz]
            wc = lead & 0x0F;
            extra = 2;
        }
        else
        {   // 01000-10FFFF [1111-wwww] [10xx-xxxx] [10yy-yyyy] [10zz-zzzz]
            wc = lead & 0x07;
            extra = 3;
        }

        // Consume continuation bytes, but never step past the terminator:
        // a sequence cut short at the end of the string is decoded from
        // the bytes that are actually present.
        while( extra>0 && len>0 )
        {
            wc = (wc<<6) + (*source & 0x3F);
            source++;
            len--;
            extra--;
        }

        *buf = (unsigned short)wc;
        buf++;
    }
    *buf = L'\0';

    // copy the result (including the terminator) into an exact-size block
    len = CHAR_SIZE*(buf-buffer+1);
    buf = ALLOC( len );
    memcpy( buf, buffer, len );
    return buf;
}
| wfilename | file name |
| filesize | file size |
Loads a text file that can be encoded as ASCII, multibyte UTF-8 or widechar UTF-16LE. The size of the file is returned in filesize so that the caller can append a null character if needed.
{
    // Reads the whole file named by wfilename into a buffer obtained with
    // ALLOC() and stores the file size in *filesize. The buffer is one byte
    // larger than the file. Error checks are compiled in only when SAFEMODE
    // is defined; in that case NULL is returned on failure.
    FILE* file;           // file stream
    void* buffer;         // file buffer
    struct stat st_info;  // file attributes

    // convert filename to multibyte
    char* filename = FILENAME(wfilename);

    errno = 0;

    // open file
    file = fopen( filename, "rb" );

    // file name is not needed any more; release it before any early return
    // so that the failure paths below do not leak it
    #ifdef UNICODE_CHARS
    DEALLOC( filename );
    #endif

    #ifdef SAFEMODE
    // test the stream itself: errno may be non-zero after a successful call
    if( !file ) return NULL;
    #endif //SAFEMODE

    // get file size
    #ifdef SAFEMODE
    if( fstat( fileno(file), &st_info ) != 0 )
    {
        // st_info is not valid, so the size below would be garbage
        fclose( file );
        return NULL;
    }
    #else
    fstat( fileno(file), &st_info );
    #endif //SAFEMODE
    *filesize = st_info.st_size;

    // allocate buffer
    buffer = ALLOC(*filesize+1);
    #ifdef SAFEMODE
    if( !buffer )
    {
        fclose( file );
        errno = ENOMEM;
        return NULL;
    }
    #endif //SAFEMODE

    // read file into the buffer
    if( *filesize && !fread(buffer,1,*filesize,file ) )
    {
        #ifdef SAFEMODE
        fclose( file ); // do not leak the stream on a failed read
        DEALLOC( buffer );
        return NULL;
        #endif //SAFEMODE
    }

    fclose( file );
    #ifdef SAFEMODE
    if( errno )
    {
        // release with DEALLOC to match the ALLOC above
        DEALLOC( buffer );
        return NULL;
    }
    #endif //SAFEMODE

    return buffer;
}