Lhogho
0.0.027
|
Functions | |
char * | UTF16_to_ASCII (chars_t ws) |
converts UTF-16 to ASCII | |
chars_t | ASCII_to_UTF16 (const char *s) |
converts ASCII to UTF-16 | |
chars_t | ASCII_to_ASCII (const char *s) |
converts ASCII to ASCII | |
byte_t * | UTF16_to_UTF8 (chars_t source) |
chars_t | UTF8_to_UTF16 (byte_t *source) |
converts UTF-8 to UTF-16 | |
void * | load_file (chars_t wfilename, int *filesize) |
load text file into a word atom |
char * UTF16_to_ASCII | ( | chars_t | ws | ) |
Converts string of widechar UTF-16LE encoding to ASCII encoding. The input string is not freed.
ws | characters to convert |
{
  /*
   * Converts the wide string ws to a multibyte string, one wide character
   * at a time with wcrtomb(), using a stack scratch buffer. The result is
   * then copied into a block obtained from ALLOC() and returned; ws itself
   * is neither modified nor freed.
   *
   * A wide character that wcrtomb() cannot represent is replaced by '?'
   * instead of corrupting the output position.
   */
  size_t len = wcslen( ws );
  char* buffer = alloca( 4*(len+1) ); // assume one utf16 can expand to 4 bytes max
  char* buf = buffer;
  mbstate_t state;
  size_t nbytes;

  memset (&state, '\0', sizeof (state));
  while (len>0)
  {
    nbytes = wcrtomb (buf, *ws, &state);
    if (nbytes == (size_t)-1)
    {
      // wcrtomb failed: adding (size_t)-1 would move buf backwards.
      // Emit a placeholder and reset the (now unspecified) shift state.
      *buf = '?';
      nbytes = 1;
      memset (&state, '\0', sizeof (state));
    }
    buf += nbytes;
    len -= 1;
    ws += 1;
  }
  *buf = '\0';

  // copy the converted text (including the terminator) into traced memory
  len = buf-buffer+1;
  buf = ALLOC( len );
  memcpy( buf, buffer, len );
  return buf;
}
chars_t ASCII_to_UTF16 | ( | const char * | s | ) |
Converts string of ASCII encoding to widechar UTF-16LE encoding.
s | characters to convert |
{
  /*
   * Converts the multibyte string s to a wide string with mbrtowc().
   * The characters are first decoded into a worst-case sized work buffer
   * (one wide character per input byte), then copied into an exactly
   * sized block from ALLOC(), which is returned. The work buffer is
   * released with DEALLOC(); s is not modified.
   *
   * Bytes that do not form a valid multibyte sequence are copied through
   * one at a time so that the loop always makes forward progress.
   */
  size_t len = strlen(s);
  wchar_t *buffer = ALLOC( CHAR_SIZE*(len+1) );
  wchar_t *buf = buffer;
  mbstate_t state;
  size_t nbytes;

  memset (&state, '\0', sizeof (state));
  while (len>0)
  {
    nbytes = mbrtowc (buf, s, len, &state);
    if (nbytes == 0 || nbytes > len)
    {
      // mbrtowc reported an invalid ((size_t)-1) or incomplete ((size_t)-2)
      // sequence; using that value as a byte count would run s and len
      // out of bounds. Take the raw byte instead and restart decoding.
      *buf = (wchar_t)(unsigned char)*s;
      nbytes = 1;
      memset (&state, '\0', sizeof (state));
    }
    buf++;
    len -= nbytes;
    s += nbytes;
  }
  *buf = L'\0';

  // shrink to the size actually used (terminator included)
  len = CHAR_SIZE*(buf-buffer+1);
  buf = ALLOC( len );
  memcpy( buf, buffer, len );
  DEALLOC( buffer );
  return buf;
}
chars_t ASCII_to_ASCII | ( | const char * | s | ) |
s | characters to convert |
Converts string of ASCII encoding to ASCII. Actually does not convert anything. This function is used because it uses the ALLOC() macro which helps tracing memory allocation.
byte_t* UTF16_to_UTF8 | ( | chars_t | source | ) |
{
  /*
   * Encodes every character of source as a 1-, 2- or 3-byte UTF-8
   * sequence. The bytes are assembled in a stack scratch buffer and then
   * copied, terminator included, into a block from ALLOC() which is
   * returned. source is not modified.
   *
   * Each input unit is encoded on its own; surrogate pairs are not
   * combined into a single 4-byte sequence.
   */
  int len = STRLEN( source );
  // every input character may produce up to 3 output bytes, plus terminator
  byte_t *buffer = alloca( 3*len+1 );
  byte_t *buf = buffer;

  while( len>0 )
  {
    char_t wc = *source;
    if( wc < 0x0080 )
    {
      // 0000-007F
      // from: [0xxxxxxx]
      // to:   [0xxxxxxx]
      *buf++ = (byte_t)wc;
    }
    else if( wc < 0x0800 )
    {
      // 0080-07FF
      // from: [00000yyy yyxxxxxx]
      // to:   [110yyyyy] [10xxxxxx]
      *buf++ = 0xC0 | (byte_t)(wc >> 6);
      *buf++ = 0x80 | (byte_t)(wc & 0x3F);
    }
    else
    {
      // 0800-FFFF
      // from: [zzzzyyyy yyxxxxxx]
      // to:   [1110zzzz] [10yyyyyy] [10xxxxxx]
      *buf++ = 0xE0 | (byte_t)(wc >> 12);
      *buf++ = 0x80 | (byte_t)((wc >> 6) & 0x3F);
      *buf++ = 0x80 | (byte_t)(wc & 0x3F);
    }
    len--;
    source++;
  }
  *buf = '\0';

  // copy the encoded bytes (including the terminator) into traced memory
  len = buf-buffer+1;
  buf = ALLOC( len );
  memcpy( buf, buffer, len );
  return buf;
}
chars_t UTF8_to_UTF16 | ( | byte_t * | source | ) |
source | characters to convert |
Converts string of multibyte UTF-8 encoding to widechar UTF-16LE encoding.
{
  /*
   * Decodes the zero-terminated UTF-8 string source into 16-bit units.
   * Decoding happens in a stack scratch buffer; the result, terminator
   * included, is copied into a block from ALLOC() and returned.
   *
   * The sequence length is chosen from the lead byte:
   *   0xxxxxxx -> 1 byte, 110xxxxx -> 2 bytes, 1110xxxx -> 3 bytes,
   *   anything else is treated as a 4-byte sequence.
   * A sequence cut short by the end of the string stops the conversion,
   * so no byte beyond the terminator is ever read. Code points above
   * U+FFFF are written as a surrogate pair; values above U+10FFFF (only
   * possible with malformed input) become U+FFFD.
   */
  int len = strlen ((char*)source);
  // a k-byte sequence yields at most min(k,2) units, so len+1 units suffice
  wchar_t *buffer = alloca( CHAR_SIZE*(len+1) );
  wchar_t *buf = buffer;
  unsigned long wc;
  int tail; // number of continuation bytes that follow the lead byte

  while( len>0 )
  {
    if( (*source & 0x80)==0x00 )
    {
      // 00-7F [0zzz-zzzz]
      wc = *source & 0x7F;
      tail = 0;
    }
    else if( (*source & 0xE0)==0xC0 )
    {
      // 080-7FF [110y-yyyy] [10zz-zzzz]
      wc = *source & 0x1F;
      tail = 1;
    }
    else if( (*source & 0xF0)==0xE0 )
    {
      // 0800-FFFF [1110-xxxx] [10yy-yyyy] [10zz-zzzz]
      wc = *source & 0x0F;
      tail = 2;
    }
    else
    {
      // 010000-10FFFF [1111-0www] [10xx-xxxx] [10yy-yyyy] [10zz-zzzz]
      wc = *source & 0x07;
      tail = 3;
    }

    // truncated sequence: its continuation bytes would lie past the end
    if( tail >= len ) break;

    source++;
    len -= tail+1;
    while( tail>0 )
    {
      wc = (wc<<6) + (*source & 0x3F);
      source++;
      tail--;
    }

    if( wc > 0x10FFFF ) wc = 0xFFFD;

    if( wc > 0xFFFF )
    {
      // outside the BMP: split into high and low surrogate
      wc -= 0x10000;
      *buf++ = (unsigned short)(0xD800 | (wc >> 10));
      *buf++ = (unsigned short)(0xDC00 | (wc & 0x3FF));
    }
    else
    {
      *buf++ = (unsigned short)wc;
    }
  }
  *buf = L'\0';

  // copy the decoded units (including the terminator) into traced memory
  len = CHAR_SIZE*(buf-buffer+1);
  buf = ALLOC( len );
  memcpy( buf, buffer, len );
  return buf;
}
wfilename | file name |
filesize | file size |
Loads a text file which can be ASCII, multibyte UTF-8 or widechar UTF-16LE encoding. The size of the file is returned in filesize
so that the caller can append a null character if needed.
{
  /*
   * Reads the whole file named by wfilename into a buffer obtained from
   * ALLOC() and returns it. The file size in bytes is stored in *filesize;
   * the buffer is one byte larger than that, leaving room for a
   * terminator that this function does not write itself.
   *
   * Error checking is compiled in only when SAFEMODE is defined. In that
   * case NULL is returned on failure, every resource acquired so far is
   * released, and errno is left describing the failure.
   */
  FILE* file;          // file stream
  void* buffer;        // file buffer
  struct stat st_info; // file attributes
  int saved_errno;     // keeps errno intact across cleanup calls

  // convert filename to multibyte
  char* filename = FILENAME(wfilename);
  errno = 0;

  // open file
  file = fopen( filename, "rb" );

  // file name is not needed any more; release it before any early
  // return so that it cannot leak, without disturbing fopen's errno
#ifdef UNICODE_CHARS
  saved_errno = errno;
  DEALLOC( filename );
  errno = saved_errno;
#endif

#ifdef SAFEMODE
  // test the stream itself: errno alone does not tell whether fopen failed
  if( !file ) return NULL;
#endif //SAFEMODE

  // get file size
#ifdef SAFEMODE
  if( fstat( fileno(file), &st_info ) != 0 )
  {
    saved_errno = errno;
    fclose( file );
    errno = saved_errno;
    return NULL;
  }
#else
  fstat( fileno(file), &st_info );
#endif //SAFEMODE
  *filesize = st_info.st_size;

  // allocate buffer (one spare byte for a terminator added by the caller)
  buffer = ALLOC(*filesize+1);
#ifdef SAFEMODE
  if( !buffer )
  {
    fclose( file );
    errno = ENOMEM;
    return NULL;
  }
#endif //SAFEMODE

  // read file into the buffer
  if( *filesize && !fread(buffer,1,*filesize,file ) )
  {
#ifdef SAFEMODE
    saved_errno = errno;
    fclose( file );     // do not leak the stream on a failed read
    DEALLOC( buffer );
    errno = saved_errno;
    return NULL;
#endif //SAFEMODE
  }

  fclose( file );
#ifdef SAFEMODE
  if( errno )
  {
    // release with DEALLOC to match ALLOC and keep allocation tracing balanced
    DEALLOC( buffer );
    return NULL;
  }
#endif //SAFEMODE
  return buffer;
}