Lhogho
0.0.027
|
Defines | |
#define | STRING(x) ((x)->unode.c[2]) |
#define | WORD(x) ((x)->unode.a[3]) |
#define | LENGTH(x) ((x)->unode.u[1] >> 8) |
#define | IDLENGTH(x) ((x)->unode.u[1]) |
#define | IS_WORD(x) (ID(x)==WORD_ID) |
#define | IS_SUBWORD(x) (ID(x)==SUBWORD_ID) |
#define | IS_ANY_WORD(x) ((ID(x)==WORD_ID)||(ID(x)==SUBWORD_ID)) |
#define | WORD_SIZE 256*256*256 |
max word length (in chars) | |
Functions | |
atom_t | new_word (chars_t string, uint_t length) |
creates a new word atom | |
atom_t | new_subword (atom_t word, chars_t string, uint_t length) |
creates a new subword atom | |
atom_t | create_word (int buff_len) |
Creates word with len buff_len and empty data. | |
void | delete_word (atom_t a) |
deletes word atom | |
void | delete_subword (atom_t a) |
deletes subword atom | |
void | dump_word (atom_t a, int level) |
dumps (sub)word atom | |
atom_t | read_word (chars_t filename) |
reads a word from an ASCII, UTF-8 or UTF-16LE encoded file | |
void | write_word (atom_t word, chars_t filename) |
writes a word to ASCII or UTF-16LE encoded file | |
int | same_words (atom_t a, atom_t b) |
compares two words | |
atom_t | decode_word (unsigned char *buffer, int size, int dealc) |
decodes ASCII, UTF-8 or UTF-16LE encoded text | |
int | atom_to_boolean (atom_t a, int *np) |
convert atom to a boolean | |
int | atom_to_string (atom_t a, chars_t buff, int *buff_len) |
convert atom to string | |
atom_t | atom_to_word (atom_t data) |
convert atom to word or subword atom | |
atom_t | atom_to_real_word (atom_t data) |
convert atom to word | |
int | same_strings (int ci, chars_t sa, chars_t sb, int n) |
compares two strings | |
Variables | |
int | full_print_p |
integer of FULLPRINTP | |
atom_t | false_true [2] |
array holding words with FALSE and TRUE |
#define STRING | ( | x | ) | ((x)->unode.c[2]) |
#define WORD | ( | x | ) | ((x)->unode.a[3]) |
#define LENGTH | ( | x | ) | ((x)->unode.u[1] >> 8) |
#define IDLENGTH | ( | x | ) | ((x)->unode.u[1]) |
#define IS_SUBWORD | ( | x | ) | (ID(x)==SUBWORD_ID) |
#define IS_ANY_WORD | ( | x | ) | ((ID(x)==WORD_ID)||(ID(x)==SUBWORD_ID)) |
#define WORD_SIZE 256*256*256 |
string | pointer to the first character of the word |
length | length of the word |
Creates a word atom with reference count 1. A NULL_CHAR
is automatically appended to the end of the word.
{
  // Builds a word atom with reference count 1. The atom receives its own
  // copy of the first `length` characters of `string`, followed by NULL_CHAR.
  // When `length` is UNKNOWN the length is measured with STRLEN.
  atom_t a = (atom_t)take_from_pool( &data_pool );

  if( length==UNKNOWN ) length = STRLEN(string);

#ifdef SAFEMODE
  // check the length before it is packed next to the atom ID and used
  // to size the allocation
  assert( length<WORD_SIZE );
#endif //SAFEMODE

  REF(a) = 1;
  IDLENGTH(a) = WORD_ID | (length<<8);
  STRING(a) = (chars_t)ALLOC( (size_t)CHAR_SIZE*(length+1) );

#ifdef SAFEMODE
  assert( STRING(a) );
#endif //SAFEMODE

  STRNCPY (STRING(a),string,length);
  *(STRING(a)+length) = NULL_CHAR;

#ifdef DEBUG_ATOM
  // print the freshly terminated copy: the source text is only guaranteed
  // to hold `length` characters and may lack a terminator at that position
  printf("<ATOM> [%08x] word={"STR"}\n",(int)a,STRING(a));
#endif //DEBUG_ATOM

#ifdef ADVANCED
  stats[ID(a)].allocs++;
  if( stats[ID(a)].max<(stats[ID(a)].allocs-stats[ID(a)].deallocs) )
    stats[ID(a)].max=(stats[ID(a)].allocs-stats[ID(a)].deallocs);
  stats_free--;
#endif //ADVANCED

#ifdef DEBUG_RUNTIME_ATOMS
  if( running_compiled_code )
  {
    outter( TEXT("<RUNTIME> new "), -1 );
    dump_atom_address( a );
    dump_atom( a, 1 );
    outter( TEXT("\n"), -1 );
  }
#endif

#ifdef DEBUG_COMPILETIME_ATOMS
  if( compiling_code )
  {
    outter( TEXT("<COMPILETIME> new "), -1 );
    dump_atom_address( a );
    dump_atom( a, 1 );
    outter( TEXT("\n"), -1 );
  }
#endif

  return a;
}
atom_t new_subword | ( | atom_t | word, |
chars_t | string, | ||
uint_t | length | ||
) |
word | word atom containing the host word of the word |
string | pointer to the first character of the word |
length | length of the word |
Creates a subword atom containing a substring of the string in a word atom. The reference count of the subword is 1, the reference count of the host word atom is incremented.
If word
parameter is subword, then it is replaced by its hosting word.
{
  // Builds a subword atom (reference count 1) whose text is the `length`
  // characters starting at `string`. No characters are copied: the atom
  // keeps the pointer and takes a reference on the hosting word instead.
  atom_t a = (atom_t)take_from_pool( &data_pool );
  atom_t host;

#ifdef SAFEMODE
  assert( word );
  assert( length<WORD_SIZE );
#endif //SAFEMODE

  REF(a) = 1;
  IDLENGTH(a) = SUBWORD_ID | (length << 8);

  // a subword never hosts another subword - climb to the real word
  host = word;
  if( IS_SUBWORD(host) )
  {
    host = WORD(host);
  }
  WORD(a) = USE(host);
  STRING(a) = string;

#ifdef DEBUG_ATOM
  printf("<ATOM> [%08x] subword=",(int)a);
  {
    // temporarily terminate the text in place so it can be printed
    char_t saved = string[length];
    string[length] = NULL_CHAR;
    printf("{"STR"}\n",string);
    string[length] = saved;
  }
#endif //DEBUG_ATOM

#ifdef ADVANCED
  stats[ID(a)].allocs++;
  if( stats[ID(a)].max<(stats[ID(a)].allocs-stats[ID(a)].deallocs) )
  {
    stats[ID(a)].max=(stats[ID(a)].allocs-stats[ID(a)].deallocs);
  }
  stats_free--;
#endif //ADVANCED

#ifdef DEBUG_RUNTIME_ATOMS
  if( running_compiled_code )
  {
    outter( TEXT("<RUNTIME> new "), -1 );
    dump_atom_address( a );
    dump_atom( a, 1 );
    outter( TEXT("\n"), -1 );
  }
#endif

#ifdef DEBUG_COMPILETIME_ATOMS
  if( compiling_code )
  {
    outter( TEXT("<COMPILETIME> new "), -1 );
    dump_atom_address( a );
    dump_atom( a, 1 );
    outter( TEXT("\n"), -1 );
  }
#endif

  return a;
}
atom_t create_word | ( | int | buff_len | ) |
Creates new word with given length and no data set.
{
  // Builds a word atom (reference count 1) with storage for buff_len
  // characters plus one terminator slot. The characters are left for the
  // caller to fill in; only the terminator slot is initialised.
  atom_t word;

#ifdef SAFEMODE
  assert (buff_len >= 0);
  assert (buff_len < WORD_SIZE);
#endif //SAFEMODE

  word = (atom_t)take_from_pool(&data_pool);

  REF(word) = 1;
  IDLENGTH(word) = WORD_ID | (buff_len << 8);
  STRING(word) = (chars_t)ALLOC((size_t)CHAR_SIZE * (buff_len+1));

#ifdef SAFEMODE
  assert (STRING(word));
#endif //SAFEMODE

  // the extra slot allocated above is the terminator; write it now so the
  // word is zero-terminated once the caller has filled in the characters
  if( STRING(word) )
  {
    *(STRING(word)+buff_len) = NULL_CHAR;
  }

#ifdef DEBUG_ATOM
  printf("<ATOM> [%08x] word with len %d \n", (int)word, buff_len);
#endif //DEBUG_ATOM

#ifdef ADVANCED
  // same bookkeeping as new_word, including the high-water mark
  stats[ID(word)].allocs++;
  if( stats[ID(word)].max<(stats[ID(word)].allocs-stats[ID(word)].deallocs) )
    stats[ID(word)].max=(stats[ID(word)].allocs-stats[ID(word)].deallocs);
  stats_free--;
#endif //ADVANCED

  return word;
}
void delete_word | ( | atom_t | a | ) |
a | atom to delete |
Deletes the word atom and deallocates memory used for the word's characters.
// Passes the word's character storage, STRING(a), to DEALLOC and then
// returns the atom itself to data_pool. The order matters: STRING(a) is
// read from the atom, so it must be released before the atom is given back.
{ DEALLOC( STRING(a) ); return_to_pool( &data_pool, a ); }
void delete_subword | ( | atom_t | a | ) |
a | atom to delete |
Deletes the subword atom and unlinks it from the host word.
// Applies DEUSE to the host word stored in WORD(a), i.e. the reference that
// new_subword took with USE, and then returns the atom to data_pool.
// STRING(a) is not deallocated here; new_subword only stored the pointer
// it was given (presumably into the host word's characters - confirm).
{ DEUSE( WORD(a) ); return_to_pool( &data_pool, a ); }
a | atom to dump |
level | dump level |
Dumps word or subword atom through the current outter function.
{
  // Sends the text of a (sub)word to the current outter function and adds
  // the number of emitted characters to outter_size.
  chars_t text = STRING(a);
  int total = LENGTH(a);
  int i;

  if( full_print_p )
  {
    // Full print: an empty word, or a word holding a delimiter, an operator,
    // a space or a control character, is wrapped in vertical bars.
    int dirty = (total == 0);

    for( i=0; i<total; i++ )
    {
      char_t ch = DEBAR(text[i]);
      if( ch==TEXT('[') || ch==TEXT(']')
       || ch==TEXT('(') || ch==TEXT(')')
       || ch==TEXT('+') || ch==TEXT('-')
       || ch==TEXT('*') || ch==TEXT('\\')
       || ch==TEXT('<') || ch==TEXT('>')
       || ch==TEXT('=') || ch==TEXT(';')
       || ch==TEXT('|') || ch==TEXT('~')
       || ch<=TEXT(' ') )
      {
        dirty = 1;
        break;
      }
    }

    if( !dirty )
    {
      outter( text, total );
      outter_size += total;
      return;
    }

    outter( TEXT("|"), 1 );
    outter_size += 1;
    for( i=0; i<total; i++ )
    {
      char_t ch = DEBAR(text[i]);
      // bars and backslashes inside the word get a backslash prefix
      if( ch==TEXT('|') || ch==TEXT('\\') )
      {
        outter( TEXT("\\"), 1 );
        outter_size += 1;
      }
      outter( text+i, 1 );
      outter_size += 1;
    }
    outter( TEXT("|"), 1 );
    outter_size += 1;
    return;
  }

  // Plain print: a non-negative width limit (raised to at least 10) cuts
  // longer words short and appends an ellipsis.
  int limit = print_width_limit;
  if( 0<=limit && limit<10 ) limit = 10;

  if( limit<0 || limit>=total )
  {
    outter( text, total );
    outter_size += total;
    return;
  }

  outter( text, limit );
  outter( TEXT("..."), 3 );
  outter_size += limit+3;
}
filename | file name |
Reads the contents of text file into a word atom. If Unicode mode is switched off, the file is assumed to be in ASCII, otherwise it is UTF-8 or UTF-16LE (little endian) depending on the BOM (byte order mark). In case of error its code is stored in global variable errno
.
{
  // Loads the file through load_file and lets decode_word turn the raw
  // bytes into a word atom. A non-zero errno after loading is reported as
  // an OS error atom for this file name.
  int byte_count;
  unsigned char* raw = (unsigned char*)load_file( filename, &byte_count );

  if( errno != 0 )
  {
    return new_os_error( filename );
  }

  // third argument 1: decode_word is asked to deallocate the raw buffer
  return decode_word( raw, byte_count, 1 );
}
void write_word | ( | atom_t | word, |
chars_t | wfilename | ||
) |
word | word to be written |
wfilename | file name |
Writes the contents of a word atom to a file determined by its name. If Unicode mode is switched off, the encoding is ASCII, otherwise it is UTF-16LE (little endian) and the BOM (byte order mark) is included. In case of error its code is stored in global variable errno
.
{
  // Writes the characters of `word` to the file named by `wfilename`.
  // With UNICODE_CHARS the UTF-16LE byte order mark FF FE is written first.
  // Errors are reported through errno: it is 0 on return if everything
  // succeeded, otherwise it holds the code of the first failure.
  FILE* file;
  char* filename = FILENAME(wfilename);
  int   failure;

  // open file
  errno = 0;
  file = fopen( filename, "wb" );

  if( file )
  {
    size_t count = LENGTH(word);
    int    ok = 1;

#ifdef UNICODE_CHARS
    unsigned char bom[2] = {0xFF,0xFE};
    ok = ( fwrite( bom, 1, sizeof bom, file )==sizeof bom );
#endif //UNICODE_CHARS

    // write the characters: element size first, element count second
    if( ok ) ok = ( fwrite( STRING(word), CHAR_SIZE, count, file )==count );

    // a short write that left errno untouched is still a failure
    failure = ok ? 0 : (errno ? errno : EIO);

    // always close the stream, even after a failed write; keep the first
    // error and only report a close failure if nothing went wrong before
    if( fclose( file )!=0 && !failure ) failure = errno ? errno : EIO;

    errno = failure;
  }
  else if( !errno )
  {
    // fopen failed without setting errno
    errno = EIO;
  }

  DEALLOC( filename );
}
int same_words | ( | atom_t | a, |
atom_t | b | ||
) |
a | first word |
b | second word |
Compares two word atoms case sensitive or insensitive depending on option_case_insensitive
global flag.
{
  // Two (sub)words are the same when they have equal lengths and
  // same_strings accepts their characters under the case rule selected by
  // OPTION_CASE_INSENSITIVE.
#ifdef SAFEMODE
  assert( IS_ANY_WORD(a) );
  assert( IS_ANY_WORD(b) );
#endif

  unsigned int len_a = LENGTH(a);

  if( len_a==LENGTH(b) )
  {
    return same_strings( OPTION_CASE_INSENSITIVE, STRING(a), STRING(b), len_a );
  }

  return 0; // different sizes
}
atom_t decode_word | ( | unsigned char * | buffer, |
int | size, | ||
int | dealc | ||
) |
buffer | buffer containing raw data |
size | size of buffer |
dealc | deallocate (=1) or not (=0) the input buffer |
If Unicode mode is switched off, the raw data are assumed to be in ASCII, otherwise they are UTF-8 or UTF-16LE (little endian) depending on the BOM (byte order mark).
BOM signatures are:
FF FE
FE FF
EF BB BF
The input buffer is deallocated if dealc!=0
{
  // Turns raw file data into a word atom with reference count 1.
  // With UNICODE_CHARS the data are treated as UTF-16LE (BOM FF FE),
  // UTF-8 (BOM EF BB BF) or, without a BOM, ASCII. Without UNICODE_CHARS
  // the data are always treated as ASCII.
  // NOTE(review): the ASCII and UTF-8 paths store a terminator inside the
  // input buffer, and the ASCII path stores it at index `size`, so the
  // buffer must have room for size+1 bytes - confirm with the callers.
#ifdef SAFEMODE
  assert( buffer );
  assert( size>=0 );
#endif //SAFEMODE

  atom_t a = (atom_t)take_from_pool( &data_pool );
  REF(a) = 1;

#ifdef ADVANCED
  stats[WORD_ID].allocs++;
  stats_free--;
#endif //ADVANCED

#ifdef UNICODE_CHARS
  // UTF-16LE - the BOM bytes are only examined when they are really there
  if( size>=2 && buffer[0]==0xFF && buffer[1]==0xFE )
  {
    int chars = (size-2)>>1;
    unsigned char* text;

    if( dealc )
    {
      // the caller gives the buffer away, so the word can adopt it
      text = buffer;
    }
    else
    {
      // the caller keeps its buffer, so the word needs storage of its own
      text = (unsigned char*)ALLOC( (size_t)size );
#ifdef SAFEMODE
      assert( text );
#endif //SAFEMODE
    }

    // when the buffer is reused, source and destination overlap:
    // memmove is required, memcpy would be undefined behaviour
    memmove( text, &buffer[2], (size_t)(size-2) );

    // clear everything after the last complete character; this covers the
    // terminator and also a stray trailing byte of an odd-sized payload
    memset( text+2*chars, 0, (size_t)(size-2*chars) );

    IDLENGTH(a) = WORD_ID | (chars<<8);
    STRING(a) = (chars_t)text;
    return a;
  }

  // UTF-8 (header EF BB BF)
  if( size>=3 && buffer[0]==0xEF && buffer[1]==0xBB && buffer[2]==0xBF )
  {
    // drop the BOM in place (overlapping regions, hence memmove)
    memmove( &buffer[0], &buffer[3], (size_t)(size-3) );
    buffer[size-3] = 0;
    STRING(a) = UTF8_to_UTF16( buffer );
    IDLENGTH(a) = WORD_ID | (wcslen(STRING(a)))<<8;
    if( dealc ) DEALLOC( buffer );
    return a;
  }

  // ASCII
  buffer[size] = 0;
  STRING(a) = ASCII_to_UTF16( (char*)buffer );
  IDLENGTH(a) = WORD_ID | (STRLEN(STRING(a)))<<8;
  if( dealc ) DEALLOC( buffer );
#else //UNICODE_CHARS
  // ASCII
  buffer[size] = 0;
  STRING(a) = ASCII_to_ASCII( (char*)buffer );
  IDLENGTH(a) = WORD_ID | (STRLEN(STRING(a)))<<8;
  if( dealc ) DEALLOC( buffer );
#endif //UNICODE_CHARS

  return a;
}
int atom_to_boolean | ( | atom_t | a, |
int * | np | ||
) |
a | atom to convert |
np | pointer where to put the result |
Converts an atom to a boolean integer number, i.e. 0 indicates FALSE and 1 (or non-zero) indicates TRUE. If conversion is possible, then *np will contain the result and the return value will be 1. Otherwise *np will be set to 0 and the return value will be set to 0.
{
  // false_true[0] is the FALSE word and false_true[1] the TRUE word, so the
  // index of the matching entry is the boolean value itself.
  int value;

  *np = 0;

  // fast path: the atom is one of the two shared boolean atoms
  for( value=0; value<2; value++ )
  {
    if( a == false_true[value] )
    {
      *np = value;
      return 1;
    }
  }

  // eliminate non-words
  if( !IS_ANY_WORD(a) )
  {
    return 0;
  }

  // slow path: compare the text, ignoring case, against FALSE then TRUE
  for( value=0; value<2; value++ )
  {
    atom_t candidate = false_true[value];
    if( LENGTH(a)==LENGTH(candidate)
        && same_strings( 1, STRING(a), STRING(candidate), LENGTH(a)) )
    {
      *np = value;
      return 1;
    }
  }

  return 0;
}
int atom_to_string | ( | atom_t | a, |
chars_t | buff, | ||
int * | buff_len | ||
) |
a | atom to convert |
buff | buffer where string will be written |
buff_len | length of the buffer |
Converts an atom to a string. If the buffer is too small, the string is truncated. The conversion is successful if the atom is a word, a subword or a number; otherwise the function returns 0
{
  // Writes the text form of an atom into `buff`.
  // On entry *buff_len is the capacity of the buffer; on success it is
  // replaced by the number of characters produced and 1 is returned.
  // Returns 0 if the atom is neither a (sub)word nor convertible by
  // atom_to_float.

  // Words and subwords: copy as many characters as fit. No terminator is
  // written on this path.
  if( IS_ANY_WORD(a) )
  {
    int min = LENGTH(a) < *buff_len ? LENGTH(a) : *buff_len;
    int i;
    for( i=0; i<min; ++i )
    {
      buff[i] = STRING(a)[i];
    }
    *buff_len = min;
    return 1;
  }

  float64_t x;
  if( !atom_to_float( a, &x ) )
  {
    return 0;
  }

  // In Windows infinities are printed as "1.#INF" instead
  // of just "inf", and "-1.#IND" instead of "nan". So let's fix this.
#ifdef WINDOWS
  {
    chars_t special = NULL;

    if( isinf(x)!=0 && x>0 )      special = TEXT("inf");
    else if( isinf(x)!=0 && x<0 ) special = TEXT("-inf");
    else if( isnan(x)!=0 )        special = TEXT("nan");

    if( special )
    {
      // copy what fits while keeping one slot for the terminator, and
      // report exactly the number of characters that were stored
      int room = *buff_len-1;
      int n = (int)STRLEN(special);
      int i;
      if( n>room ) n = room;
      if( n<0 ) n = 0;
      for( i=0; i<n; ++i ) buff[i] = special[i];
      if( *buff_len>0 ) buff[n] = NULL_CHAR;
      *buff_len = n;
      return 1;
    }
  }
#endif //WINDOWS

  // very large and very small magnitudes use the extra format unchanged
  if( (x>=1e23) || (x<=-1e23) || ((x>=-1e-23) && (x<=1e-23)) )
  {
    SPRINTF( buff, *buff_len, FORMAT_EXTRA_FP, x );
    *buff_len = STRLEN(buff);
    return 1;
  }

  SPRINTF( buff, *buff_len, FORMAT_FP, x );
  *buff_len = STRLEN(buff);

  // drop trailing zeros and a then-dangling decimal point; the length
  // guards keep the scan from stepping in front of the buffer
  while( *buff_len>0 && buff[*buff_len-1]=='0' ) --*buff_len;
  if( *buff_len>0 && buff[*buff_len-1]=='.' ) --*buff_len;
  buff[*buff_len] = '\0';

  return 1;
}
atom_t atom_to_word | ( | atom_t | data | ) |
data | atom to convert |
Converts an atom to a word or a subword atom. If the input is a (sub)word, then it is returned directly. If it is a number, then a new word is created. Otherwise an error atom is returned.
{
  // Maps an atom to a (sub)word atom:
  //  - error atoms are passed through untouched,
  //  - words and subwords are returned with one more reference (USE),
  //  - integers and floats are rendered by atom_to_string into a new word,
  //  - anything else, including unbound atoms, gives ERROR_NOT_A_WORD.
  enum { NUMBER_TEXT_CAPACITY = 128 };

  if( IS_ERROR(data) )
  {
    return data;
  }

  if( IS_ANY_WORD(data) )
  {
    return USE(data);
  }

  // the unbound test stays first so that the numeric tests are never
  // applied to an unbound atom
  if( !IS_UNBOUND(data) && ( IS_INTEGER(data) || IS_FLOAT(data) ) )
  {
    char_t text[NUMBER_TEXT_CAPACITY];
    int    len = NUMBER_TEXT_CAPACITY;

    if( atom_to_string(data,text,&len) )
    {
      return new_word( text, len );
    }
  }

  return new_error( ERROR_NOT_A_WORD, data );
}
atom_t atom_to_real_word | ( | atom_t | data | ) |
data | atom to convert |
Converts an atom to a word. This guarantees that the string in the word atom is zero-terminated. If the input is a word then it is reused. Otherwise a new word is created. Note that a new word is created even if the input is a subword.
{
  // Same conversion as atom_to_word, except that a subword result is
  // replaced by a new word holding a copy of its characters.
  atom_t result = atom_to_word( data );

  if( !IS_SUBWORD(result) )
  {
    return result;
  }

  // copy the characters first, then drop the reference to the subword
  atom_t copy = new_word( STRING(result), LENGTH(result) );
  DEUSE( result );
  return copy;
}
int same_strings | ( | int | ci, |
chars_t | sa, | ||
chars_t | sb, | ||
int | n | ||
) |
ci | 1=case insensitive, 0=case sensitive |
sa | first string |
sb | second string |
n | string length |
Compares two strings case sensitive or insensitive.
int full_print_p |
atom_t false_true[2] |