|
Lhogho
0.0.027
|
Defines | |
| #define | STRING(x) ((x)->unode.c[2]) |
| #define | WORD(x) ((x)->unode.a[3]) |
| #define | LENGTH(x) ((x)->unode.u[1] >> 8) |
| #define | IDLENGTH(x) ((x)->unode.u[1]) |
| #define | IS_WORD(x) (ID(x)==WORD_ID) |
| #define | IS_SUBWORD(x) (ID(x)==SUBWORD_ID) |
| #define | IS_ANY_WORD(x) ((ID(x)==WORD_ID)||(ID(x)==SUBWORD_ID)) |
| #define | WORD_SIZE 256*256*256 |
| max word length (in chars) | |
Functions | |
| atom_t | new_word (chars_t string, uint_t length) |
| creates a new word atom | |
| atom_t | new_subword (atom_t word, chars_t string, uint_t length) |
| creates a new subword atom | |
| atom_t | create_word (int buff_len) |
| Creates word with len buff_len and empty data. | |
| void | delete_word (atom_t a) |
| deletes word atom | |
| void | delete_subword (atom_t a) |
| deletes subword atom | |
| void | dump_word (atom_t a, int level) |
| dumps (sub)word atom | |
| atom_t | read_word (chars_t filename) |
| reads a word from an ASCII, UTF-8 or UTF-16LE encoded file | |
| void | write_word (atom_t word, chars_t filename) |
| writes a word to ASCII or UTF-16LE encoded file | |
| int | same_words (atom_t a, atom_t b) |
| compares two words | |
| atom_t | decode_word (unsigned char *buffer, int size, int dealc) |
| decodes ASCII, UTF-8 or UTF-16LE encoded text | |
| int | atom_to_boolean (atom_t a, int *np) |
| convert atom to a boolean | |
| int | atom_to_string (atom_t a, chars_t buff, int *buff_len) |
| convert atom to string | |
| atom_t | atom_to_word (atom_t data) |
| convert atom to word or subword atom | |
| atom_t | atom_to_real_word (atom_t data) |
| convert atom to word | |
| int | same_strings (int ci, chars_t sa, chars_t sb, int n) |
| compares two strings | |
Variables | |
| int | full_print_p |
| integer of FULLPRINTP | |
| atom_t | false_true [2] |
| array holding words with FALSE and TRUE | |
| #define STRING | ( | x | ) | ((x)->unode.c[2]) |
| #define WORD | ( | x | ) | ((x)->unode.a[3]) |
| #define LENGTH | ( | x | ) | ((x)->unode.u[1] >> 8) |
| #define IDLENGTH | ( | x | ) | ((x)->unode.u[1]) |
| #define IS_SUBWORD | ( | x | ) | (ID(x)==SUBWORD_ID) |
| #define IS_ANY_WORD | ( | x | ) | ((ID(x)==WORD_ID)||(ID(x)==SUBWORD_ID)) |
| #define WORD_SIZE 256*256*256 |
| string | pointer to the first character of the word |
| length | length of the word |
Creates a word atom with reference count 1. A NULL_CHAR is automatically appended to the end of the word.
{
  /* Obtain a fresh atom cell from data_pool. */
  atom_t a = (atom_t)take_from_pool( &data_pool );

  /* UNKNOWN asks us to measure the source string ourselves. */
  if( length==UNKNOWN )
  {
    length = STRLEN(string);
  }

  /* Header: a single reference, and the type id combined with the
     length stored above the low 8 bits (see LENGTH / IDLENGTH). */
  REF(a) = 1;
  IDLENGTH(a) = WORD_ID | (length<<8);

  /* Character storage: room for `length` characters plus the terminator. */
  STRING(a) = (chars_t)ALLOC( (size_t)CHAR_SIZE*(length+1) );
#ifdef SAFEMODE
  assert( length<WORD_SIZE );
  assert( STRING(a) );
#endif //SAFEMODE

  /* Copy the characters and terminate the copy explicitly. */
  STRNCPY( STRING(a), string, length );
  STRING(a)[length] = NULL_CHAR;

#ifdef DEBUG_ATOM
  printf("<ATOM> [%08x] word={"STR"}\n",(int)a,string);
#endif //DEBUG_ATOM

#ifdef ADVANCED
  /* Allocation statistics: count the allocation and keep the peak
     number of simultaneously live atoms of this type. */
  stats[ID(a)].allocs++;
  if( (stats[ID(a)].allocs-stats[ID(a)].deallocs) > stats[ID(a)].max )
  {
    stats[ID(a)].max = stats[ID(a)].allocs-stats[ID(a)].deallocs;
  }
  stats_free--;
#endif //ADVANCED

#ifdef DEBUG_RUNTIME_ATOMS
  /* Trace atoms created while compiled code is running. */
  if( running_compiled_code )
  {
    outter( TEXT("<RUNTIME> new "), -1 );
    dump_atom_address( a );
    dump_atom( a, 1 );
    outter( TEXT("\n"), -1 );
  }
#endif

#ifdef DEBUG_COMPILETIME_ATOMS
  /* Trace atoms created while code is being compiled. */
  if( compiling_code )
  {
    outter( TEXT("<COMPILETIME> new "), -1 );
    dump_atom_address( a );
    dump_atom( a, 1 );
    outter( TEXT("\n"), -1 );
  }
#endif

  return a;
}
| atom_t new_subword | ( | atom_t | word, |
| chars_t | string, | ||
| uint_t | length | ||
| ) |
| word | host word atom whose string contains the characters of the subword |
| string | pointer to the first character of the word |
| length | length of the word |
Creates a subword atom containing a substring of the string in a word atom. The reference count of the subword is 1, the reference count of the host word atom is incremented.
If the word parameter is itself a subword, it is replaced by its host word.
{
  /* Obtain a fresh atom cell from data_pool. */
  atom_t a = (atom_t)take_from_pool( &data_pool );

#ifdef SAFEMODE
  assert( word );
  assert( length<WORD_SIZE );
#endif //SAFEMODE

  /* Header: a single reference, and the subword id combined with the
     length stored above the low 8 bits. */
  REF(a) = 1;
  IDLENGTH(a) = SUBWORD_ID | (length << 8);

  /* A subword always hangs off a real word: if we were handed a
     subword, climb to its host first. */
  if( IS_SUBWORD(word) )
  {
    word = WORD(word);
  }
  WORD(a) = USE(word);

  /* The characters are not copied; the subword points at the
     caller-supplied position. */
  STRING(a) = string;

#ifdef DEBUG_ATOM
  printf("<ATOM> [%08x] subword=",(int)a);
  /* Temporarily terminate the substring in place so it can be
     printed, then restore the overwritten character. */
  char_t saved;
  saved = string[length];
  string[length] = NULL_CHAR;
  printf("{"STR"}\n",string);
  string[length] = saved;
#endif //DEBUG_ATOM

#ifdef ADVANCED
  /* Allocation statistics: count the allocation and keep the peak
     number of simultaneously live atoms of this type. */
  stats[ID(a)].allocs++;
  if( (stats[ID(a)].allocs-stats[ID(a)].deallocs) > stats[ID(a)].max )
  {
    stats[ID(a)].max = stats[ID(a)].allocs-stats[ID(a)].deallocs;
  }
  stats_free--;
#endif //ADVANCED

#ifdef DEBUG_RUNTIME_ATOMS
  /* Trace atoms created while compiled code is running. */
  if( running_compiled_code )
  {
    outter( TEXT("<RUNTIME> new "), -1 );
    dump_atom_address( a );
    dump_atom( a, 1 );
    outter( TEXT("\n"), -1 );
  }
#endif

#ifdef DEBUG_COMPILETIME_ATOMS
  /* Trace atoms created while code is being compiled. */
  if( compiling_code )
  {
    outter( TEXT("<COMPILETIME> new "), -1 );
    dump_atom_address( a );
    dump_atom( a, 1 );
    outter( TEXT("\n"), -1 );
  }
#endif

  return a;
}
| atom_t create_word | ( | int | buff_len | ) |
Creates new word with given length and no data set.
{
  /* Creates a word atom of buff_len characters whose character data is
     left for the caller to fill in. The atom starts with reference
     count 1. Storage for buff_len+1 characters is allocated and the
     final slot is set to NULL_CHAR so the word is terminated once the
     caller has written its buff_len characters. */
  atom_t word;

#ifdef SAFEMODE
  /* buff_len is shifted left below; a negative value would make that
     shift undefined and the stored length meaningless. */
  assert( buff_len >= 0 );
  assert( buff_len < WORD_SIZE );
#endif //SAFEMODE

  word = (atom_t)take_from_pool( &data_pool );
  REF(word) = 1;
  IDLENGTH(word) = WORD_ID | (buff_len << 8);
  STRING(word) = (chars_t)ALLOC( (size_t)CHAR_SIZE * (buff_len+1) );
#ifdef SAFEMODE
  assert( STRING(word) );
#endif //SAFEMODE

  /* Pre-write the terminator so the word is well-formed even if the
     caller only fills the first buff_len characters. */
  if( STRING(word) )
  {
    STRING(word)[buff_len] = NULL_CHAR;
  }

#ifdef DEBUG_ATOM
  printf("<ATOM> [%08x] word with len %d \n", (int)word, buff_len);
#endif //DEBUG_ATOM

#ifdef ADVANCED
  /* Same bookkeeping as new_word/new_subword: count the allocation and
     keep the peak number of simultaneously live atoms of this type. */
  stats[ID(word)].allocs++;
  if( stats[ID(word)].max<(stats[ID(word)].allocs-stats[ID(word)].deallocs) )
    stats[ID(word)].max=(stats[ID(word)].allocs-stats[ID(word)].deallocs);
  stats_free--;
#endif //ADVANCED

  return word;
}
| void delete_word | ( | atom_t | a | ) |
| a | atom to delete |
Deletes the word atom and deallocates memory used for the word's characters.
{
// Release the character storage referenced by STRING(a) ...
DEALLOC( STRING(a) );
// ... then hand the atom cell itself back to data_pool.
return_to_pool( &data_pool, a );
}
| void delete_subword | ( | atom_t | a | ) |
| a | atom to delete |
Deletes the subword atom and unlinks it from the host word.
{
// A subword does not own its characters (new_subword stores the caller's
// pointer), so only the link to the host word taken with USE() in
// new_subword is released here.
DEUSE( WORD(a) );
// Hand the atom cell itself back to data_pool.
return_to_pool( &data_pool, a );
}
| a | atom to dump |
| level | dump level |
Dumps word or subword atom through the current outter function.
{
  // Plain printing: emit the characters as they are, optionally cut
  // off at the print width limit.
  if( !full_print_p )
  {
    int limit = print_width_limit;

    // a non-negative limit is never allowed to be narrower than 10
    if( limit>=0 && limit<10 )
    {
      limit = 10;
    }

    if( limit<0 || limit>=LENGTH(a) )
    {
      // no limit, or the word fits: print everything
      outter( STRING(a), LENGTH(a) );
      outter_size += LENGTH(a);
    }
    else
    {
      // too long: print the first `limit` characters and an ellipsis
      outter( STRING(a), limit );
      outter( TEXT("..."), 3 );
      outter_size += limit+3;
    }
    return;
  }

  // Full printing: decide whether the word has to be wrapped in bars.
  // An empty word always is; otherwise the scan stops at the first
  // special character.
  int remaining = LENGTH(a);
  int needs_bars = (remaining == 0);
  chars_t cursor = STRING(a);

  while( remaining && !needs_bars )
  {
    char_t ch = DEBAR(*cursor);
    needs_bars = ( ch<=TEXT(' ')  ||
                   ch==TEXT('[')  || ch==TEXT(']') ||
                   ch==TEXT('(')  || ch==TEXT(')') ||
                   ch==TEXT('+')  || ch==TEXT('-') ||
                   ch==TEXT('*')  || ch==TEXT('\\') ||
                   ch==TEXT('<')  || ch==TEXT('>') ||
                   ch==TEXT('=')  || ch==TEXT(';') ||
                   ch==TEXT('|')  || ch==TEXT('~') );
    cursor++;
    remaining--;
  }

  if( !needs_bars )
  {
    // nothing special inside: print the word verbatim
    outter( STRING(a), LENGTH(a) );
    outter_size += LENGTH(a);
    return;
  }

  // Barred form: |...| with a backslash in front of every bar or
  // backslash found in the word.
  outter( TEXT("|"), 1 );
  outter_size++;

  cursor = STRING(a);
  for( remaining = LENGTH(a); remaining; remaining-- )
  {
    char_t ch = DEBAR(*cursor);
    if( ch==TEXT('\\') || ch==TEXT('|') )
    {
      outter( TEXT("\\"), 1 );
      outter_size++;
    }
    // the character itself is emitted as stored, not its DEBAR form
    outter( cursor, 1 );
    outter_size++;
    cursor++;
  }

  outter( TEXT("|"), 1 );
  outter_size++;
}
| filename | file name |
Reads the contents of text file into a word atom. If Unicode mode is switched off, the file is assumed to be in ASCII, otherwise it is UTF-8 or UTF-16LE (little endian) depending on the BOM (byte order mark). In case of error its code is stored in global variable errno.
{
  int filesize;
  unsigned char* buffer;

  // load the raw bytes; load_file reports the byte count via filesize
  buffer = (unsigned char*)load_file( filename, &filesize );

  // success: decode the bytes, letting decode_word dispose of the buffer
  if( !errno )
  {
    return decode_word( buffer, filesize, 1 );
  }

  // failure: turn the errno code into an error atom naming the file
  return new_os_error( filename );
}
| void write_word | ( | atom_t | word, |
| chars_t | wfilename | ||
| ) |
| word | word to be written |
| wfilename | file name |
Writes the contents of a word atom to the file with the given name. If Unicode mode is switched off, the encoding is ASCII; otherwise it is UTF-16LE (little endian) and the BOM (byte order mark) is included. In case of error, the error code is stored in the global variable errno.
{
  /* Writes the characters of `word` to the file named by `wfilename`.
     With UNICODE_CHARS the data is preceded by the UTF-16LE byte order
     mark FF FE. On return errno is 0 on success, otherwise the code of
     the first failure (open, write or close). */
  FILE* file;
  char* filename = FILENAME(wfilename);
  int status;

  // open file; success is judged by the returned stream, because a
  // successful fopen is allowed to leave errno non-zero
  errno = 0;
  file = fopen( filename, "wb" );
  status = file ? 0 : errno;

  if( file )
  {
    int failed = 0;

    errno = 0;
#ifdef UNICODE_CHARS
    // byte order mark for UTF-16LE
    const unsigned char bom[2] = {0xFF,0xFE};
    failed = ( fwrite( bom, 1, sizeof bom, file ) != sizeof bom );
#endif //UNICODE_CHARS

    // write the characters of the word: LENGTH(word) items of CHAR_SIZE bytes
    if( !failed )
    {
      size_t count = LENGTH(word);
      failed = ( fwrite( STRING(word), CHAR_SIZE, count, file ) != count );
    }
    if( failed ) status = errno;

    // always close the stream, even after a failed write; a close
    // failure is reported only if nothing failed before it
    if( fclose( file )!=0 && !status ) status = errno;
  }

  DEALLOC( filename );

  // publish the result last so the cleanup above cannot disturb it
  errno = status;
}
| int same_words | ( | atom_t | a, |
| atom_t | b | ||
| ) |
| a | first word |
| b | second word |
Compares two word atoms case sensitive or insensitive depending on option_case_insensitive global flag.
{
#ifdef SAFEMODE
  assert( IS_WORD(a) || IS_SUBWORD(a) );
  assert( IS_WORD(b) || IS_SUBWORD(b) );
#endif

  unsigned int len_a = LENGTH(a);

  // equal lengths: let same_strings decide, honouring the global
  // case-sensitivity option
  if( len_a==LENGTH(b) )
  {
    return same_strings( OPTION_CASE_INSENSITIVE, STRING(a), STRING(b), len_a );
  }

  // words of different length can never be the same
  return 0;
}
| atom_t decode_word | ( | unsigned char * | buffer, |
| int | size, | ||
| int | dealc | ||
| ) |
| buffer | buffer containing raw data |
| size | size of buffer |
| dealc | deallocate (=1) or not (=0) the input buffer |
If Unicode mode is switched off, the raw data are assumed to be in ASCII, otherwise they are UTF-8 or UTF-16LE (little endian) depending on the BOM (byte order mark).
BOM signatures are:
FF FE (UTF-16LE), FE FF (UTF-16BE), EF BB BF (UTF-8). The input buffer is deallocated if dealc!=0.
{
  /* Builds a word atom (reference count 1) from raw file bytes.
     With UNICODE_CHARS the encoding is chosen by the byte order mark:
     FF FE -> UTF-16LE, EF BB BF -> UTF-8, anything else -> ASCII.
     Without UNICODE_CHARS the bytes are always treated as ASCII.
     The ASCII paths write a terminator at buffer[size], so the buffer
     must have room for at least size+1 bytes. */
#ifdef SAFEMODE
  assert( buffer );
  assert( size>=0 );
#endif //SAFEMODE
  atom_t a = (atom_t)take_from_pool( &data_pool );
  REF(a) = 1;
#ifdef ADVANCED
  // same bookkeeping as new_word: count the allocation and keep the
  // peak number of simultaneously live word atoms
  stats[WORD_ID].allocs++;
  if( stats[WORD_ID].max<(stats[WORD_ID].allocs-stats[WORD_ID].deallocs) )
    stats[WORD_ID].max=(stats[WORD_ID].allocs-stats[WORD_ID].deallocs);
  stats_free--;
#endif //ADVANCED
#ifdef UNICODE_CHARS
  // UTF-16LE (header FF FE); the size test keeps the BOM probe inside
  // the data actually supplied
  if( size>=2 && buffer[0]==0xFF && buffer[1]==0xFE )
  {
    // bytes after the BOM; a stray odd trailing byte is dropped so the
    // two zero bytes below form a whole terminating character
    int payload = (size-2) & ~1;
    unsigned char* text;
    if( dealc )
    {
      // we own the buffer: reuse it as the word's storage; source and
      // destination overlap, so memmove is required
      memmove( buffer, buffer+2, (size_t)payload );
      text = buffer;
    }
    else
    {
      // the caller keeps the buffer: the word needs storage of its own,
      // since deleting the word deallocates STRING(a)
      text = (unsigned char*)ALLOC( (size_t)payload+2 );
#ifdef SAFEMODE
      assert( text );
#endif //SAFEMODE
      memcpy( text, buffer+2, (size_t)payload );
    }
    text[payload] = 0;
    text[payload+1] = 0;
    IDLENGTH(a) = WORD_ID | ((payload>>1)<<8);
    STRING(a) = (chars_t)text;
    return a;
  }
  // UTF-8 (header EF BB BF)
  if( size>=3 && buffer[0]==0xEF && buffer[1]==0xBB && buffer[2]==0xBF )
  {
    // strip the BOM in place (overlapping regions, hence memmove) and
    // terminate the remaining bytes before converting them
    memmove( &buffer[0], &buffer[3], (size_t)(size-3) );
    buffer[size-3] = 0;
    STRING(a) = UTF8_to_UTF16( buffer );
    IDLENGTH(a) = WORD_ID | ((wcslen(STRING(a)))<<8);
    if( dealc ) DEALLOC( buffer );
    return a;
  }
  // ASCII
  buffer[size] = 0;
  STRING(a) = ASCII_to_UTF16( (char*)buffer );
  IDLENGTH(a) = WORD_ID | ((STRLEN(STRING(a)))<<8);
  if( dealc ) DEALLOC( buffer );
#else //UNICODE_CHARS
  // ASCII
  buffer[size] = 0;
  STRING(a) = ASCII_to_ASCII( (char*)buffer );
  IDLENGTH(a) = WORD_ID | ((STRLEN(STRING(a)))<<8);
  if( dealc ) DEALLOC( buffer );
#endif //UNICODE_CHARS
  return a;
}
| int atom_to_boolean | ( | atom_t | a, |
| int * | np | ||
| ) |
| a | atom to convert |
| np | pointer where to put the result |
Converts an atom to a boolean integer number, i.e. 0 indicates FALSE and 1 (or non-zero) indicates TRUE. If conversion is possible, then *np will contain the result and the return value will be 1. Otherwise *np will be set to 0 and the return value will be set to 0.
{
  int i;

  // pessimistic default, kept when the conversion fails
  *np = 0;

  // cheap test first: is it one of the two canonical atoms?
  // index 0 is FALSE, index 1 is TRUE
  for( i=0; i<2; i++ )
  {
    if( a == false_true[i] )
    {
      *np = i;
      return 1;
    }
  }

  // anything that is not a word or subword cannot be a boolean
  if( !IS_ANY_WORD(a) )
  {
    return 0;
  }

  // otherwise compare the text, ignoring case, against FALSE then TRUE
  for( i=0; i<2; i++ )
  {
    atom_t candidate = false_true[i];
    if( LENGTH(a)==LENGTH(candidate) &&
        same_strings( 1, STRING(a), STRING(candidate), LENGTH(a)) )
    {
      *np = i;
      return 1;
    }
  }

  return 0;
}
| int atom_to_string | ( | atom_t | a, |
| chars_t | buff, | ||
| int * | buff_len | ||
| ) |
| a | atom to convert |
| buff | buffer where string will be written |
| buff_len | length of the buffer |
Converts an atom to a string. If the buffer is too small, the string is truncated. The conversion is successful if the atom is a word, a subword or a number; otherwise the function returns 0.
{
  /* Writes the text of atom `a` into `buff`.
     On entry *buff_len is the capacity of buff in characters; on
     success it is replaced by the number of characters produced.
     Returns 1 for words, subwords and anything atom_to_float accepts,
     0 otherwise. Word text is copied without a terminator; number text
     is terminated. */
  if (IS_ANY_WORD(a))
  {
    // copy as many characters as fit; no terminator is appended here
    int min = LENGTH(a) < *buff_len ? LENGTH(a) : *buff_len;
    int i;
    for(i = 0; i < min; ++ i)
    {
      buff[i] = STRING(a)[i];
    }
    *buff_len = min;
    return 1;
  }else
  {
    float64_t x;
    if (!atom_to_float (a, &x))
    {
      return 0;
    }
    // In Window infinities are printed as "1.#INF" instead
    // of just "inf", and "-1.#IND" instead of "nan". So let's fix this.
    // NOTE(review): when the buffer is too small for the literal plus a
    // terminator, *buff_len is left at the buffer capacity rather than
    // the number of characters copied -- confirm callers never pass
    // such small buffers.
    #ifdef WINDOWS
    if(isinf(x)!=0 && x > 0)
    {
      STRNCPY(buff, TEXT("inf"), *buff_len-1);
      if (*buff_len > 4) *buff_len = 3;
      return 1;
    }
    if( isinf(x)!=0 && x < 0)
    {
      STRNCPY(buff, TEXT("-inf"), *buff_len-1);
      if (*buff_len > 5) *buff_len = 4;
      return 1;
    }
    if( isnan(x)!=0 )
    {
      STRNCPY(buff, TEXT("nan"), *buff_len-1);
      if (*buff_len > 4) *buff_len = 3;
      return 1;
    }
    #endif //WINDOWS
    // very large and very small magnitudes use the alternative format
    if( (x>=1e23) || (x<=-1e23) || ((x>=-1e-23) && (x<=1e-23)) )
    {
      SPRINTF( buff, *buff_len, FORMAT_EXTRA_FP, x );
      *buff_len = STRLEN(buff);
    }
    else
    {
      SPRINTF(buff, *buff_len, FORMAT_FP, x);
      *buff_len = STRLEN(buff);
      // Drop trailing zeros and then a dangling decimal point. The
      // length guards stop the scan from reading before the start of
      // the buffer when the formatted text is empty or consists only
      // of zeros.
      // NOTE(review): this assumes FORMAT_FP always emits a '.';
      // otherwise zeros of the integer part would be stripped too.
      while (*buff_len > 0 && buff[*buff_len-1] == '0') --*buff_len;
      if (*buff_len > 0 && buff[*buff_len-1] == '.') --*buff_len;
      buff[*buff_len] = '\0';
    }
    return 1;
  }
}
| atom_t atom_to_word | ( | atom_t | data | ) |
| data | atom to convert |
Converts an atom to a word or a subword atom. If the input is a (sub)word, then it is returned directly. If it is a number, then a new word is created. Otherwise an error atom is returned.
{
  // errors travel through untouched
  if( IS_ERROR(data) )
  {
    return data;
  }

  // words and subwords are returned as they are, with one more use
  if( IS_ANY_WORD(data) )
  {
    return USE(data);
  }

  // numbers are rendered into a local buffer and wrapped in a new word
  if( !IS_UNBOUND(data) && (IS_INTEGER(data) || IS_FLOAT(data)) )
  {
    char_t text[128];
    int text_len = (int)(sizeof text / sizeof text[0]);

    if( atom_to_string( data, text, &text_len ) )
    {
      return new_word( text, text_len );
    }
  }

  // everything else (including unbound atoms) is not a word
  return new_error( ERROR_NOT_A_WORD, data );
}
| atom_t atom_to_real_word | ( | atom_t | data | ) |
| data | atom to convert |
Converts an atom to a word. This guarantees that the string in the word atom is zero-terminated. If the input is a word then it is reused. Otherwise a new word is created. Note that a new word is created even if the input is a subword.
{
  atom_t converted = atom_to_word( data );

  // words (and error atoms) need no further work
  if( !IS_SUBWORD(converted) )
  {
    return converted;
  }

  // a subword only points into its host's characters: copy that slice
  // into a word of its own (new_word terminates the copy), then give
  // the subword back
  atom_t copy = new_word( STRING(converted), LENGTH(converted) );
  DEUSE( converted );
  return copy;
}
| int same_strings | ( | int | ci, |
| chars_t | sa, | ||
| chars_t | sb, | ||
| int | n | ||
| ) |
| ci | 1=case insensitive, 0=case sensitive |
| sa | first string |
| sb | second string |
| n | string length |
Compares two strings case sensitive or insensitive.
| int full_print_p |
| atom_t false_true[2] |