Lhogho  0.0.027
Defines | Functions | Variables
words.h File Reference

Defines

/* Pointer to the first character of a (sub)word atom's text. */
#define STRING(x)   ((x)->unode.c[2])
/* Host word atom whose characters a subword atom refers to. */
#define WORD(x)   ((x)->unode.a[3])
/* Character count: the bits of unode.u[1] above the low 8 bits. */
#define LENGTH(x)   ((x)->unode.u[1] >> 8)
/* Raw combined field; written as <atom id> | (length << 8). */
#define IDLENGTH(x)   ((x)->unode.u[1])
/* True for a word atom that carries its own character storage id. */
#define IS_WORD(x)   (ID(x)==WORD_ID)
/* True for a subword atom. */
#define IS_SUBWORD(x)   (ID(x)==SUBWORD_ID)
/* True for either a word or a subword atom. */
#define IS_ANY_WORD(x)   ((ID(x)==WORD_ID)||(ID(x)==SUBWORD_ID))
/* Parenthesized so the product stays one operand inside expressions such as
   x % WORD_SIZE or x / WORD_SIZE. */
#define WORD_SIZE   (256*256*256)
 max word length (in chars)

Functions

atom_t new_word (chars_t string, uint_t length)
 creates a new word atom
atom_t new_subword (atom_t word, chars_t string, uint_t length)
 creates a new subword atom
atom_t create_word (int buff_len)
 Creates word with len buff_len and empty data.
void delete_word (atom_t a)
 deletes word atom
void delete_subword (atom_t a)
 deletes subword atom
void dump_word (atom_t a, int level)
 dumps (sub)word atom
atom_t read_word (chars_t filename)
 reads a word from an ASCII, UTF-8 or UTF-16LE encoded file
void write_word (atom_t word, chars_t filename)
 writes a word to ASCII or UTF-16LE encoded file
int same_words (atom_t a, atom_t b)
 compares two words
atom_t decode_word (unsigned char *buffer, int size, int dealc)
 decodes ASCII, UTF-8 or UTF-16LE encoded text
int atom_to_boolean (atom_t a, int *np)
 convert atom to a boolean
int atom_to_string (atom_t a, chars_t buff, int *buff_len)
 convert atom to string
atom_t atom_to_word (atom_t data)
 convert atom to word or subword atom
atom_t atom_to_real_word (atom_t data)
 convert atom to word
int same_strings (int ci, chars_t sa, chars_t sb, int n)
 compares two strings

Variables

int full_print_p
 integer of FULLPRINTP
atom_t false_true [2]
 array holding words with FALSE and TRUE

Define Documentation

#define STRING (   x)    ((x)->unode.c[2])
#define WORD (   x)    ((x)->unode.a[3])
#define LENGTH (   x)    ((x)->unode.u[1] >> 8)
#define IDLENGTH (   x)    ((x)->unode.u[1])
#define IS_WORD (   x)    (ID(x)==WORD_ID)
#define IS_SUBWORD (   x)    (ID(x)==SUBWORD_ID)
#define IS_ANY_WORD (   x)    ((ID(x)==WORD_ID)||(ID(x)==SUBWORD_ID))
#define WORD_SIZE   256*256*256

Function Documentation

atom_t new_word ( chars_t  string,
uint_t  length 
)
Parameters:
string: pointer to the first character of the word
length: length of the word in characters, or UNKNOWN to have it measured from string
Returns:
word atom

Creates a word atom with reference count 1. A NULL_CHAR is automatically appended to the end of the word.

{
  // Every atom lives in a cell borrowed from the shared data pool.
  atom_t a = (atom_t)take_from_pool( &data_pool );

  // UNKNOWN means the caller wants the length measured from the string.
  if( length==UNKNOWN )
    {
      length = STRLEN(string);
    }

  // Fresh atom: one reference, id and length packed into a single field,
  // and private storage with room for the terminating NULL_CHAR.
  REF(a)      = 1;
  IDLENGTH(a) = (length<<8) | WORD_ID;
  STRING(a)   = (chars_t)ALLOC( (size_t)CHAR_SIZE*(length+1) );

  #ifdef SAFEMODE
    assert( length<WORD_SIZE );
    assert( STRING(a) );
  #endif //SAFEMODE

  // Copy exactly `length` characters, then terminate explicitly.
  STRNCPY( STRING(a), string, length );
  STRING(a)[length] = NULL_CHAR;

  #ifdef DEBUG_ATOM
    printf("<ATOM>  [%08x] word={"STR"}\n",(int)a,string);
  #endif //DEBUG_ATOM

  #ifdef ADVANCED
    // Allocation statistics: bump the counter and track the high-water
    // mark of simultaneously live atoms of this kind.
    stats[ID(a)].allocs++;
    if( (stats[ID(a)].allocs-stats[ID(a)].deallocs)>stats[ID(a)].max )
      {
        stats[ID(a)].max = stats[ID(a)].allocs-stats[ID(a)].deallocs;
      }
    stats_free--;
  #endif //ADVANCED

  #ifdef DEBUG_RUNTIME_ATOMS
  if( running_compiled_code )
    {
      outter( TEXT("<RUNTIME> new  "), -1 );
      dump_atom_address( a );
      dump_atom( a, 1 );
      outter( TEXT("\n"), -1 );
    }
  #endif
  #ifdef DEBUG_COMPILETIME_ATOMS
  if( compiling_code )
    {
      outter( TEXT("<COMPILETIME> new  "), -1 );
      dump_atom_address( a );
      dump_atom( a, 1 );
      outter( TEXT("\n"), -1 );
    }
  #endif

  return a;
}
atom_t new_subword ( atom_t  word,
chars_t  string,
uint_t  length 
)
Parameters:
word: word atom hosting the characters of the subword
string: pointer to the first character of the subword (inside the host word's text)
length: length of the subword in characters
Returns:
subword atom

Creates a subword atom containing a substring of the string in a word atom. The reference count of the subword is 1, the reference count of the host word atom is incremented.

If word parameter is subword, then it is replaced by its hosting word.

{
  // The subword itself occupies a cell from the shared data pool.
  atom_t sub = (atom_t)take_from_pool( &data_pool );

  #ifdef SAFEMODE
    assert( word );
    assert( length<WORD_SIZE );
  #endif //SAFEMODE

  REF(sub)      = 1;
  IDLENGTH(sub) = (length << 8) | SUBWORD_ID;

  // A subword never hosts another subword: link to the real host word.
  if( IS_SUBWORD(word) )
    {
      word = WORD(word);
    }
  WORD(sub)     = USE(word);
  STRING(sub)   = string;

  #ifdef DEBUG_ATOM
    printf("<ATOM>  [%08x] subword=",(int)sub);
    // Temporarily terminate the slice so it can be printed as a string,
    // then put the overwritten character back.
    char_t saved;
    saved = string[length];
    string[length] = NULL_CHAR;
    printf("{"STR"}\n",string);
    string[length] = saved;
  #endif //DEBUG_ATOM

  #ifdef ADVANCED
    // Allocation statistics, including the high-water mark of live atoms.
    stats[ID(sub)].allocs++;
    if( (stats[ID(sub)].allocs-stats[ID(sub)].deallocs)>stats[ID(sub)].max )
      {
        stats[ID(sub)].max = stats[ID(sub)].allocs-stats[ID(sub)].deallocs;
      }
    stats_free--;
  #endif //ADVANCED

  #ifdef DEBUG_RUNTIME_ATOMS
  if( running_compiled_code )
    {
      outter( TEXT("<RUNTIME> new  "), -1 );
      dump_atom_address( sub );
      dump_atom( sub, 1 );
      outter( TEXT("\n"), -1 );
    }
  #endif
  #ifdef DEBUG_COMPILETIME_ATOMS
  if( compiling_code )
    {
      outter( TEXT("<COMPILETIME> new  "), -1 );
      dump_atom_address( sub );
      dump_atom( sub, 1 );
      outter( TEXT("\n"), -1 );
    }
  #endif

  return sub;
}
atom_t create_word ( int  buff_len)
Returns:
new word

Creates new word with given length and no data set.

{
    atom_t word;

#ifdef SAFEMODE
    assert (buff_len < WORD_SIZE);
#endif //SAFEMODE

    // Take a cell from the shared data pool and set it up as a word atom
    // with a single reference.
    word = (atom_t)take_from_pool(&data_pool);

    REF(word)      = 1;
    IDLENGTH(word) = (buff_len << 8) | WORD_ID;

    // Storage for buff_len characters plus one extra slot; the contents
    // are left for the caller to fill in.
    STRING(word)   = (chars_t)ALLOC((buff_len+1) * (size_t)CHAR_SIZE);

#ifdef SAFEMODE
    assert (STRING(word));
#endif //SAFEMODE

#ifdef DEBUG_ATOM
    printf("<ATOM>  [%08x] word with len %d \n", (int)word, buff_len);
#endif //DEBUG_ATOM

#ifdef ADVANCED
    // Allocation statistics.
    stats[ID(word)].allocs++;
    stats_free--;
#endif //ADVANCED

    return word;
}
void delete_word ( atom_t  a)
Parameters:
a: atom to delete

Deletes the word atom and deallocates memory used for the word's characters.

void delete_subword ( atom_t  a)
Parameters:
a: atom to delete

Deletes the subword atom and unlinks it from the host word.

{
  // Give up the link to the host word; this pairs with the USE() applied
  // to the host when the subword was created.
  DEUSE( WORD(a) );
  // The characters belong to the host word, so only the atom cell itself
  // goes back to the shared data pool.
  return_to_pool( &data_pool, a );
}
void dump_word ( atom_t  a,
int  level 
)
Parameters:
a: atom to dump
level: dump level

Dumps word or subword atom through the current outter function.

{
  // Writes the text of a word or subword atom through outter() and adds
  // the number of emitted characters to outter_size.
  // test for full print
  if( full_print_p )
    {
      // Decide whether the word must be written in |bar| notation: an empty
      // word is always "dirty"; otherwise scan until the first character
      // that is a delimiter/operator or is not above the space character.
      int length = LENGTH(a);
      int dirty = (length == 0);

      chars_t chs;
      for( chs=STRING(a); length && !dirty ; chs++,length-- )
      {
        char_t ch = DEBAR(*chs);
        if( ch==TEXT('[') || ch==TEXT(']') ||
            ch==TEXT('(') || ch==TEXT(')') ||
            ch==TEXT('+') || ch==TEXT('-') ||
            ch==TEXT('*') || ch==TEXT('\\') ||
            ch==TEXT('<') || ch==TEXT('>') ||
            ch==TEXT('=') || ch==TEXT(';') ||
            ch==TEXT('|') || ch==TEXT('~') ||
            ch<=TEXT(' ') )
          dirty = 1;
      }

      if( dirty )
   {
     // Emit |...|, putting a backslash in front of every bar or backslash.
     // The test looks at the DEBAR-ed character, but the character that is
     // written is the stored one.
     length = LENGTH(a);
     outter( TEXT("|"), 1 );
     outter_size += 1;
     for( chs=STRING(a); length; chs++,length-- )
       {
         char_t ch = DEBAR(*chs);
         if( ch==TEXT('|') || ch==TEXT('\\') )
      {
        outter( TEXT("\\"), 1 );
        outter_size += 1;
      }
         outter( chs, 1 );
         outter_size += 1;
       }
     outter( TEXT("|"), 1 );
     outter_size += 1;
   }
      else
   {
     // Nothing needs quoting: write the text unchanged.
     outter( STRING(a), LENGTH(a) );
     outter_size += LENGTH(a);
   }
      return;
    }


  // no full print
  int limit = print_width_limit;

  // A non-negative limit below 10 is raised to 10; a negative limit fails
  // both tests below, so the word is then printed in full.
  if( 0<=limit && limit<10 ) limit=10;

  if( 0<=limit && limit<LENGTH(a) )
    {
      // Too long: print the first `limit` characters followed by "...".
      outter( STRING(a), limit );
      outter( TEXT("..."), 3 );
      outter_size += limit+3;
    }
  else
    {
      outter( STRING(a), LENGTH(a) );
      outter_size += LENGTH(a);
    }
}
atom_t read_word ( chars_t  filename)
Parameters:
filename: name of the file to read
Returns:
word atom containing text from the file

Reads the contents of text file into a word atom. If Unicode mode is switched off, the file is assumed to be in ASCII, otherwise it is UTF-8 or UTF-16LE (little endian) depending on the BOM (byte order mark). In case of error its code is stored in global variable errno.

{
  int byte_count;
  unsigned char* raw;

  // Pull the whole file into memory; a failure shows up as non-zero errno.
  raw = (unsigned char*)load_file( filename, &byte_count );
  if( errno )
    {
      return new_os_error( filename );
    }

  // The third argument (1) tells decode_word to deallocate the raw buffer.
  return decode_word( raw, byte_count, 1 );
}
void write_word ( atom_t  word,
chars_t  wfilename 
)
Parameters:
word: word to be written
wfilename: name of the file to write

Writes the contents of a word atom to the file with the given name. If Unicode mode is switched off, the encoding is ASCII; otherwise it is UTF-16LE (little endian) and the BOM (byte order mark) is included. In case of error, its code is stored in the global variable errno.

{
  // Writes the characters of `word` to the file named by `wfilename`.
  // On return errno is 0 on success and non-zero on any failure.
  FILE* file;
  char* filename = FILENAME(wfilename);
  int   error = 0;

  // open file
  errno = 0;
  file = fopen( filename, "wb" );
  if( !file )
    {
      // fopen signals failure through its return value; guarantee that the
      // caller sees a non-zero errno even if the library left it untouched
      error = errno ? errno : EIO;
    }
  else
    {
      size_t count = LENGTH(word);

      // a successful library call is allowed to modify errno, so it is not
      // a reliable success indicator -- start the write phase from zero
      errno = 0;

      // write the word into the file
      #ifdef UNICODE_CHARS
        unsigned char bom[2] = {0xFF,0xFE};   // UTF-16LE byte order mark
        if( fwrite( bom, 1, 2, file )!=2 ) error = errno ? errno : EIO;
      #endif //UNICODE_CHARS
      if( !error && count &&
          fwrite( STRING(word), CHAR_SIZE, count, file )!=count )
        {
          error = errno ? errno : EIO;
        }

      // always close the stream, even after a failed write, so the handle
      // is not leaked; a failing fclose means buffered data was lost
      if( fclose( file )!=0 && !error ) error = errno ? errno : EIO;
    }

  DEALLOC( filename );

  // publish the outcome last so that nothing above can overwrite it
  errno = error;
}
int same_words ( atom_t  a,
atom_t  b 
)
Parameters:
a: first word
b: second word
Returns:
0 if the words are different, non-zero otherwise

Compares two word atoms case sensitive or insensitive depending on option_case_insensitive global flag.

{
#ifdef SAFEMODE
  assert( IS_WORD(a) || IS_SUBWORD(a) );
  assert( IS_WORD(b) || IS_SUBWORD(b) );
#endif

  // Words of unequal length can never match, so the character scan is
  // only worth doing when the lengths agree.
  unsigned int len = LENGTH(a);
  if( len==LENGTH(b) )
    {
      return same_strings( OPTION_CASE_INSENSITIVE, STRING(a), STRING(b), len );
    }

  return 0;
}
atom_t decode_word ( unsigned char *  buffer,
int  size,
int  dealc 
)
Parameters:
buffer: buffer containing raw data
size: size of the raw data in bytes
dealc: deallocate (=1) or not (=0) the input buffer
Returns:
word atom containing decoded text

If Unicode mode is switched off, the raw data are assumed to be in ASCII, otherwise they are UTF-8 or UTF-16LE (little endian) depending on the BOM (byte order mark).

BOM signatures are:

  • UTF-16LE: FF FE
  • UTF-16BE: FE FF
  • UTF-8: EF BB BF

The input buffer is deallocated if dealc!=0

{
  // Builds a word atom from raw file/resource bytes.
  // NOTE(review): the ASCII paths (and the UTF-8 path) write a terminator
  // into the input buffer, at buffer[size] for ASCII, so the buffer is
  // presumably allocated with at least one spare byte -- confirm in callers.
  #ifdef SAFEMODE
    assert( buffer );
  #endif //SAFEMODE
  atom_t a = (atom_t)take_from_pool( &data_pool );
  REF(a) = 1;

  #ifdef ADVANCED
    stats[WORD_ID].allocs++;
    stats_free--;
  #endif //ADVANCED

  #ifdef UNICODE_CHARS
    // UTF-16LE (BOM FF FE); the size test keeps the BOM probe inside the data
    if( size>=2 && buffer[0]==0xFF && buffer[1]==0xFE )
    {
      int payload = size-2;            // bytes that follow the BOM
      unsigned char* text;

      if( dealc )
      { // ownership of the buffer is handed over, so reuse it in place;
        // source and destination overlap, hence memmove and not memcpy
        text = buffer;
        memmove( text, buffer+2, (size_t)payload );
      }
      else
      { // the caller keeps its buffer, so the word needs its own copy
        text = (unsigned char*)ALLOC( (size_t)size );
        #ifdef SAFEMODE
          assert( text );
        #endif //SAFEMODE
        memcpy( text, buffer+2, (size_t)payload );
      }
      // two zero bytes terminate the 16-bit character string
      text[size-2] = 0;
      text[size-1] = 0;
      IDLENGTH(a) = WORD_ID | ((payload>>1)<<8);
      STRING(a) = (chars_t)text;
      return a;
    }
    // UTF-8 (BOM EF BB BF)
    if( size>=3 && buffer[0]==0xEF && buffer[1]==0xBB && buffer[2]==0xBF )
    {
      // strip the BOM in place (overlapping move) and terminate the text
      memmove( buffer, buffer+3, (size_t)(size-3) );
      buffer[size-3] = 0;
      STRING(a) = UTF8_to_UTF16( buffer );
      IDLENGTH(a) = WORD_ID | (wcslen(STRING(a))<<8);
      if( dealc ) DEALLOC( buffer );
      return a;
    }
    // ASCII
    buffer[size] = 0;
    STRING(a) = ASCII_to_UTF16( (char*)buffer );
    IDLENGTH(a) = WORD_ID | (STRLEN(STRING(a))<<8);
    if( dealc ) DEALLOC( buffer );

  #else //UNICODE_CHARS

    // ASCII
    buffer[size] = 0;
    STRING(a) = ASCII_to_ASCII( (char*)buffer );
    IDLENGTH(a) = WORD_ID | (STRLEN(STRING(a))<<8);
    if( dealc ) DEALLOC( buffer );
  #endif //UNICODE_CHARS

  return a;
}
int atom_to_boolean ( atom_t  a,
int *  np 
)
Parameters:
a: atom to convert
np: pointer where to put the result
Returns:
returns 1 if conversion was successful.

Converts an atom to a boolean integer number, i.e. 0 indicates FALSE and 1 (or non-zero) indicates TRUE. If conversion is possible, then *np will contain the result and the return value will be 1. Otherwise *np will be set to 0 and the return value will be set to 0.

{
  int truth;

  // Pessimistic default: the result stays 0 unless a match is found.
  *np = 0;

  // Cheap test first: is it one of the canonical FALSE/TRUE atoms?
  // Index 0 holds FALSE and index 1 holds TRUE.
  for( truth=0; truth<2; truth++ )
    {
      if( a == false_true[truth] )
        {
          *np = truth;
          return 1;
        }
    }

  // Anything that is not a word or subword cannot spell a boolean.
  if( !IS_ANY_WORD(a) ) return 0;

  // Compare the text case-insensitively against FALSE, then TRUE.
  for( truth=0; truth<2; truth++ )
    {
      if( LENGTH(a)==LENGTH(false_true[truth]) &&
          same_strings( 1, STRING(a), STRING(false_true[truth]), LENGTH(a)) )
        {
          *np = truth;
          return 1;
        }
    }

  return 0;
}
int atom_to_string ( atom_t  a,
chars_t  buff,
int *  buff_len 
)
Parameters:
a: atom to convert
buff: buffer where the string will be written
buff_len: on entry, the length of the buffer; on return, the length of the string written
Returns:
returns 1 if conversion was successful.

Converts an atom to a string. If the buffer is too small, the string is truncated. Conversion is successful if the atom is a word, a subword or a number; otherwise 0 is returned.

{
  // Renders a word, subword or number atom as characters in buff and
  // stores the resulting character count in *buff_len.
  //  outter(TEXT("a2s atom=|"),-1);dump(a);outter(TEXT("|\n"),-1);

    if (IS_ANY_WORD(a))
    {
        // Copy at most *buff_len characters of the word. No terminator is
        // written on this path; the count in *buff_len is the only length.
        int min = LENGTH(a) < *buff_len ? LENGTH(a) : *buff_len;
        int i;
        for(i = 0; i < min; ++ i)
        {
            buff[i] = STRING(a)[i];
        }
        *buff_len = min;
        return 1;
    }else
    {
        // Not a word: succeed only if the atom converts to a float.
        float64_t x;
        if (!atom_to_float (a, &x))
        {
            return 0;
        }
        // On Windows infinities are printed as "1.#INF" instead
        // of just "inf", and "-1.#IND" instead of "nan". So let's fix this.
        // NOTE(review): in the three cases below *buff_len is only updated
        // when the buffer is larger than 4 (5 for "-inf"); for smaller
        // buffers it keeps the capacity passed in -- confirm this is intended.
        #ifdef WINDOWS
        if(isinf(x)!=0 && x > 0)
        {
            STRNCPY(buff, TEXT("inf"), *buff_len-1);
            if (*buff_len > 4) *buff_len = 3;
            return 1;
        }
        if( isinf(x)!=0 && x < 0)
        {
            STRNCPY(buff, TEXT("-inf"), *buff_len-1);
            if (*buff_len > 5) *buff_len = 4;
            return 1;
        }
        if( isnan(x)!=0 )
        {
            STRNCPY(buff, TEXT("nan"), *buff_len-1);
            if (*buff_len > 4) *buff_len = 3;
            return 1;
        }
      #endif //WINDOWS

        // Magnitudes of at least 1e23 or at most 1e-23 (zero included) are
        // printed with FORMAT_EXTRA_FP; everything else with FORMAT_FP.
        if( (x>=1e23) || (x<=-1e23) || ((x>=-1e-23) && (x<=1e-23)) )
        {
          SPRINTF( buff, *buff_len, FORMAT_EXTRA_FP, x );
          *buff_len = STRLEN(buff);
        }
        else
        {
          SPRINTF(buff, *buff_len, FORMAT_FP, x);
          *buff_len = STRLEN(buff);
          //outter(TEXT("a2s=|"),-1); outter(buff,*buff_len); outter(TEXT("|\n"),-1);
          //atom_t nn=new_integer(*buff_len);
          //outter(TEXT("len="),-1); dumpln(nn);
          // Strip trailing zeros and then a trailing decimal point.
          // NOTE(review): presumably FORMAT_FP always prints a '.', which is
          // what stops this loop; the loop itself has no lower bound on
          // *buff_len -- verify against the FORMAT_FP definition.
          while (buff[*buff_len-1] == '0') --*buff_len;
          if (buff[*buff_len-1] == '.') --*buff_len;
          buff[*buff_len] = '\0';
        }
        return 1;
    }
}
atom_t atom_to_word ( atom_t  data)
Parameters:
data: atom to convert
Returns:
returns word, subword or error atom

Converts an atom to a word or a subword atom. If the input is a (sub)word, then it is returned directly. If it is a number, then a new word is created. Otherwise an error atom is returned.

{
  // An error atom is handed straight back to the caller.
  if( IS_ERROR(data) )
    {
      return data;
    }

  // Words and subwords are returned as they are, with one more use.
  if( IS_ANY_WORD(data) )
    {
      return USE(data);
    }

  // Numbers are rendered into a temporary buffer and wrapped in a new word.
  // An unbound atom never reaches this conversion and ends up as an error.
  if( !IS_UNBOUND(data) && (IS_INTEGER(data) || IS_FLOAT(data)) )
    {
      #define BUFFER_SIZE 128
      char_t text[BUFFER_SIZE];
      int text_len = BUFFER_SIZE;
      #undef BUFFER_SIZE
      if( atom_to_string(data,text,&text_len) )
        {
          return new_word( text, text_len );
        }
    }

  // Everything else cannot be expressed as a word.
  return new_error( ERROR_NOT_A_WORD, data );
}
atom_t atom_to_real_word ( atom_t  data)
Parameters:
data: atom to convert
Returns:
returns word or error atom

Converts an atom to a word. This guarantees that the string in the word atom is zero-terminated. If the input is a word then it is reused. Otherwise a new word is created. Note that a new word is created even if the input is a subword.

{
  // Start from the general conversion; its result may be a word, a
  // subword or an error atom.
  atom_t converted = atom_to_word( data );

  // Only a subword needs more work; anything else is returned unchanged.
  if( !IS_SUBWORD(converted) )
    {
      return converted;
    }

  // new_word copies the characters and appends NULL_CHAR, which gives the
  // zero-terminated string a subword cannot guarantee. The intermediate
  // subword is released once the copy exists.
  atom_t copy = new_word( STRING(converted), LENGTH(converted) );
  DEUSE( converted );
  return copy;
}
int same_strings ( int  ci,
chars_t  sa,
chars_t  sb,
int  n 
)
Parameters:
ci: 1=case insensitive, 0=case sensitive
sa: first string
sb: second string
n: number of characters to compare
Returns:
0 if the strings are different, non-zero otherwise

Compares two strings case sensitive or insensitive.

{
  if( ci )
    {
      // case insensitive   A==a
      for( ; n; n--, sa++, sb++ )
        {
          if( TOLOWER(DEBAR(*sa)) != TOLOWER(DEBAR(*sb)) )
            {
              return 0;
            }
        }
    }
  else
    {
      // case sensitive   A!=a
      for( ; n; n--, sa++, sb++ )
        {
          if( DEBAR(*sa) != DEBAR(*sb) )
            {
              return 0;
            }
        }
    }

  // all n character pairs matched
  return 1;
}

Variable Documentation


[ HOME | INDEX | ATOMS | VARS | REFERENCE ]
Lhogho Developer's Documentation
Tue Feb 7 2012