Lhogho  0.0.027
Defines
parser.c File Reference

Defines

#define GET_CHAR   ch=*source
 get current character
#define VAR   vars[count]
 shortens access to vars array
#define LINK
#define RELINK

Tokentypes

#define TOKEN_END   0
 no more tokens available
#define TOKEN_WORD   1
 normal word token
#define TOKEN_DIRTY_WORD   2
 word token with unhandled backslashs or bars
#define TOKEN_OPEN   3
 open square bracket token
#define TOKEN_CLOSE   4
 close square bracket token
#define TOKEN_LINEEND   5
 end of line token
#define TOKEN_SPACE   6
 whitespace token
#define TOKEN_ERROR   7
 error occured

Parse modes

#define MODE_ENTRY   0
 entry mode
#define MODE_SPACE   1
 space mode
#define MODE_WORD   2
 word mode
#define MODE_BARRED   3
 barred mode
#define MODE_BACKSLASHED   4
 backslashed mode
#define MODE_TILDE   5
 tilde mode
#define MODE_SEMITILDE   6
 semitilde mode
#define MODE_SEMICOLON   7
 semicolon mode
#define MODE_TILDESPACE   8
 tildespace mode
#define MODE_LESS   9
 less mode
#define MODE_GREATER   10
 greater mode
#define MAX_MODE   11
 greatest mode + 1

Microprogram defines (basic)

#define PAT_TILDE   0x000001
 push tilde
#define PAT_PUSH   0x000002
 push last character
#define PAT_NEXT   0x000004
 goto next character
#define PAT_DIRTY   0x000008
 set dirty flag
#define PAT_MUTATED   0x000010
 set mutated flag
#define PAT_RETURN_TO_WORD   0x000020
 next return will return to word mode
#define PAT_RETURN_TO_SELF   0x000040
 next return will return to current mode
#define PAT_GOTO   0x000080
 follows 4bit go-to-mode
#define PAT_TOKEN   0x000100
 follows 4bit token_id
#define PAT_RETURN   0x000200
 return to stack-to-mode
#define PAT_ERROR_POS   0x000400
 remember current position as error position
#define PAT_ERROR   0x000800
 generate error
#define PAT_RETURN_TO_CALLER   0x001000
 next return will return to the caller of this mode
#define PAT_NEXT2   0x002000
 goto next next character

Microprogram defines (compound)

#define PAT_SHIFT   16
 shift factor
#define PAT_TOKEN_END   PAT_TOKEN+(TOKEN_END<<PAT_SHIFT)
 microprogram for end token
#define PAT_TOKEN_SPACE   PAT_TOKEN+(TOKEN_SPACE<<PAT_SHIFT)
 microprogram for space token
#define PAT_TOKEN_WORD   PAT_TOKEN+(TOKEN_WORD<<PAT_SHIFT)
 microprogram for word token
#define PAT_TOKEN_LINE   PAT_TOKEN+(TOKEN_LINEEND<<PAT_SHIFT)
 microprogram for line end token
#define PAT_TOKEN_OPEN   PAT_TOKEN+(TOKEN_OPEN<<PAT_SHIFT)
 microprogram for open token
#define PAT_TOKEN_CLOSE   PAT_TOKEN+(TOKEN_CLOSE<<PAT_SHIFT)
 microprogram for close token
#define PAT_GOTO_SPACE   PAT_GOTO+(MODE_SPACE<<PAT_SHIFT)
 microprogram for goto space mode
#define PAT_GOTO_WORD   PAT_GOTO+(MODE_WORD<<PAT_SHIFT)
 microprogram for goto word mode
#define PAT_GOTO_BACKSLASHED   PAT_GOTO+(MODE_BACKSLASHED<<PAT_SHIFT)
 microprogram for goto backslashed mode
#define PAT_GOTO_BARRED   PAT_GOTO+(MODE_BARRED<<PAT_SHIFT)
 microprogram for goto barred mode
#define PAT_GOTO_SEMICOLON   PAT_GOTO+(MODE_SEMICOLON<<PAT_SHIFT)
 microprogram for goto semicolon mode
#define PAT_GOTO_TILDE   PAT_GOTO+(MODE_TILDE<<PAT_SHIFT)
 microprogram for goto tilde mode
#define PAT_GOTO_TILDESPACE   PAT_GOTO+(MODE_TILDESPACE<<PAT_SHIFT)
 microprogram for goto tildespace mode
#define PAT_GOTO_SEMITILDE   PAT_GOTO+(MODE_SEMITILDE<<PAT_SHIFT)
 microprogram for goto semitilde mode
#define PAT_GOTO_LESS   PAT_GOTO+(MODE_LESS<<PAT_SHIFT)
 microprogram for goto less mode
#define PAT_GOTO_GREATER   PAT_GOTO+(MODE_GREATER<<PAT_SHIFT)
 microprogram for goto greater mode

Special characters

When a chracter is barrable and is actually barred then its code is changed. For example codes for space and barred space are different for Lhogho, but should appear the same for users.

Barrable characters are ()+-

#define MAX_ELEMS   128
 maximal number of elements in a statement
char_t debar [32]
 table for a->|a| conversions
atom_t build_syntax_subtree (atom_t function, atom_t source)
 parses sublist of commands
void init_parser ()
 initializes parser
atom_t tokenize (atom_t input, int method)
 tokenizes into a list
atom_t trim_shell_comment (atom_t word)
 trims shell comment (if any)
atom_t purify (atom_t word)
 purifies a word
atom_t parentheses (atom_t input)
 processes parentheses
atom_t define_user_function (atom_t source, atom_t input, atom_t parent)
 creates var atom for a user-defined function
atom_t preparse (atom_t input, atom_t parent, int level)
 extracts all TO ... END definitions
atom_t parse (atom_t input, atom_t parent, int top_level)
 parses a list into a tree
atom_t build_syntax_tree (atom_t func)
 parses body of user-defined function

Define Documentation

#define GET_CHAR   ch=*source
#define VAR   vars[count]
#define TOKEN_END   0
#define TOKEN_WORD   1
#define TOKEN_DIRTY_WORD   2
#define TOKEN_OPEN   3
#define TOKEN_CLOSE   4
#define TOKEN_LINEEND   5
#define TOKEN_SPACE   6
#define TOKEN_ERROR   7
#define MODE_ENTRY   0
#define MODE_SPACE   1
#define MODE_WORD   2
#define MODE_BARRED   3
#define MODE_BACKSLASHED   4
#define MODE_TILDE   5
#define MODE_SEMITILDE   6
#define MODE_SEMICOLON   7
#define MODE_TILDESPACE   8
#define MODE_LESS   9
#define MODE_GREATER   10
#define MAX_MODE   11
#define PAT_TILDE   0x000001
#define PAT_PUSH   0x000002
#define PAT_NEXT   0x000004
#define PAT_DIRTY   0x000008
#define PAT_MUTATED   0x000010
#define PAT_RETURN_TO_WORD   0x000020
#define PAT_RETURN_TO_SELF   0x000040
#define PAT_GOTO   0x000080
#define PAT_TOKEN   0x000100
#define PAT_RETURN   0x000200
#define PAT_ERROR_POS   0x000400
#define PAT_ERROR   0x000800
#define PAT_RETURN_TO_CALLER   0x001000
#define PAT_NEXT2   0x002000
#define PAT_SHIFT   16
#define MAX_ELEMS   128
#define LINK
Value:
{ \
      usedby[j] = i; \
      args[i]--; \
    }
#define RELINK
Value:
{ \
      if( j<usedby[j] ) { largs[usedby[j]]++; finished=0; } \
      if( j>usedby[j] ) { rargs[usedby[j]]++; finished=0; } \
      LINK; \
    }

Function Documentation

atom_t build_syntax_subtree ( atom_t  function,
atom_t  source 
)
Parameters:
functionvar atom for the parse context
sourceword or list containing the source
Returns:
abstract syntax tree or an error atom

Parses a list of commands (like these in IF or REPEAT) and build an abstract syntax tree:

  • tokenization of body as commands
  • extracting all TO ... ENDs and create them as subfunctions
  • parsing the func's body into abstract syntax tree
  • recursively build trees of subfunctions
{
  //printf("BUILD_SYNTAX_SUBTREE(");
  //dump(NAME(function));
  //printf(",");
  //dump(source);
  //printf(")\n\n");

  // tokenize
  atom_t tokens1 = tokenize( source, TOKENIZE_DATA );
  if( IS_ERROR(tokens1) ) return tokens1;
  //printf("tokens1="); dumpln(tokens1);

  atom_t tokens2 = tokenize( tokens1, TOKENIZE_COMMANDS );
  DEUSE( tokens1 );
  if( IS_ERROR(tokens2) ) return tokens2;
  //printf("tokens2="); dumpln(tokens2);

  // extract TO..END's
  atom_t body = preparse( tokens2, function, LEVEL(function) );
  if( IS_ERROR(body) ) return body;
  //printf("body="); dumpln(body);

  // parse function body
  atom_t tree = parse( body, function, 1 );
  if( IS_ERROR(tree) ) return tree;
  //printf("tree="); dumpln(tree);

  DEUSE( tokens2 );
  //DEUSE( body );
  return tree;
}

Initializes tables enbar[] and debar[] which are used to enbar and debar a character.

{
  int i;

  // NOTE! if the string of enbarrable characters is
  // changed, also change dump_word()
  chars_t s = TEXT("______________()+-*/=<>|?_______");
  //locked->        x........xx..x..................

  for( i=0; i<32;  i++ ) debar[i]=i;   // identity matrix
  for( i=0; i<128; i++ ) enbar[i]=i;   // identity matrix

  for( i=0; *s; i++,s++ )
  {
    if( *s==TEXT('_') ) continue;
    debar[i] = *s;
    enbar[(unsigned char)*s] = i;
  }
}
atom_t tokenize ( atom_t  input,
int  method 
)
Parameters:
inputword, subword or list to be tokenized
methodmethod of tokenization
Returns:
tokenized list

Tokenizes a word, a subword or a list into a list. If possible makes all words as subwords. Backslashes and bars in words are preserved (i.e. words are not purified). Comments and line continuations are ignored.

If the method is TOKENIZE_DATA then the input is tokenized as if it contains Logo data. If the method is TOKENIZE_COMMANDS then the input is tokenized as if it contains Logo commands. If the method is TOKENIZE_READWORD then the input is tokenized as expected by READWORD function.

If the input is a list then all its elements are tokenized one-by-one.

Return value is the tokenized list. In there is an error, returns an error atom which error code is ERROR_INCOMPLETE_PAIR, error position points the position in the word (0-based) and the error source is the word itself.

{
  if( IS_LIST(input) )
  {
    if( method==TOKENIZE_DATA && 
        GET_FLAGS( input, FLAG_TOKENIZED_DATA|FLAG_TOKENIZED_COMMANDS ) )
    return USE(input);
    if( method==TOKENIZE_COMMANDS && 
        GET_FLAGS( input, FLAG_TOKENIZED_COMMANDS ) )
    return USE(input);
  }

  if( IS_FLOAT(input) )
  {
    return USE(input);
  }

  //printf("\n");
  //if(method==TOKENIZE_DATA)
  //  printf("ENTER TOKENIZE_DATA(");
  //else
  //  printf("ENTER TOKENIZE_COMMANDS(");
  //dump_atom(input,1);
  //printf(")\n");


  #ifdef SAFEMODE
  assert( IS_WORD(input) || IS_SUBWORD(input) || IS_LIST(input) );
  #endif

  // First check whether the input is a list.
  // If it is then tokenize recursively all its elements.
  if( IS_LIST(input) )
  { 
    atom_t result = empty_list;
    atom_t last   = empty_list;
    atom_t x;
    atom_t y;
    for( x=input; IS_NOT_EMPTY(x); x=CDR(x) )
    {
      //printf("@@@@@@@@PROCESSING=|"); dump_atom(CAR(x),1); printf("|\n");

      int submethod = IS_LIST(CAR(x))?TOKENIZE_DATA:method;
      atom_t element = tokenize( CAR(x), submethod );
      //printf("@@@@@@@@TOKENIZED_INTO=|"); dump_atom(element,1); printf("|\n");

      if( IS_ERROR(element) )
      {
        DEUSE( result );
        //DEUSE( last );
        result = element;
        break;
      }

      int initial_flags = GET_FLAGS( x, FLAG_NEWLINE|FLAG_AFTER_SPACE );
      int final_flags   = GET_FLAGS( x, FLAG_BEFORE_SPACE );

      //printf(">>>CAR(x)  = "); dump_atom(CAR(x),1); printf("\n");
      //printf(">>>element = "); dump_atom(element,1); printf("\n");
      if( IS_FLOAT(element) )
      {
        append( USE(element), &result, &last );
        SET_FLAGS( last, initial_flags|final_flags );
      }
      else if( IS_LIST(CAR(x)) )
      {
        if( IS_EXTENDED(x) )
        {
          append_ex( USE(element), &result, &last );
          DEUSE( POS(last) );
          POS( last ) = USE( POS(x) );
        }
        else
        append( USE(element), &result, &last );
        SET_FLAGS( last, initial_flags|final_flags );
      }
      else
      {
        for( y=element; IS_NOT_EMPTY(y); y=CDR(y) )
        {
          //printf("APPEND SUBELEMENT |");
          //dump_atom(CAR(y),1);
          //printf("|\n");

          if( IS_EXTENDED(y) )
          {
            append_ex( USE(CAR(y)), &result, &last );
            DEUSE( POS(last) );
            POS( last ) = USE( POS(y) );
          }
          else
          append( USE(CAR(y)), &result, &last );
          SET_FLAGS( last, FLAGS(y) );
          if( y==element ) SET_FLAGS( last, initial_flags );
        }
        #ifdef SAFE_MODE
        assert( IS_NOT_EMPTY(last) );
        #endif
        SET_FLAGS( last, final_flags);
      }

      DEUSE( element );
    }

    //printf("FINAL RESULT IS |"); dump_atom(result,1); printf("|\n");
    return result;
  }

  // The input is a word or a subword
  chars_t source  = STRING(input);
  int     len     = LENGTH(input);
  int     origlen = len;

  chars_t buffer = ALLOC( CHAR_SIZE*len ); // buffer for the longest word
  //chars_t bp     = buffer;
  char_t  ch;

  int_t   errpos    = -1;
  //char_t  errchar   = NULL_CHAR;

  int last_token = TOKEN_LINEEND;
  //int crlf = 0;

  // Gets the next token. Return:
  //  TOKEN_END    if there are no more tokens
  //  TOKEN_WORD   if the token is a word
  //  TOKEN_DIRTY_WORD if the token is a word with \ or |
  //  TOKEN_OPEN   if the token is [
  //  TOKEN_CLOSE  if the token is ]
  //  TOKEN_LINEEND   if the token is <nl>
  //    TOKEN_SPACE   if at least one whitespace is met


  //int co=0;
  int get_token( atom_t *token, int method )
  {
    //co++;
    //if (0 == co%1024)
    //{
    //printf("%d ",co);
    //}
    // return 1 if buffer contains number
    int is_number(chars_t bp)
    { // "E" {digit}* "." {digit}+
      //  1     2      3     4
      chars_t cp = bp;
      int num_mode = 1;
      char_t ch;

      cp = bp;

      if( bp==buffer ) return 0;

      while( cp>buffer )
      {
        cp--;
        ch = *cp;
        //printf("num_mode=%d ch=%C\n",num_mode,ch);
        switch( num_mode )
        {
        case 1:
          if( ch!=TEXT('E') && ch!=TEXT('e') ) return 0;
          num_mode = 2;
          break;
        case 2: ;
          if( ch<TEXT('0') || ch>TEXT('9') ) return 0;
          num_mode = 3;
          break;
        case 3: ;
          if( ch!=TEXT('.') ) return 0;
          num_mode = 4;
          break;
        case 4: ;
          if( ch<TEXT('0') || ch>TEXT('9') ) return 0;
          break;
        }
        //printf("num_mode=%d\n",num_mode);
      }
      return 1;
    }

    if( !len ) return TOKEN_END;

    int dirty = 0;
    int mutated = 0;
    chars_t bp = buffer; *bp=NULL_CHAR;
    chars_t sp = source;

    int mode = MODE_ENTRY; // current mode
    int code;        // action code


    int stack[MAX_MODE];   // return-to-mode for each mode
    static int mode_eof[MAX_MODE] =
    {
      /* entry */ PAT_TOKEN_END,
      /* whitespace  */ PAT_TOKEN_SPACE,
      /* word     */ PAT_TOKEN_WORD,
      /* barred   */ PAT_ERROR,
      /* backslashed */ PAT_ERROR,
      /* tilde */ PAT_ERROR,
      /* semitilde   */ PAT_ERROR,
      /* semicolon   */ PAT_RETURN,
      /* tildespace  */ PAT_ERROR,
      /* less     */ PAT_TOKEN_WORD, 
      /* greater  */ PAT_TOKEN_WORD
    };
    static int mode_eol[MAX_MODE] =
    {
      /* entry */ PAT_NEXT+PAT_TOKEN_LINE,
      /* whitespace  */ PAT_TOKEN_SPACE,
      /* word     */ PAT_TOKEN_WORD,
      /* barred   */ PAT_PUSH+PAT_NEXT,
      /* backslashed */ PAT_PUSH+PAT_NEXT+PAT_RETURN,
      /* tilde */ PAT_NEXT+PAT_RETURN,
      /* semitilde   */ PAT_NEXT+PAT_RETURN,
      /* semicolon   */ PAT_RETURN,
      /* tildespace  */ PAT_NEXT+PAT_RETURN,
      /* less     */ PAT_TOKEN_WORD, 
      /* greater  */ PAT_TOKEN_WORD
    };
    static int mode_space[MAX_MODE] =
    {
      /* entry */ PAT_NEXT+PAT_GOTO_SPACE,
      /* whitespace  */ PAT_NEXT,
      /* word     */ PAT_TOKEN_WORD,
      /* barred   */ PAT_PUSH+PAT_NEXT,
      /* backslashed */ PAT_PUSH+PAT_NEXT+PAT_RETURN,
      /* tilde */ PAT_NEXT+PAT_GOTO_TILDESPACE,
      /* semitilde   */ PAT_NEXT,
      /* semicolon   */ PAT_NEXT,
      /* tildespace  */ PAT_NEXT,
      /* less     */ PAT_TOKEN_WORD, 
      /* greater  */ PAT_TOKEN_WORD
    };
    static int mode_open[MAX_MODE] =
    {
      /* entry */ PAT_NEXT+PAT_TOKEN_OPEN,
      /* whitespace  */ PAT_TOKEN_SPACE,
      /* word     */ PAT_TOKEN_WORD,
      /* barred   */ PAT_PUSH+PAT_NEXT,
      /* backslashed */ PAT_PUSH+PAT_NEXT+PAT_RETURN,
      /* tilde */ PAT_TILDE+PAT_RETURN,
      /* semitilde   */ PAT_RETURN,
      /* semicolon   */ PAT_NEXT,
      /* tildespace  */ PAT_TILDE+PAT_TOKEN_WORD,
      /* less     */ PAT_TOKEN_WORD, 
      /* greater  */ PAT_TOKEN_WORD
    };
    static int mode_close[MAX_MODE] =
    {
      /* entry */ PAT_NEXT+PAT_TOKEN_CLOSE,
      /* whitespace  */ PAT_TOKEN_SPACE,
      /* word     */ PAT_TOKEN_WORD,
      /* barred   */ PAT_PUSH+PAT_NEXT,
      /* backslashed */ PAT_PUSH+PAT_NEXT+PAT_RETURN,
      /* tilde */ PAT_TILDE+PAT_RETURN,
      /* semitilde   */ PAT_RETURN,
      /* semicolon   */ PAT_NEXT,
      /* tildespace  */ PAT_TILDE+PAT_TOKEN_WORD,
      /* less     */ PAT_TOKEN_WORD, 
      /* greater  */ PAT_TOKEN_WORD
    };
    static int mode_bar[MAX_MODE] =
    {
      /* entry */ PAT_PUSH+PAT_NEXT+PAT_DIRTY+PAT_RETURN_TO_WORD+PAT_GOTO_BARRED+PAT_ERROR_POS,
      /* whitespace  */ PAT_TOKEN_SPACE,
      /* word     */ PAT_PUSH+PAT_NEXT+PAT_DIRTY+PAT_GOTO_BARRED+PAT_RETURN_TO_SELF+PAT_ERROR_POS,
      /* barred   */ PAT_PUSH+PAT_NEXT+PAT_RETURN,
      /* backslashed */ PAT_PUSH+PAT_NEXT+PAT_RETURN,
      /* tilde */ PAT_TILDE+PAT_RETURN,
      /* semitilde   */ PAT_RETURN,
      /* semicolon */ PAT_PUSH+PAT_NEXT+PAT_DIRTY+PAT_GOTO_BARRED+PAT_RETURN_TO_SELF+PAT_ERROR_POS,
      /* tildespace  */ PAT_TILDE+PAT_TOKEN_WORD,
      /* less     */ PAT_TOKEN_WORD, 
      /* greater  */ PAT_TOKEN_WORD
    };
    static int mode_backslash[MAX_MODE] =
    {
      /* entry */ PAT_PUSH+PAT_NEXT+PAT_DIRTY+PAT_RETURN_TO_WORD+PAT_GOTO_BACKSLASHED+PAT_ERROR_POS,
      /* whitespace  */ PAT_TOKEN_SPACE,
      /* word     */ PAT_PUSH+PAT_NEXT+PAT_DIRTY+PAT_RETURN_TO_SELF+PAT_GOTO_BACKSLASHED+PAT_ERROR_POS,
      /* barred   */ PAT_PUSH+PAT_NEXT+PAT_RETURN_TO_SELF+PAT_GOTO_BACKSLASHED,
      /* backslashed */ PAT_PUSH+PAT_NEXT+PAT_RETURN,
      /* tilde */ PAT_TILDE+PAT_RETURN,
      /* semitilde   */ PAT_RETURN,
      /* semicolon   */ PAT_NEXT2, //+PAT_RETURN_TO_SELF+PAT_GOTO_BACKSLASHED,
      /* tildespace  */ PAT_TILDE+PAT_TOKEN_WORD,
      /* less     */ PAT_TOKEN_WORD, 
      /* greater  */ PAT_TOKEN_WORD
    };
    static int mode_tilde[MAX_MODE] =
    {
      /* entry */ PAT_NEXT+PAT_RETURN_TO_SELF+PAT_GOTO_TILDE+PAT_ERROR_POS,
      /* whitespace  */ PAT_TOKEN_SPACE,
      /* word     */ PAT_NEXT+PAT_MUTATED+PAT_RETURN_TO_SELF+PAT_GOTO_TILDE+PAT_ERROR_POS,
      /* barred   */ PAT_PUSH+PAT_NEXT,
      /* backslashed */ PAT_PUSH+PAT_NEXT+PAT_RETURN,
      /* tilde */ PAT_TILDE+PAT_RETURN,
      /* semitilde   */ PAT_RETURN,
      /* semicolon   */ PAT_NEXT+PAT_MUTATED+PAT_RETURN_TO_CALLER+PAT_GOTO_SEMITILDE+PAT_ERROR_POS,
      /* tildespace  */ PAT_TILDE+PAT_TOKEN_WORD,
      /* less     */ PAT_TOKEN_WORD, 
      /* greater  */ PAT_TOKEN_WORD
    };
    static int mode_semicolon[MAX_MODE] =
    {
      /* entry */ PAT_NEXT+PAT_RETURN_TO_SELF+PAT_GOTO_SEMICOLON,
      /* whitespace  */ PAT_TOKEN_SPACE,
      /* word     */ PAT_NEXT+PAT_MUTATED+PAT_RETURN_TO_SELF+PAT_GOTO_SEMICOLON,
      /* barred   */ PAT_PUSH+PAT_NEXT,
      /* backslashed */ PAT_PUSH+PAT_NEXT+PAT_RETURN,
      /* tilde */ PAT_TILDE+PAT_RETURN,
      /* semitilde   */ PAT_RETURN,
      /* semicolon   */ PAT_NEXT+PAT_RETURN_TO_SELF+PAT_GOTO_SEMITILDE,
      /* tildespace  */ PAT_TILDE+PAT_TOKEN_WORD,
      /* less     */ PAT_TOKEN_WORD, 
      /* greater  */ PAT_TOKEN_WORD
    };
    static int mode_else[MAX_MODE] =
    {
      /* entry */ PAT_PUSH+PAT_NEXT+PAT_GOTO_WORD,
      /* whitespace  */ PAT_TOKEN_SPACE,
      /* word     */ PAT_PUSH+PAT_NEXT,
      /* barred   */ PAT_PUSH+PAT_NEXT,
      /* backslashed */ PAT_PUSH+PAT_NEXT+PAT_RETURN,
      /* tilde */ PAT_TILDE+PAT_RETURN,
      /* semitilde   */ PAT_RETURN,
      /* semicolon   */ PAT_NEXT,
      /* tildespace  */ PAT_TILDE+PAT_TOKEN_WORD,
      /* less     */ PAT_TOKEN_WORD, 
      /* greater  */ PAT_TOKEN_WORD
    };
    static int mode_parens[MAX_MODE] =
    {
      /* entry */ PAT_PUSH+PAT_NEXT+PAT_TOKEN_WORD,
      /* whitespace  */ PAT_TOKEN_SPACE,
      /* word     */ PAT_TOKEN_WORD,
      /* barred   */ PAT_PUSH+PAT_NEXT,
      /* backslashed */ PAT_PUSH+PAT_NEXT+PAT_RETURN,
      /* tilde */ PAT_TILDE+PAT_RETURN,
      /* semitilde   */ PAT_RETURN,
      /* semicolon   */ PAT_NEXT,
      /* tildespace  */ PAT_TILDE+PAT_TOKEN_WORD,
      /* less     */ PAT_TOKEN_WORD, 
      /* greater  */ PAT_TOKEN_WORD
    };
    static int mode_equal[MAX_MODE] =
    {
      /* entry */ PAT_PUSH+PAT_NEXT+PAT_TOKEN_WORD,
      /* whitespace  */ PAT_TOKEN_SPACE,
      /* word     */ PAT_TOKEN_WORD,
      /* barred   */ PAT_PUSH+PAT_NEXT,
      /* backslashed */ PAT_PUSH+PAT_NEXT+PAT_RETURN,
      /* tilde */ PAT_TILDE+PAT_RETURN,
      /* semitilde   */ PAT_RETURN,
      /* semicolon   */ PAT_NEXT,
      /* tildespace  */ PAT_TILDE+PAT_TOKEN_WORD,
      /* less     */ PAT_PUSH+PAT_NEXT+PAT_TOKEN_WORD, 
      /* greater  */ PAT_PUSH+PAT_NEXT+PAT_TOKEN_WORD
    };
    static int mode_less[MAX_MODE] =
    {
      /* entry */ PAT_PUSH+PAT_NEXT+PAT_GOTO_LESS,
      /* whitespace  */ PAT_PUSH+PAT_NEXT+PAT_GOTO_LESS, //PAT_TOKEN_WORD, @boza
      /* word     */ PAT_TOKEN_WORD,
      /* barred   */ PAT_PUSH+PAT_NEXT,
      /* backslashed */ PAT_PUSH+PAT_NEXT+PAT_RETURN,
      /* tilde */ PAT_TILDE+PAT_RETURN,
      /* semitilde   */ PAT_RETURN,
      /* semicolon   */ PAT_NEXT,
      /* tildespace  */ PAT_TILDE+PAT_TOKEN_WORD,
      /* less     */ PAT_TOKEN_WORD, 
      /* greater  */ PAT_TOKEN_WORD
    };
    static int mode_greater[MAX_MODE] =
    {
      /* entry */ PAT_PUSH+PAT_NEXT+PAT_GOTO_GREATER,
      /* whitespace  */ PAT_PUSH+PAT_NEXT+PAT_GOTO_GREATER, //PAT_TOKEN_WORD, @boza
      /* word     */ PAT_TOKEN_WORD,
      /* barred   */ PAT_PUSH+PAT_NEXT,
      /* backslashed */ PAT_PUSH+PAT_NEXT+PAT_RETURN,
      /* tilde */ PAT_TILDE+PAT_RETURN,
      /* semitilde   */ PAT_RETURN,
      /* semicolon   */ PAT_NEXT,
      /* tildespace  */ PAT_TILDE+PAT_TOKEN_WORD,
      /* less     */ PAT_PUSH+PAT_NEXT+PAT_TOKEN_WORD, 
      /* greater  */ PAT_TOKEN_WORD
    };


again:
    // get action code
    GET_CHAR;
    
    
#ifdef DEBUG_TOKENS
    printf("<TOKENS> length=%d\n",len);
    if(ch<TEXT(' '))
    printf("<TOKENS> get(#%d)\t",ch);
    else
    printf("<TOKENS> get('%c')\t",ch);
    switch( mode )
    {
    case MODE_ENTRY:    printf("     ENTRY -> "); break;
    case MODE_SPACE:    printf("     SPACE -> "); break;
    case MODE_WORD:     printf("      WORD -> "); break;
    case MODE_BARRED:      printf("    BARRED -> "); break;
    case MODE_BACKSLASHED: printf(" BACKSLASH -> "); break;
    case MODE_TILDE:    printf("     TILDE -> "); break;
    case MODE_SEMITILDE:   printf(" SEMITILDE -> "); break;
    case MODE_SEMICOLON:   printf(" SEMICOLON -> "); break;
    case MODE_TILDESPACE:  printf("TILDESPACE -> "); break;
    }
#endif
    code = mode_else[mode];
    if( !len )
    code = mode_eof[mode];
    else
    {
      if( method==TOKENIZE_READWORD )
      { // tokenize as expected by READWORD
        if( ch==TEXT('|')  ) code = mode_bar[mode];
        else if( ch==TEXT('\\') ) code = mode_backslash[mode];
      }
      else
      if( method==TOKENIZE_READLIST )
      { // tokenize as data
        if(      ch==TEXT('\n') ) code = mode_eol[mode];
        else if( ch==TEXT('\r') ) code = mode_eol[mode];
        else if( ch<=TEXT(' ') ) code = mode_space[mode];
        else if( ch==TEXT('[')  ) code = mode_open[mode];
        else if( ch==TEXT(']')  ) code = mode_close[mode];
        else if( ch==TEXT('|')  ) code = mode_bar[mode];
        else if( ch==TEXT('\\') ) code = mode_backslash[mode];
        else if( ch==TEXT('~')  ) code = mode_tilde[mode];
      }
      else
      if( method==TOKENIZE_DATA )
      { // tokenize as data
        if(      ch==TEXT('\n') ) code = mode_eol[mode];
        else if( ch==TEXT('\r') ) code = mode_eol[mode];
        else if( ch<=TEXT(' ') ) code = mode_space[mode];
        else if( ch==TEXT('[')  ) code = mode_open[mode];
        else if( ch==TEXT(']')  ) code = mode_close[mode];
        else if( ch==TEXT('|')  ) code = mode_bar[mode];
        else if( ch==TEXT('\\') ) code = mode_backslash[mode];
        else if( ch==TEXT('~')  ) code = mode_tilde[mode];
        else if( ch==TEXT(';')  ) code = mode_semicolon[mode];
      }
      else
      { // tokenize as commands
        //printf("ch=%c *buf=%c\n",ch,*buffer);
        if( ch==TEXT('(') ||
            ch==TEXT(')') ) code = mode_parens[mode];
        else if( *buffer!=TEXT('"') )
        {
          if( (ch==TEXT('+') || ch==TEXT('-')) && is_number(bp) ) { }
          else if( ch==TEXT('+') || 
              ch==TEXT('-') || 
              ch==TEXT('*') ||
              ch==TEXT('/'))  code = mode_parens[mode];
          else if( ch==TEXT('=') ) code = mode_equal[mode];
          else if( ch==TEXT('<') ) code = mode_less[mode];
          else if( ch==TEXT('>') ) code = mode_greater[mode];
        }
      }
    }
    // process action code
    int _stack = -1;

    int newmode = 0;
    if( code & PAT_GOTO )
    {
      // this is pred-processing of GOTO
      // if old mode was space, and new is not,
      // then update initial position of next token
      newmode = (code>>PAT_SHIFT) & 0xF;
      if( mode==MODE_SPACE && newmode!=MODE_SPACE ) { sp = source; }
    }


    if( code & PAT_TILDE ) 
    {
      *bp++ = TEXT('~');
      #ifdef DEBUG_TOKENS
      printf("\n<TOKENS> put('%c'/%d)\n",TEXT('~'),TEXT('~'));
      #endif //DEBUG_TOKENS
    }
    if( code & PAT_PUSH )  
    {
      // push a character only if:
      // - currently not in bars |..?..|
      // - currently in bars, but not in semicolon ;..|..?..|
      if( mode!=MODE_BARRED || 
          (stack[mode]!=MODE_SEMITILDE &&
            stack[mode]!=MODE_SEMICOLON) )
      {
        //if( mode==MODE_BARRED || mode==MODE_BACKSLASHED )
        //*bp++ = ENBAR(ch);
        //else
        *bp++ = ch;
        #ifdef DEBUG_TOKENS
        printf("\n<TOKENS> put('%c'/%d)\n",ch,ch);
        #endif //DEBUG_TOKENS
        //if( ch=='\r' && *(source+1)=='\n' ) // handle CRLF cases
        //{
        //*bp++ = '\n';
        //#ifdef DEBUG_TOKENS
        //  printf("\n<TOKENS> put('%d')\n",'\n');
        // #endif //DEBUG_TOKENS
        //}
      }
    }
    if( code & PAT_NEXT2 )
    {
      source++;
      len--;
    }
    if( code & (PAT_NEXT|PAT_NEXT2) )
    {
      if( *source=='\r' )
      {
        //crlf = 0;
        if( *(source+1)=='\n' )
        {
          //crlf = 1;
          source++;
          len--;
        }
      }
      source++;
      len--;
    }
    if( code & PAT_DIRTY ) dirty = 1;
    if( code & PAT_MUTATED )  mutated = 1;
    if( code & PAT_ERROR_POS )   errpos = origlen-len-1;
    if( code & PAT_RETURN_TO_WORD ) _stack = MODE_WORD;
    if( code & PAT_RETURN_TO_SELF ) _stack = mode;
    if( code & PAT_RETURN_TO_CALLER ) _stack = stack[mode];
    if( code & PAT_GOTO )
    {
      // this is post-processing of GOTO
      mode = newmode;
      stack[mode] = _stack;
    }
    if( code & PAT_RETURN )   mode = stack[mode];
    if( code & PAT_TOKEN ) 
    {
      int _token = (code>>PAT_SHIFT) & 0xF;
      if( _token!=TOKEN_WORD ) return _token;
      if( mutated )
      {
        *bp = NULL_CHAR;
        *token  = new_word( buffer, UNKNOWN );
        #ifdef DEBUG_TOKENS
        printf("MUTATED TOKEN "); dumpln(*token);
        printf("\n\n");
        #endif
      }
      else
      {
        *token = new_subword( input, sp, source-sp /*bp-buffer*/ );
        #ifdef DEBUG_TOKENS
        printf("NORMAL TOKEN **"); dump(*token);
        printf("** (len=%d)\n\n\n",source-sp);
        #endif
      }
      return dirty?TOKEN_DIRTY_WORD:TOKEN_WORD;
    }
    if( code & PAT_ERROR )
    {
      #ifdef DEBUG_TOKENS
      printf("ERROR\n");
      #endif
      return TOKEN_ERROR;
    }

#ifdef DEBUG_TOKENS
    switch( mode )
    {
    case MODE_ENTRY:    printf("ENTRY\n"); break;
    case MODE_SPACE:    printf("SPACE\n"); break;
    case MODE_WORD:     printf("WORD\n"); break;
    case MODE_BARRED:      printf("BARRED\n"); break;
    case MODE_BACKSLASHED: printf("BACKSLASH\n"); break;
    case MODE_TILDE:    printf("TILDE\n"); break;
    case MODE_SEMITILDE:   printf("SEMITILDE\n"); break;
    case MODE_SEMICOLON:   printf("SEMICOLON\n"); break;
    case MODE_TILDESPACE:  printf("TILDESPACE\n"); break;
    }
#endif
    goto again;
  } // get_token()


  atom_t get_sublist( int level, int full_parse, atom_t* pos ) //ex2//
  {
    atom_t result  = empty_list;
    atom_t last    = empty_list;
    if( pos ) *pos = NULL; //ex2//

    atom_t token   = NULL;
    atom_t sublist_pos = NULL; //ex2//
    int    flags;
    int bracketlen = len;  // LEN of last opening bar

    int pos_from = source-STRING(input); //ex2//
    if( pos_from ) pos_from--; //ex2//

    flags = 0;//FLAG_NEWLINE;
    while( (last_token=get_token(&token,full_parse)) )
    {
      sublist_pos = NULL;

      #ifdef DEBUG_TOKENIZATION
      switch(last_token)
      {
      case TOKEN_END:    printf("TOKEN_END\n"); break;
      case TOKEN_SPACE:  printf("TOKEN_SPACE\n"); break;
      case TOKEN_WORD:   printf("TOKEN_WORD    @"); dump(token); printf("@\n"); break;
      case TOKEN_DIRTY_WORD: printf("TOKEN_|WORD|  @"); dump(token); printf("@\n"); break;
      case TOKEN_OPEN:   printf("TOKEN_OPEN    [\n"); break;
      case TOKEN_CLOSE:  printf("TOKEN_CLOSE   ]\n"); break;
      case TOKEN_LINEEND:   printf("TOKEN_LINEEND\n"); break;
      case TOKEN_ERROR:  printf("TOKEN_ERROR\n"); break;
      }
      #endif

      if( last_token==TOKEN_ERROR ) return result;
      if( last_token==TOKEN_SPACE ) 
      {
        if( IS_NOT_EMPTY(last) ) SET_FLAGS( last, FLAG_BEFORE_SPACE );
        flags |= FLAG_AFTER_SPACE;
        continue;
      }
      if( last_token==TOKEN_DIRTY_WORD )
      {
        //printf("###BEFORE="); dumpln(token);
        atom_t x = purify( token );
        DEUSE( token );
        token = x;
        //printf("###AFTER="); dumpln(token);
      }
      if( last_token==TOKEN_CLOSE ) break;
      if( last_token==TOKEN_LINEEND )
      {
        flags |= FLAG_NEWLINE;
        continue;
      }
      if( last_token==TOKEN_OPEN)
      {
        token = get_sublist( level+1, TOKENIZE_DATA, &sublist_pos ); // recursive //ex2//

        if( last_token==TOKEN_ERROR )
        {
          DEUSE( token );
          if( sublist_pos ) DEUSE(sublist_pos);
          sublist_pos = NULL;
          break;
        }
      }

      if( method==TOKENIZE_COMMANDS )
      flags |= FLAG_TOKENIZED_COMMANDS;

      if( method==TOKENIZE_DATA )
      flags |= FLAG_TOKENIZED_DATA;

      if( method==TOKENIZE_COMMANDS &&
          last_token==TOKEN_WORD &&
          LENGTH(token)>1 &&
          *STRING(token)==TEXT('?') &&
          *(STRING(token)+1)>=TEXT('0') &&
          *(STRING(token)+1)<=TEXT('9') )
      {
        //printf(">>>%d %d\n", last_token==TOKEN_WORD, last_token==TOKEN_DIRTY_WORD);
        // process template ?nn->(? nn) for command tokenization
        //printf("append token **"); dump(token); printf("**\n");
        atom_t new_qoken = new_subword( token, STRING(token), 1 );
        atom_t new_token = new_subword( token, STRING(token)+1, LENGTH(token)-1 );
        DEUSE( token );

        append( new_word(TEXT("("),-1), &result, &last );   // (
        append( new_qoken, &result, &last );    // ?
        append( new_token, &result, &last );    // nn
        append( new_word(TEXT(")"),-1), &result, &last );   // )
      }
      else
      {
        // normal token, no more processing needed
        if( sublist_pos ) //ex//
        {
          append_ex( token, &result, &last );
          POS( last ) = sublist_pos;

          //printf("\n\nSET EXTENDED POSITION ");
          //dump_atom(sublist_pos,1); printf("\n");
          //printf("CURRENT RESULT ");
          //dump_atom(result,1); printf("\n\n");

          sublist_pos = NULL;
        }
        else
        {
          append( token, &result, &last );
        }
        SET_FLAGS( last, flags );
      }

      flags = 0;
      if( last_token==TOKEN_ERROR ) break;
    }

    // test for unmatching square brackets
    // i.e. ...[... or ...]...
    if( level )
    {
      if( last_token==TOKEN_END )
      {
        errpos = origlen-bracketlen-1;
        last_token = TOKEN_ERROR;
      }
    }
    else
    {
      if( last_token==TOKEN_CLOSE )
      {
        errpos = origlen-len-1;
        last_token = TOKEN_ERROR;
      }
    }

    int pos_to = source-STRING(input);
    if( pos_to ) pos_to--;

    if( pos && method == TOKENIZE_DATA ) //ex2//
    {
      *pos = new_subword( input, STRING(input)+pos_from, pos_to-pos_from+1 );
    }

    return result;
  } // get_sublist()

  atom_t result = get_sublist( 0, method, NULL ); //ex//
  
  // in case of error return empty list
  if( last_token==TOKEN_ERROR )
  {
    //printf("ERROR RESULT=");dumpln(result);
    DEUSE( result );
    result = new_parse_error( ERROR_INCOMPLETE_PAIR, errpos, input );
  }


  DEALLOC( buffer );
  //printf("#########");
  //dump_atom(result,1);
  //printf("######\n");

  return result;
}
Parameters:
wordword containing source text
Returns:
atom with the source text with trimmed shell comment

Trims a shell comment from the beginning of the word. Shell comment can be only the first line if its first two characters are #!. If a shell comment is trimmed, then the result is a subword from the first character on the second line, otherwise the input word is returned as is but with increased reference count.

{
  #ifdef SAFEMODE
  assert( IS_WORD(word) || IS_SUBWORD(word) );
  #endif

  chars_t source = STRING(word);
  int_t   len    = LENGTH(word);

  // if there are no enough characters just exit
  if( LENGTH(word)<2 ) return USE(word);

  // if the first two characters are not #! then exit
  if( *source!=TEXT('#') || *(source+1)!=TEXT('!') ) return USE(word);

  // skip the line
  while( len && *source!=TEXT('\n') )
  {
    source++;
    len--;
  }

  // return a subword. Pay attention to always reference
  // the main host word because the input could be a word
  // or a subword.
  if( IS_WORD(word) )
  return new_subword( word, source, len );
  else
  return new_subword( WORD(word), source, len );
}
atom_t purify ( atom_t  word)
Parameters:
wordword to be purified
Returns:
purified word

Purifies a word by processing all backslashes and bars. Returns a new word if needed. Assumes that the input needs purification.

{
  //return USE(word);
  #ifdef SAFEMODE
  assert( IS_WORD(word) || IS_SUBWORD(word) );
  #endif

  chars_t source = STRING(word);
  int_t   len    = LENGTH(word);

  chars_t buffer = alloca( CHAR_SIZE*len ); // buffer for the longest word
  chars_t bp = buffer;

  int need_enbar    = 0;
  int is_mutated    = 0; // set to 1 if the word is mutated
  int in_backslash  = 0;
  int in_bars       = 0;
  for( ; len; len--,source++ )
  {
    need_enbar = in_bars || in_backslash;
    if( in_backslash )
    {
      in_backslash = 0;
    }
    else if( *source==TEXT('\\') )
    {
      is_mutated = 1;
      in_backslash = 1;
      continue;
    }
    else if( *source==TEXT('|') )
    {
      is_mutated = 1;
      in_bars = !in_bars;
      continue;
    }
    if( need_enbar )
    *bp++ = ENBAR(*source);
    else
    *bp++ = *source;

    //if( need_enbar )
    //printf(" PURIFY %d %d\n",*source,ENBAR(*source));
    //else
    //printf(" PURIFY %d   \n",*source );
  }

  *bp = NULL_CHAR;

  if( is_mutated )
  return new_word( buffer, bp-buffer );
  else
  return USE(word);
}
Parameters:
inputflat list
Returns:
list with nested sublists

Processes all (...) of a flat list by making them as sublists. Returns ERROR_INCOMPLETE_PAIR if the parentheses are not paired well.

{
  atom_t result = empty_list;

  atom_t _paren( int level, atom_t openparen, int* last_flags )
  {
#ifdef DEBUG_PARENTHESES
    printf("<PAREN> ENTER LEVEL %d INPUT=", level);
    dumpln(input);
#endif //DEBUG_PARENTHESES

    *last_flags = 0;

    atom_t result = empty_list;
    atom_t last = empty_list;
    while( IS_NOT_EMPTY(input) )
    {
      // get current element
      atom_t elem = USE(CAR(input));
      int flags = FLAGS(input);

#ifdef DEBUG_PARENTHESES
      printf("<PAREN> READ ELEMENT ");
      dumpln(elem);
#endif //DEBUG_PARENTHESES

      // if it is ( then call _parse() recursively
      // if it is ) then exit current _parse()
      if( IS_WORD(elem)||IS_SUBWORD(elem) )
      if( LENGTH(elem)==1 )
      {
        if( *STRING(elem)==TEXT('(') ) // process (
        {
          int f;
          DEUSE( elem ); //compensate USE() when elem was retrieved
          input = CDR(input);
          elem = _paren( level+1, elem, &f );
          flags |= f;
          if( IS_ERROR(elem) )
          {
            DEUSE( result );
            return elem;
          }
        }
        else if( *STRING(elem)==TEXT(')') )// process )
        {
          DEUSE( elem ); //compensate USE() when elem was retrieved
          *last_flags = FLAGS(input);
          break;
        }
      }

#ifdef DEBUG_PARENTESES
      printf("<PAREN> APPEND ");
      dumpln(elem);
#endif //DEBUG_PARENTHESES

      // append current element
      if( IS_EXTENDED(input) )
      {
        append_ex( elem, &result, &last );
        DEUSE( POS(last) );
        POS( last ) = USE( POS( input ) );
      }
      else
      append( elem, &result, &last );
      FLAGS(last) |= flags;

      // empty list
      if( IS_EMPTY(input) ) break;

      // move to next element
      input = CDR(input);
    }

    if( level && IS_EMPTY(input) )
    {
      DEUSE( result );
      return new_error( ERROR_INCOMPLETE_PAIR, openparen );
    }
    if( !level && !IS_EMPTY(input) )
    {
      DEUSE( result );
      return new_error( ERROR_INCOMPLETE_PAIR, CAR(input) );
    }
    if( level && IS_EMPTY(result) )
    {
      return new_error( ERROR_EMPTY_EXPRESSION, openparen );
    }

#ifdef DEBUG_PARENTHESES
    printf("<PAREN> EXIT LEVEL %d RESULT=", level);
    dumpln(result);
#endif //DEBUG_PARENTHESES
    if( level ) SET_FLAGS( result, FLAG_EXPRESSION );
    return result;
  } // _paren()


  int f;
  result = _paren( 0, empty_list, &f );

  return result;
}
atom_t define_user_function ( atom_t  source,
atom_t  input,
atom_t  parent 
)
Parameters:
sourceword containing the source
inputcontents between TO and END
parentcontext var where creation is done
Returns:
var atom or error atom

Defines new user-defined command or function. The input contains the contents of the function -- these are all tokens between TO and END. Var creation is done in several steps:

  • a new var atom is created with some temporary name
  • the left inputs are determined and created as local variables to the var atom. Left inputs are all consequitive words which start with colon : and are on a single line
  • the first token which cannot be a left input is considered as a function name. This token must be a word which does not start with colon : or double quotes ".
  • the right inputs are all tokens after the name which start with colon : and are on the same line.
  • a "..." after the last right input (or after the procedure name if there are no right inputs) determines that the procedure has unlimited number of accpeted inputs.
  • the body of the function is everything after the last right input (or funtion name) till the end. The only compulsory element of a function is its name. The left and right inputs could be missing. The body could be empty.
{
  #ifdef DEBUG_TO_END
  printf("<TO-END> DEFINING=");
  dumpln( input);
  #endif

  //atom_t last;
  atom_t a = input;
  atom_t name;
  atom_t lhi; // last_header_item

  // create the function
  atom_t function = new_var( word_to, parent, 1 ); //DEUSE(to);
  need_descr2( function );
  SET_FLAGS( function, FLAG_FUNCTION );
  LARGS( function ) = 0;
  RARGS( function ) = 0;

  // collecting left parameters
  while( IS_NOT_EMPTY(a) && 
  !GET_FLAGS(a,FLAG_NEWLINE) && 
  IS_ANY_WORD(CAR(a)) &&
  (*STRING(CAR(a))==TEXT(':')) )
  {
    lhi = CAR(a);
    atom_t var = new_local_var( CAR(a), function, 1 );
    if( IS_ERROR(var) )
    {
      DEUSE( input );
      //DEUSE( function );
      LOCALS(parent) = behead( LOCALS(parent) ); // this DEUSEs function
      return var;
    }

    SET_FLAGS( var, FLAG_VARIABLE );
    LARGS( function )++;
#ifdef SAFE_MODE
    assert( LARGS(function)<255 );
#endif
    a = CDR(a);
  }

  // collecting name
  if( IS_NOT_EMPTY(a) && 
      !GET_FLAGS(a,FLAG_NEWLINE) &&
      IS_ANY_WORD(CAR(a)) &&
      (*STRING(CAR(a))!=TEXT('"')) )
  {
    lhi = CAR(a);
    name = CAR(a);
    a = CDR(a);
  }
  else
  {
    DEUSE( input );
    return new_error( ERROR_MISSING_NAME, a );
  }

  //printf("<TO-END> DEFINING="); dumpln( name );

  // collecting right parameters
  while( IS_NOT_EMPTY(a) && 
  !GET_FLAGS(a,FLAG_NEWLINE) && 
  IS_ANY_WORD(CAR(a)) &&
  (*STRING(CAR(a))==TEXT(':')) )
  {
    lhi = CAR(a);
    atom_t var = new_local_var( CAR(a), function, 1 );
    if( IS_ERROR(var) )
    {
      DEUSE( input );
      //DEUSE( function );
      LOCALS(parent) = behead( LOCALS(parent) ); // this DEUSEs function
      return var;
    }
    SET_FLAGS( var, FLAG_VARIABLE );
    RARGS( function )++;
#ifdef SAFE_MODE
    assert( RARGS(function)<255 );
#endif
    a = CDR(a);
  }

  // collect "..."
  if( IS_NOT_EMPTY(a) &&
      !GET_FLAGS(a,FLAG_NEWLINE) &&
      IS_ANY_WORD(CAR(a)) &&
      (LENGTH(CAR(a))==3) &&
      (*(STRING(CAR(a))+0)==TEXT('.')) &&
      (*(STRING(CAR(a))+1)==TEXT('.')) &&
      (*(STRING(CAR(a))+2)==TEXT('.')) )
  {
    SET_FLAGS( function, FLAG_INFINITE_ARGS );
    a = CDR(a);
  }


  // set offset of parameters
  int offset = BASE_OFFSET_PARAMS; // this is the start offset
  atom_t x;
  for( x = LOCALS(function); IS_NOT_EMPTY(x); x=CDR(x) )
  {
    //printf("set offset of "); dump(NAME(CAR(x))); printf(" to be %d\n",offset);
    OFFSET(CAR(x)) = offset;
    offset += sizeof( atom_t );
  }


  // check whether the function is already defined
  atom_t var = find_local_var( name, parent );
  if( var )
  {
    if( LARGS(var)!=LARGS(function) || RARGS(var)!=RARGS(function) )
    {
      LOCALS(parent) = behead( LOCALS(parent) ); // this DEUSEs function
      atom_t result = new_error( ERROR_INCOMPATIBLE_REDEFINITION, input );
      DEUSE( input );
      return result;
    }

    // forget data of the old function and
    // reuse data of the new function
    DEUSE( FULLSOURCE(var) );
    DEUSE( SOURCE(var) );
    DEUSE( LOCALS(var) );
    DEUSE( BODY(var) );
    DEUSE( TREE(var) );
    DEUSE( BINARY(var) );

    TREE(var)   = empty_list;
    BINARY(var) = empty_list;

    BODY(var)   = USE(a); //USE(BODY(function));
    LOCALS(var) = USE(LOCALS(function));

    LOCALS(parent) = behead(LOCALS(parent));
    //DEUSE(function);
    function = var;
  }
  else
  {
    DEUSE( NAME(function) );
    NAME( function ) = USE(name);
    BODY( function ) = USE(a);
  }

  //LEVEL( function ) = level;
  ADDRESS( function ) = 0;
  PRIORITY( function ) = PRIORITY_FUN;

  // cut header and 'END' from source
  chars_t new_src = STRING(lhi)+LENGTH(lhi);
  int new_len = LENGTH(source) - (new_src-STRING(source)) - LENGTH(word_end);
  SOURCE( function ) = new_subword( source, new_src, new_len );
  FULLSOURCE( function ) = USE( source );

  //DEUSE( input );
  #ifdef DEBUG_TO_END
  printf("<TO-END> DEFINED FUNCTION "); dumpln(NAME(function));
  printf("<TO-END>           SOURCE "); dumpln(SOURCE(function));
  printf("<TO-END>           BODY   "); dumpln(BODY(function));
  printf("<TO-END>           TREE   "); dumpln(TREE(function));
  #endif

  //printf("@@@@@@DEFINE "); dump_atom(NAME(function),1);
  //printf(" IN PARENT "); dump_atom(NAME(PARENT(function)),1);
  //printf("@@@@@@\n");


  return function;
}
atom_t preparse ( atom_t  input,
atom_t  parent,
int  level 
)
Parameters:
inputlist with tokenized source code
parentcontext var where preparsing is done
levelcurrent level of nesting return source without TO ... END definitions

Pre-parses a flat tokenized list by extracting all local TO ... END definitions and creating corresponding local functions. Returns the remaining source code.

{
  #ifdef DEBUG_TO_END
  printf("<TO-END> ENTER1=");
  dumpln( input);
  #endif

  atom_t a = input;
  atom_t before_a = NULL;
  atom_t before_to = NULL;
  atom_t first_to = NULL;
  int to_end_depth = 0;
  while( IS_NOT_EMPTY(a) )
  {
    if( IS_ANY_WORD(CAR(a)) )
    {
      // found TO or its synonym
      if( same_words(CAR(a),word_to) || same_words(CAR(a),word_to_syn) )
      {
        if( to_end_depth==0 )
        {
          before_to = before_a;
          first_to = a;
        }
        to_end_depth++;
      }

      // found END
      if( same_words( CAR(a), word_end ))
      {
        // END without to ?
        if( to_end_depth==0 )
        {
          atom_t result = new_error( ERROR_INCOMPLETE_PAIR, CAR(a) );
          DEUSE( input );
          return result;
        }

        to_end_depth--;
        if( to_end_depth==0 )
        {
          // found END corresponding to TO.
          // extract the whole TO..END
          if( first_to==before_a )
          {
            DEUSE( input );
            return new_error( ERROR_EMPTY_TO_END, first_to );
          }

          atom_t word = new_subword(WORD(CAR(first_to)),STRING(CAR(first_to)),(STRING(CAR(a))-STRING(CAR(first_to))+LENGTH(word_end)));
          //atom_t word = empty_list;

          first_to = behead(first_to);
          //first_to = CDR(first_to);
          CDR(before_a) = empty_list;
          before_a = before_to;
          a = behead(a);
          if( before_to )
          CDR(before_a) = a; // TO is not first item
          else
          input = a; // TO is first item

          #ifdef DEBUG_TO_END
          printf("<TO-END> EXTRACT:");
          dumpln( first_to );
          printf("<TO-END> CURRENT INPUT:");
          dumpln( input );
          printf("<TO-END> DEFINE IN PARENT:");
          dumpln( parent );
          #endif

          // create new command/function
          atom_t var = define_user_function( word, (first_to), parent );
          DEUSE( word );
          if( IS_ERROR(var) ) 
          {
            DEUSE( input );
            return var;
          }
          DEUSE( first_to );

          // process recursively the function
          // for nested to-end definitions
          BODY(var) = preparse( BODY(var), var, level+1 );

          // next item is set explicitely,
          SET_FLAGS( a, FLAG_NEWLINE );
          first_to = NULL;
          before_to = NULL;
          continue;
        }
      }
    }

    // next item
    before_a = a;
    a = CDR(a);
  }
  
  // TO without END ?
  if( first_to )
  {
    atom_t result = new_error( ERROR_INCOMPLETE_PAIR, CAR(first_to) );
    DEUSE(input);
    return result;
  }

  #ifdef DEBUG_TO_END
  printf("<TO-END> LEFTOVERS=");
  dumpln( input);
  printf("\n");
  #endif

  return input;
}
parse ( atom_t  input,
atom_t  parent,
int  top_level 
)
Parameters:
inputunparsed list
parentparent var in which parsing is done
top_levelflag whether parsing is at its top-level
Returns:
parsed abstract syntax tree

Parses a list into a tree (aka LISP notation). The list is supposed to be tokenized as commands. The parent variable provides a context of variables which can be referenced from the parsed input.

running_compiled_code && bug #3442773

{
  atom_t   vars[MAX_ELEMS];
  atom_t   data[MAX_ELEMS];
  atom_t  poses[MAX_ELEMS]; // source position (if available)
  int    pris[MAX_ELEMS];
  int    usedby[MAX_ELEMS];
  int     largs[MAX_ELEMS]; // number of unlinked left arguments
  int     rargs[MAX_ELEMS]; // number of unlinked right arguments
  int count; // count of elements in above arrays

  atom_t aux = empty_list;  // additional lists to delete

  //printf("parse==");dumpln(input);

  #ifdef DEBUG_PARSE
  void dump_arrays( )
  {
    int i;
    for( i=0; i<count; i++ ) if( pris[i]>=PRIORITY_MIN )
    {
      printf("<PARSE>   %2d.[used by %2d; free=%d:%d] v%c0 pr=%d   =",
      i, usedby[i], largs[i], rargs[i], vars[i]?'#':'=', pris[i]);
      dump_atom(data[i],1);
      if( !IS_UNBOUND(poses[i]) )
      {
        printf(" @ ");
        dump_atom(poses[i],1);
      }
      printf("\n");
    }
    printf("\n");
  }
  #endif


  // Finds the first line of input. Return a pointer
  // to the first node after the line. Sets arrays
  // vars[], data[] and prios[]. 
  atom_t get_line( atom_t input )
  {
    int has_no_infinite = 1;
    count = 0;
    while( IS_NOT_EMPTY(input) )
    {
      atom_t elem = USE(CAR(input));
      //printf("testing element ");
      //dumpln(elem);

      #ifdef SAFEMODE
      assert( count<MAX_ELEMS );
      #endif
      data[count] = elem;
      vars[count] = IS_ANY_WORD(elem) ? find_var(elem,parent) : NULL;
      if( VAR && !IS_COMMAND(VAR) && !IS_FUNCTION(VAR) )
      {
        // If we have variable called 4 (e.g. MAKE 4 "BOZA)
        // then VAR!=NULL, but this is allowed only for
        // functions and commands, so set VAR=NULL!
        VAR=NULL;
      }
      pris[count] = (VAR ? PRIORITY(VAR) : PRIORITY_MAX);
      usedby[count] = -1;
      largs[count] = VAR ? LARGS(VAR) : 0;
      rargs[count] = VAR ? RARGS(VAR) : 0;
      poses[count] = IS_EXTENDED(input)?POS(input):unbound;

      if( VAR && IS_VARIABLE(VAR) )
      {
        printf("found variable as function: ");
        dump_atom(NAME(VAR),1);
        printf("\n");
      }

      // if element is a word not starting with ":" and
      // cannot be transfered into a number and does not
      // exist as a variable, then this is unknown function
      float64_t x;
      if( IS_ANY_WORD(elem) && 
          !VAR &&
          *STRING(elem)!=TEXT(':') &&
          *STRING(elem)!=TEXT('"') &&
          !atom_to_float(elem,&x) )
      {
        //DEUSE( input );
        DEUSE( elem );
        return new_error( ERROR_DO_NOT_KNOW, elem );
      }

      if( !top_level && VAR && has_no_infinite )
      {
        if( GET_FLAGS(VAR,FLAG_INFINITE_ARGS) )
        {
          rargs[count]=-1;
          has_no_infinite = 0;
        }
      }

      // test for unary
      if( VAR && GET_FLAGS(VAR,FLAG_CAN_BE_UNARY) && largs[count]==1 )
      {
        // case 1:   <spc> op <spc> -> binary
        // case 2:  <spc> op  -> unary
        // case 3:        op <spc>  -> binary
        // case 4:        op  -> binary
        if( GET_FLAGS(input,FLAG_AFTER_SPACE|FLAG_BEFORE_SPACE)
            == FLAG_AFTER_SPACE )
        largs[count]=0;
      }

      count++;
      //next:
      input = CDR(input);

      // if the next item is on a new line AND if it is
      // not constant-list AND we are not in a sublist
      // then break the line here
      if( top_level && IS_NOT_EMPTY(input) && GET_FLAGS(input,FLAG_NEWLINE) &&
          (!IS_LIST(CAR(input)) || IS_EXPRESSION(CAR(input))) )
      break;
    } // line while

    return input;
  } //get_line()

  #ifdef DEBUG_PARSE
  printf("<PARSE> ENTER=");
  dumpln( input);
  #endif


  //-----------------------------------------
  // STEP 1: Convert (..) into subexpressions
  //-----------------------------------------
  if( IS_EMPTY(input) ) return input;
  if( IS_ERROR(input) ) return input;
  atom_t orig_input = input = parentheses( input );
  if( IS_ERROR(input) ) return input;

  #ifdef DEBUG_PARSE
  printf("<PARSE> PARENTHESISEZ=");
  dumpln( input);
  #endif

  // if there is no parent consider the root as parent
  if( !parent ) parent = root;


  //-----------------------------------------
  // STEP 2-7: Convert list into lisp line-by-line
  //-----------------------------------------
  atom_t result = empty_list;
  atom_t last = empty_list;

  atom_t err = NULL;

  int i,j,finished;
  int* args;

#define LINK \
    { \
      usedby[j] = i; \
      args[i]--; \
    }
  //printf("%d is used by %d\n",j,i);         

#define RELINK \
    { \
      if( j<usedby[j] ) { largs[usedby[j]]++; finished=0; } \
      if( j>usedby[j] ) { rargs[usedby[j]]++; finished=0; } \
      LINK; \
    }

  while( IS_NOT_EMPTY(input) ) // main while
  {
    next_line: 
    #ifdef DEBUG_PARSE
    printf("\n\n");
    printf("<PARSE>------------------\n");
    printf("<PARSE> START A NEW LINE \n");
    printf("<PARSE>------------------\n");
    #endif

    //-----------------------------------
    // STEP 2: Get a line from the input
    //-----------------------------------
    input = get_line( input );
    if( IS_ERROR(input) )
    {
      err = input;
      break;
    }
    #ifdef DEBUG_PARSE
    printf("<PARSE> ORIGINAL PARSE_LIST:\n");
    dump_arrays( );
    #endif
    //printf("<PARSE> LEFTOVER:"); dumpln(input);
    //printf("------------------------------------\n");

    
    //-----------------------------------
    // STEP 3: Analyze and set dependencies between items
    //-----------------------------------
    finished = 0;
    while( !finished )
    {
      finished = 1;
      for( i=0; i<count; i++ )
      {

        // now process right parameters
        args = rargs;
        for( j=i+1; args[i] && j<count; j++ )
        {
          if( vars[j] && !IS_FUNCTION(vars[j]) )
          break;
          if( usedby[j]==-1 )
          LINK
          else
          if( (usedby[j]>j || usedby[j]<i) && 
              (pris[usedby[j]]<=pris[i]) )
          RELINK;
        } // for largs

        // now process left parameters
        args = largs;
        for( j=i-1; args[i] && j>=0; j-- )
        {
          //if(vars[j])
          //{
          //printf("j=%d cond=%d var=",j,!IS_FUNCTION(vars[j]));
          //dumpln(vars[j]);
          //}
          if( vars[j] && !IS_FUNCTION(vars[j]) )
          break;
          if( usedby[i]==j ) break;
          if( usedby[j]==-1 )
          {
            /*if( usedby[i]!=j )*/ LINK; // skip direct circular references
          }
          else
          if( (usedby[j]<j || usedby[j]>i) && 
              (pris[usedby[j]]<pris[i]) ) 
          {
            /*if( usedby[i]!=j )*/ RELINK; // skip direct circular reference
          }
        } // for largs
      } // for each element
    } // while not finished

    #ifdef DEBUG_PARSE
    printf("<PARSE> FINAL PARSE_LIST:\n");
    dump_arrays( );
    #endif


    //-----------------------------------
    // STEP 4: Check for extra or missing arguments
    //-----------------------------------
    #ifdef DEBUG_PARSE
    printf("<PARSE> START STEP 4:\n");
    dump_arrays( );
    #endif

    args = rargs;
    int top_level_pos = -1;
    for( j=0; j<count; j++ )
    {
      // check whether there are two or more free items
      // only one is allowed for non-top levels
      if( usedby[j]==-1 )
      {
        if( !top_level && top_level_pos>=0 )
        {
          // we are ready to announce that there are extra values
          // but before doing this check whether there is function
          // that would be so polite to take any of them
          //printf(">>> top_level_pos=%d\n",top_level_pos);
          if( rargs[top_level_pos]==0 && 
              vars[top_level_pos] &&
              GET_FLAGS(vars[top_level_pos],FLAG_MAY_HAVE_EXTRA_ARG) )
          {
            i = top_level_pos;
            //printf(">>> attach %d to %d\n",j,i);
            //printf(">>> rargs
            LINK;
            //printf(">>>>>>>"); dumpln(NAME(vars[top_level_pos]));
          }
          else
          {
            err = new_error( ERROR_CROWDED_EXPRESSION, data[j] );
            break;
          }
        }
        if( top_level_pos<0 ) top_level_pos = j;
      }

      // left paramers can be skipped only for unary functions
      if( largs[j] && !GET_FLAGS(vars[j],FLAG_CAN_BE_UNARY) )
      {
        err = new_error( ERROR_MISSING_LEFTS, data[j] );
        break;
      }

      // some functions may ignore 1 missing right parameter
      if( rargs[j]==1 && GET_FLAGS(vars[j],FLAG_MAY_SKIP_LAST_ARG) )
      continue;

      // when arguments can be infinite, this also mean any number
      if( rargs[j] && !GET_FLAGS(vars[j],FLAG_INFINITE_ARGS) )
      {
        //printf("rargs=%d\n",rargs[j]);
        err = new_error( ERROR_MISSING_RIGHTS, data[j] );
        break;
      }
    }

    if( err ) break;


    //-----------------------------------
    // STEP 5: Recursivelly process nested expressions
    //     and arguments which must be lisp'ed too
    //-----------------------------------
    #ifdef DEBUG_PARSE
    printf("<PARSE> START STEP 5:\n");
    dump_arrays( );
    #endif
    for( i=0; i<count; i++ ) if( IS_LIST(data[i]) )
    {
      if( GET_FLAGS(data[i],FLAG_EXPRESSION) )
      { // nested expression
        atom_t expr = parse( data[i], parent, 0 );
        
        if( IS_ERROR(expr) ) 
        {
          err = expr;
        }
        else
        {
          DEUSE( data[i] );
          data[i] = expr;
          SET_FLAGS( data[i], FLAG_EXPRESSION );
        }
      }
      else
      { // possibly argument to be reparsed
        j=usedby[i];
        if( (j>=0) && vars[j] && GET_FLAGS(vars[j],FLAG_PROCESS_ARGS) )
        {
          atom_t expr = build_syntax_subtree( parent, data[i] );
          if( IS_ERROR(expr) ) 
          {
            err = expr;
          }
          else
          {
            DEUSE( data[i] );
            data[i] = expr;
            SET_FLAGS( data[i], FLAG_EXPRESSION+FLAG_WAS_LIST_CONST );
          }
        }
      }
    }
    if( err ) break;


    //-----------------------------------
    // STEP 6: Group items of subexpressions, process numbers
    //-----------------------------------
    #ifdef DEBUG_PARSE
    printf("<PARSE> START STEP 6:\n");
    dump_arrays( );
    #endif
    for( i=0; i<count; i++ )
    {
      /*
      // convert all numeric data into numbers
      float64_t n;
      if( IS_ANY_WORD(data[i]) && atom_to_float(data[i],&n) )
        {
          DEUSE( data[i] );
          data[i] = new_float( n );
        }

      // create a list node for each element
      // whatever the element is, this list node will be used
      data[i] = new_list( data[i], empty_list );
      SET_FLAGS( data[i], FLAG_EXPRESSION );
      */
      
      // create a list node for each element
      // whatever the element is, this list node will be used
      float64_t n;
      if( IS_ANY_WORD(data[i]) && atom_to_float(data[i],&n) )
      {
        atom_t pos = data[i]; // old source of the number
        data[i] = new_list_ex( new_float(n), empty_list );
        POS( data[i] ) = pos;
      }
      else
      {
        data[i] = new_list( data[i], empty_list );
      }
      SET_FLAGS( data[i], FLAG_EXPRESSION );
    }

    for( i=0; i<count; i++ )
    {
      if( usedby[i]>=0 )
      {
        atom_t a = data[usedby[i]];
        atom_t name = CAR(data[i]);
        atom_t function = vars[usedby[i]];
        //printf("name(%d)=",i);dumpln(name);
        //printf("function(%d)=",usedby[i]);dumpln(function);
        //printf("a(%d)=",usedby[i]);dumpln(a);

        // if the item is word constant and is used by
        // a function which creates variables, then do
        // create the variables now.
        if( GET_FLAGS(function,FLAG_SET_ALL_VARS)
            && IS_ANY_WORD(name)
            && LENGTH(name)>1
            && *STRING(name)==TEXT('"') )
        {
          //printf("create local "); dumpln(name);
          //printf("          in "); dumpln(NAME(parent));
          atom_t var = new_local_var( name, parent, 1 );
          if( IS_ERROR(var) ) err = var;
          SET_FLAGS( var, FLAG_VARIABLE );
        }

        // if the item is word constant and is used by
        // a function which creates a single variable
        // (like MAKE), then do create the variable as
        // global if it does not exist
        // 2010.06.26: create the variable only if the
        // parsing has not been activated at run-time.
        atom_t real_name = NULL;
        if( !running_compiled_code
            && GET_FLAGS(function,FLAG_SET_ONE_VAR)
            && (i==usedby[i]+1)
            && (IS_INTEGER(name) || IS_FLOAT(name)) )
        {
          real_name = atom_to_word( name );
          goto use_numeric_name;
        }

        if( 
            GET_FLAGS(function,FLAG_SET_ONE_VAR)
            && (i==usedby[i]+1)
            && IS_ANY_WORD(name)
            && LENGTH(name)>1
            && *STRING(name)==TEXT('"') )
        {
          real_name = new_subword( name, STRING(name)+1, LENGTH(name)-1 );
          use_numeric_name:
          if( !find_var( real_name, parent ) )
          {
            atom_t var = new_local_var( real_name, parent/*root*/, 0 ); //printf("!2087!");
            if( IS_ERROR(var) ) err = var;
            SET_FLAGS( var, FLAG_VARIABLE );
          }
          DEUSE( real_name );

          // patch for FOR command. If the control
          // variable is ABC then create ABC and
          // ^ABC
          if( ADDRESS(function)==(int)rt_for )
          {
            assert( IS_ANY_WORD(name) );
            atom_t real_name = new_word( STRING(name), LENGTH(name) );
            *STRING(real_name) = L'^';
            if( !find_var( real_name, parent ) )
            {
              atom_t var = new_local_var( real_name, parent/*root*/, 0 ); //printf("!2103!");
              if( IS_ERROR(var) ) err = var;
              SET_FLAGS( var, FLAG_VARIABLE );
            }
            DEUSE( real_name );
          }
          
        }

        // if the item is word constant and is used by
        // a function which load libraries, then do
        // load the library now.
        if( ADDRESS(function)==(int)rt_load
            && IS_ANY_WORD(name)
            && LENGTH(name)>1
            && *STRING(name)==TEXT('"') )
        {
          atom_t real_name = new_word( STRING(name)+1, LENGTH(name)-1 );
          atom_t word = read_word( STRING(real_name) );
          DEUSE( real_name );

          if( IS_ERROR(word) ) { err = USE(word); break; }
          atom_t trimmed = trim_shell_comment( word );
          DEUSE( word );

          atom_t tokens1 = tokenize( trimmed, TOKENIZE_DATA );
          DEUSE( trimmed );
          if( IS_ERROR(tokens1) )
          {
            err = tokens1;
            break;
          }
          
          atom_t tokens2 = tokenize( tokens1, TOKENIZE_COMMANDS );
          DEUSE( tokens1 );
          if( IS_ERROR(tokens2) ) return tokens2;

          tokens2 = preparse( tokens2, root, LEVEL(root) );
          atom_t body = parentheses( tokens2 );
          DEUSE( tokens2 );

          //printf("BODY=");dumpln(body);
          //printf("input=");dumpln(input);

          if( IS_NOT_EMPTY(body) )
          {
            atom_t a;
            for( a = body; IS_NOT_EMPTY(CDR(a)); a=CDR(a) );
            CDR(a) = USE(input);
            input = body;
          }
          
          //printf("old aux="); dumpln(aux);
          aux = new_list( body, aux );
          //printf("new aux="); dumpln(aux);
          for( i=0; i<count; i++ ) DEUSE( data[i] );
          goto next_line;
        }

        SET_FLAGS( a, FLAG_EXPRESSION );
        while( IS_NOT_EMPTY(CDR(a)) ) a=CDR(a);
        CDR(a) = vars[i] ? new_list(data[i],empty_list) : data[i];
      }
    }
    if( err ) break;


    //-----------------------------------
    // STEP 7: Group top-most items into result list
    //-----------------------------------
    #ifdef DEBUG_PARSE
    printf("<PARSE> START STEP 7:\n");
    dump_arrays( );
    #endif
    //printf("data[0]=");dumpln(data[0]);
    //printf("vars[0]=");dumpln(vars[0]);
    for( i=0; i<count; i++ )
    if( usedby[i]==-1 )
    {
      //printf("i=%d var=%x is_expr=%d\n",i,vars[i],IS_EXPRESSION(CAR(data[i])));
      //boza if( !vars[i] && !IS_EXPRESSION(CAR(data[i])))
      //boza  {
      //printf("i=%d\n",i);
      //boza err = new_error_atom( ERROR_UNUSED_VALUE, data[i] );
      //boza break;
      //boza }
      if( IS_UNBOUND(poses[i]) )
      {
        append( data[i], &result, &last );
      }
      else
      {
        append_ex( data[i], &result, &last );
        DEUSE( POS(last) );
        POS( last ) = USE( poses[i] );
      }
    }

    if( err ) break;
  } // main line


  //-----------------------------------------
  // STEP pre-8: Test for error and release unused resources
  //-----------------------------------------
  DEUSE( orig_input );
  DEUSE( aux );
  if( err )
  {
    //printf("ERRRRRRROR\n");
    for( i=0; i<count; i++ ) DEUSE( data[i] );
    DEUSE( result );
    return err;
  }

  //-----------------------------------
  // STEP 8: Remove unnecessary parentheses
  //-----------------------------------
  //printf("before=");dumpln(result);
  //
  // step 8 removed, because in some cases it removes more
  // parentheses than needed - e.g. when function body has
  // only one command
  // while( IS_EMPTY(CDR(result)) && 
  //   IS_LIST(CAR(result)) &&
  //   GET_FLAGS(CAR(result),FLAG_EXPRESSION) )
  //  {
  //    atom_t a = CAR(result);
  //    CAR(result) = empty_list;
  //    DEUSE( result );
  //    result = a;
  //  }
  //
  //printf(" after=");dumpln(result);

  if( !top_level ) SET_FLAGS( result, FLAG_EXPRESSION );

  #ifdef DEBUG_PARSE
  printf("<PARSE> EXIT=");
  dumpln(result);
  #endif

  if( OPTION_DUMP_AST )
  {
    outter( TEXT("Abstract Syntax Tree:\n\0"), UNKNOWN );
    dumpln( result );
    outter( TEXT("\n\0"), UNKNOWN );
  }

  return result;
}
Parameters:
funcvar atom for the parse context
Returns:
empty_list or an error atom

Parses completely a function. Its source is stored in its body as word, subword, data-tokenized list or command-tokenized list. Building algorithm:

  • tokenization of body as commands
  • extracting all TO ... ENDs and create them as subfunctions
  • parsing the func's body into abstract syntax tree
  • recursively build trees of subfunctions
{
  // exit is function is already treefied
  if( IS_NOT_EMPTY(TREE(func)) ) return empty_list;

  //printf("BUILD_SYNTAX_TREE(FUNC=");
  //dump(NAME(func));
  //printf(",SOURCE=");
  //dump(SOURCE(func));
  //printf(",BODY=");
  //dump(BODY(func));
  //printf(",LOCALS=");
  //dump(LOCALS(func));
  //printf(")\n\n");

  if( IS_EMPTY(BODY(func)) )
  {
    // Step 1. Tokenize
    //printf("SOURCE="); dumpln(SOURCE(func));
    atom_t tokens1 = tokenize( SOURCE(func), TOKENIZE_DATA );
    if( IS_ERROR(tokens1) ) return tokens1;
    //printf("TOKENS1="); dumpln(tokens1);

    atom_t tokens2 = tokenize( tokens1, TOKENIZE_COMMANDS );
    DEUSE( tokens1 );
    if( IS_ERROR(tokens2) ) return tokens2;
    //printf("TOKENS2="); dumpln(tokens2);

    // Step 2. Extract TO..END's
    atom_t body = preparse( tokens2, func, LEVEL(func) );
    if( IS_ERROR(body) ) return body;
    DEUSE( BODY(func) );
    BODY(func) = body;
    //printf("BODY="); dumpln(BODY(func));
  }


  // Step 3. Parse function body
  //printf("BODY="); dumpln(BODY(func));
  atom_t tree = parse( BODY(func), func, 1 );
  if( IS_ERROR(tree) ) return tree;
  DEUSE(TREE(func));
  TREE(func) = tree;
  //printf("TREE="); dumpln(TREE(func));

  // because the might be some new TO..ENDs
  // scan all locals and build those which
  // have no trees
  atom_t local;
  atom_t locals;
  for( locals=LOCALS(func); IS_NOT_EMPTY(locals); locals=CDR(locals) )
  {
    local = CAR(locals);
    if( !DESCR2(local) ) continue;
    atom_t x = build_syntax_tree( local );
    if( IS_ERROR(x) ) return x;
  }

  return empty_list;
}

Variable Documentation

Initial value:
<>|? */

char_t enbar[128]

table for |a|->a conversions


[ HOME | INDEX | ATOMS | VARS | REFERENCE ]
Lhogho Developer's Documentation
Tue Feb 7 2012