Defines
#define	GET_CHAR ch=*source
	get current character
#define	VAR vars[count]
	shortens access to vars array
#define	LINK
#define	RELINK
Tokentypes
#define	TOKEN_END 0
	no more tokens available
#define	TOKEN_WORD 1
	normal word token
#define	TOKEN_DIRTY_WORD 2
	word token with unhandled backslashs or bars
#define	TOKEN_OPEN 3
	open square bracket token
#define	TOKEN_CLOSE 4
	close square bracket token
#define	TOKEN_LINEEND 5
	end of line token
#define	TOKEN_SPACE 6
	whitespace token
#define	TOKEN_ERROR 7
	error occured
Parse modes
#define	MODE_ENTRY 0
	entry mode
#define	MODE_SPACE 1
	space mode
#define	MODE_WORD 2
	word mode
#define	MODE_BARRED 3
	barred mode
#define	MODE_BACKSLASHED 4
	backslashed mode
#define	MODE_TILDE 5
	tilde mode
#define	MODE_SEMITILDE 6
	semitilde mode
#define	MODE_SEMICOLON 7
	semicolon mode
#define	MODE_TILDESPACE 8
	tildespace mode
#define	MODE_LESS 9
	less mode
#define	MODE_GREATER 10
	greater mode
#define	MAX_MODE 11
	greatest mode + 1
Microprogram defines (basic)
#define	PAT_TILDE 0x000001
	push tilde
#define	PAT_PUSH 0x000002
	push last character
#define	PAT_NEXT 0x000004
	goto next character
#define	PAT_DIRTY 0x000008
	set dirty flag
#define	PAT_MUTATED 0x000010
	set mutated flag
#define	PAT_RETURN_TO_WORD 0x000020
	next return will return to word mode
#define	PAT_RETURN_TO_SELF 0x000040
	next return will return to current mode
#define	PAT_GOTO 0x000080
	follows 4bit go-to-mode
#define	PAT_TOKEN 0x000100
	follows 4bit token_id
#define	PAT_RETURN 0x000200
	return to stack-to-mode
#define	PAT_ERROR_POS 0x000400
	remember current position as error position
#define	PAT_ERROR 0x000800
	generate error
#define	PAT_RETURN_TO_CALLER 0x001000
	next return will return to the caller of this mode
#define	PAT_NEXT2 0x002000
	goto next next character
Microprogram defines (compound)
#define	PAT_SHIFT 16
	shift factor
#define	PAT_TOKEN_END PAT_TOKEN+(TOKEN_END<<PAT_SHIFT)
	microprogram for end token
#define	PAT_TOKEN_SPACE PAT_TOKEN+(TOKEN_SPACE<<PAT_SHIFT)
	microprogram for space token
#define	PAT_TOKEN_WORD PAT_TOKEN+(TOKEN_WORD<<PAT_SHIFT)
	microprogram for word token
#define	PAT_TOKEN_LINE PAT_TOKEN+(TOKEN_LINEEND<<PAT_SHIFT)
	microprogram for line end token
#define	PAT_TOKEN_OPEN PAT_TOKEN+(TOKEN_OPEN<<PAT_SHIFT)
	microprogram for open token
#define	PAT_TOKEN_CLOSE PAT_TOKEN+(TOKEN_CLOSE<<PAT_SHIFT)
	microprogram for close token
#define	PAT_GOTO_SPACE PAT_GOTO+(MODE_SPACE<<PAT_SHIFT)
	microprogram for goto space mode
#define	PAT_GOTO_WORD PAT_GOTO+(MODE_WORD<<PAT_SHIFT)
	microprogram for goto word mode
#define	PAT_GOTO_BACKSLASHED PAT_GOTO+(MODE_BACKSLASHED<<PAT_SHIFT)
	microprogram for goto backslashed mode
#define	PAT_GOTO_BARRED PAT_GOTO+(MODE_BARRED<<PAT_SHIFT)
	microprogram for goto barred mode
#define	PAT_GOTO_SEMICOLON PAT_GOTO+(MODE_SEMICOLON<<PAT_SHIFT)
	microprogram for goto semicolon mode
#define	PAT_GOTO_TILDE PAT_GOTO+(MODE_TILDE<<PAT_SHIFT)
	microprogram for goto tilde mode
#define	PAT_GOTO_TILDESPACE PAT_GOTO+(MODE_TILDESPACE<<PAT_SHIFT)
	microprogram for goto tildespace mode
#define	PAT_GOTO_SEMITILDE PAT_GOTO+(MODE_SEMITILDE<<PAT_SHIFT)
	microprogram for goto semitilde mode
#define	PAT_GOTO_LESS PAT_GOTO+(MODE_LESS<<PAT_SHIFT)
	microprogram for goto less mode
#define	PAT_GOTO_GREATER PAT_GOTO+(MODE_GREATER<<PAT_SHIFT)
	microprogram for goto greater mode
Special characters
When a chracter is barrable and is actually barred then its code is changed. For example codes for space and barred space are different for Lhogho, but should appear the same for users. Barrable characters are ()+-
#define	MAX_ELEMS 128
	maximal number of elements in a statement
char_t	debar [32]
	table for a->\|a\| conversions
atom_t	build_syntax_subtree (atom_t function, atom_t source)
	parses sublist of commands
void	init_parser ()
	initializes parser
atom_t	tokenize (atom_t input, int method)
	tokenizes into a list
atom_t	trim_shell_comment (atom_t word)
	trims shell comment (if any)
atom_t	purify (atom_t word)
	purifies a word
atom_t	parentheses (atom_t input)
	processes parentheses
atom_t	define_user_function (atom_t source, atom_t input, atom_t parent)
	creates var atom for a user-defined function
atom_t	preparse (atom_t input, atom_t parent, int level)
	extracts all `TO` ... `END` definitions
atom_t	parse (atom_t input, atom_t parent, int top_level)
	parses a list into a tree
atom_t	build_syntax_tree (atom_t func)
	parses body of user-defined function

Define Documentation

#define GET_CHAR ch=*source

#define VAR vars[count]

#define TOKEN_END 0

#define TOKEN_WORD 1

#define TOKEN_DIRTY_WORD 2

#define TOKEN_OPEN 3

#define TOKEN_CLOSE 4

#define TOKEN_LINEEND 5

#define TOKEN_SPACE 6

#define TOKEN_ERROR 7

#define MODE_ENTRY 0

#define MODE_SPACE 1

#define MODE_WORD 2

#define MODE_BARRED 3

#define MODE_BACKSLASHED 4

#define MODE_TILDE 5

#define MODE_SEMITILDE 6

#define MODE_SEMICOLON 7

#define MODE_TILDESPACE 8

#define MODE_LESS 9

#define MODE_GREATER 10

#define MAX_MODE 11

#define PAT_TILDE 0x000001

#define PAT_PUSH 0x000002

#define PAT_NEXT 0x000004

#define PAT_DIRTY 0x000008

#define PAT_MUTATED 0x000010

#define PAT_RETURN_TO_WORD 0x000020

#define PAT_RETURN_TO_SELF 0x000040

#define PAT_GOTO 0x000080

#define PAT_TOKEN 0x000100

#define PAT_RETURN 0x000200

#define PAT_ERROR_POS 0x000400

#define PAT_ERROR 0x000800

#define PAT_RETURN_TO_CALLER 0x001000

#define PAT_NEXT2 0x002000

#define PAT_SHIFT 16

#define PAT_TOKEN_END PAT_TOKEN+(TOKEN_END<<PAT_SHIFT)

#define PAT_TOKEN_SPACE PAT_TOKEN+(TOKEN_SPACE<<PAT_SHIFT)

#define PAT_TOKEN_WORD PAT_TOKEN+(TOKEN_WORD<<PAT_SHIFT)

#define PAT_TOKEN_LINE PAT_TOKEN+(TOKEN_LINEEND<<PAT_SHIFT)

#define PAT_TOKEN_OPEN PAT_TOKEN+(TOKEN_OPEN<<PAT_SHIFT)

#define PAT_TOKEN_CLOSE PAT_TOKEN+(TOKEN_CLOSE<<PAT_SHIFT)

#define PAT_GOTO_SPACE PAT_GOTO+(MODE_SPACE<<PAT_SHIFT)

#define PAT_GOTO_WORD PAT_GOTO+(MODE_WORD<<PAT_SHIFT)

#define PAT_GOTO_BACKSLASHED PAT_GOTO+(MODE_BACKSLASHED<<PAT_SHIFT)

#define PAT_GOTO_BARRED PAT_GOTO+(MODE_BARRED<<PAT_SHIFT)

#define PAT_GOTO_SEMICOLON PAT_GOTO+(MODE_SEMICOLON<<PAT_SHIFT)

#define PAT_GOTO_TILDE PAT_GOTO+(MODE_TILDE<<PAT_SHIFT)

#define PAT_GOTO_TILDESPACE PAT_GOTO+(MODE_TILDESPACE<<PAT_SHIFT)

#define PAT_GOTO_SEMITILDE PAT_GOTO+(MODE_SEMITILDE<<PAT_SHIFT)

#define PAT_GOTO_LESS PAT_GOTO+(MODE_LESS<<PAT_SHIFT)

#define PAT_GOTO_GREATER PAT_GOTO+(MODE_GREATER<<PAT_SHIFT)

#define MAX_ELEMS 128

#define LINK

Value:

{ \
      usedby[j] = i; \
      args[i]--; \
    }

#define RELINK

Value:

{ \
      if( j<usedby[j] ) { largs[usedby[j]]++; finished=0; } \
      if( j>usedby[j] ) { rargs[usedby[j]]++; finished=0; } \
      LINK; \
    }

Function Documentation

atom_t build_syntax_subtree	(	atom_t	function,
		atom_t	source
	)

Parameters:

function	var atom for the parse context
source	word or list containing the source

Returns:: abstract syntax tree or an error atom

Parses a list of commands (like these in IF or REPEAT) and build an abstract syntax tree:

tokenization of body as commands
extracting all TO ... ENDs and create them as subfunctions
parsing the func's body into abstract syntax tree
recursively build trees of subfunctions

{
  //printf("BUILD_SYNTAX_SUBTREE(");
  //dump(NAME(function));
  //printf(",");
  //dump(source);
  //printf(")\n\n");

  // tokenize
  atom_t tokens1 = tokenize( source, TOKENIZE_DATA );
  if( IS_ERROR(tokens1) ) return tokens1;
  //printf("tokens1="); dumpln(tokens1);

  atom_t tokens2 = tokenize( tokens1, TOKENIZE_COMMANDS );
  DEUSE( tokens1 );
  if( IS_ERROR(tokens2) ) return tokens2;
  //printf("tokens2="); dumpln(tokens2);

  // extract TO..END's
  atom_t body = preparse( tokens2, function, LEVEL(function) );
  if( IS_ERROR(body) ) return body;
  //printf("body="); dumpln(body);

  // parse function body
  atom_t tree = parse( body, function, 1 );
  if( IS_ERROR(tree) ) return tree;
  //printf("tree="); dumpln(tree);

  DEUSE( tokens2 );
  //DEUSE( body );
  return tree;
}

void init_parser ( )

Initializes tables enbar[] and debar[] which are used to enbar and debar a character.

{
  int i;

  // NOTE! if the string of enbarrable characters is
  // changed, also change dump_word()
  chars_t s = TEXT("______________()+-*/=<>|?_______");
  //locked->        x........xx..x..................

  for( i=0; i<32;  i++ ) debar[i]=i;   // identity matrix
  for( i=0; i<128; i++ ) enbar[i]=i;   // identity matrix

  for( i=0; *s; i++,s++ )
  {
    if( *s==TEXT('_') ) continue;
    debar[i] = *s;
    enbar[(unsigned char)*s] = i;
  }
}

atom_t tokenize	(	atom_t	input,
		int	method
	)

Parameters:

input	word, subword or list to be tokenized
method	method of tokenization

Returns:: tokenized list

Tokenizes a word, a subword or a list into a list. If possible makes all words as subwords. Backslashes and bars in words are preserved (i.e. words are not purified). Comments and line continuations are ignored.

If the method is TOKENIZE_DATA then the input is tokenized as if it contains Logo data. If the method is TOKENIZE_COMMANDS then the input is tokenized as if it contains Logo commands. If the method is TOKENIZE_READWORD then the input is tokenized as expected by READWORD function.

If the input is a list then all its elements are tokenized one-by-one.

Return value is the tokenized list. In there is an error, returns an error atom which error code is ERROR_INCOMPLETE_PAIR, error position points the position in the word (0-based) and the error source is the word itself.

{
  if( IS_LIST(input) )
  {
    if( method==TOKENIZE_DATA && 
        GET_FLAGS( input, FLAG_TOKENIZED_DATA|FLAG_TOKENIZED_COMMANDS ) )
    return USE(input);
    if( method==TOKENIZE_COMMANDS && 
        GET_FLAGS( input, FLAG_TOKENIZED_COMMANDS ) )
    return USE(input);
  }

  if( IS_FLOAT(input) )
  {
    return USE(input);
  }

  //printf("\n");
  //if(method==TOKENIZE_DATA)
  //  printf("ENTER TOKENIZE_DATA(");
  //else
  //  printf("ENTER TOKENIZE_COMMANDS(");
  //dump_atom(input,1);
  //printf(")\n");


  #ifdef SAFEMODE
  assert( IS_WORD(input) || IS_SUBWORD(input) || IS_LIST(input) );
  #endif

  // First check whether the input is a list.
  // If it is then tokenize recursively all its elements.
  if( IS_LIST(input) )
  { 
    atom_t result = empty_list;
    atom_t last   = empty_list;
    atom_t x;
    atom_t y;
    for( x=input; IS_NOT_EMPTY(x); x=CDR(x) )
    {
      //printf("@@@@@@@@PROCESSING=|"); dump_atom(CAR(x),1); printf("|\n");

      int submethod = IS_LIST(CAR(x))?TOKENIZE_DATA:method;
      atom_t element = tokenize( CAR(x), submethod );
      //printf("@@@@@@@@TOKENIZED_INTO=|"); dump_atom(element,1); printf("|\n");

      if( IS_ERROR(element) )
      {
        DEUSE( result );
        //DEUSE( last );
        result = element;
        break;
      }

      int initial_flags = GET_FLAGS( x, FLAG_NEWLINE|FLAG_AFTER_SPACE );
      int final_flags   = GET_FLAGS( x, FLAG_BEFORE_SPACE );

      //printf(">>>CAR(x)  = "); dump_atom(CAR(x),1); printf("\n");
      //printf(">>>element = "); dump_atom(element,1); printf("\n");
      if( IS_FLOAT(element) )
      {
        append( USE(element), &result, &last );
        SET_FLAGS( last, initial_flags|final_flags );
      }
      else if( IS_LIST(CAR(x)) )
      {
        if( IS_EXTENDED(x) )
        {
          append_ex( USE(element), &result, &last );
          DEUSE( POS(last) );
          POS( last ) = USE( POS(x) );
        }
        else
        append( USE(element), &result, &last );
        SET_FLAGS( last, initial_flags|final_flags );
      }
      else
      {
        for( y=element; IS_NOT_EMPTY(y); y=CDR(y) )
        {
          //printf("APPEND SUBELEMENT |");
          //dump_atom(CAR(y),1);
          //printf("|\n");

          if( IS_EXTENDED(y) )
          {
            append_ex( USE(CAR(y)), &result, &last );
            DEUSE( POS(last) );
            POS( last ) = USE( POS(y) );
          }
          else
          append( USE(CAR(y)), &result, &last );
          SET_FLAGS( last, FLAGS(y) );
          if( y==element ) SET_FLAGS( last, initial_flags );
        }
        #ifdef SAFE_MODE
        assert( IS_NOT_EMPTY(last) );
        #endif
        SET_FLAGS( last, final_flags);
      }

      DEUSE( element );
    }

    //printf("FINAL RESULT IS |"); dump_atom(result,1); printf("|\n");
    return result;
  }

  // The input is a word or a subword
  chars_t source  = STRING(input);
  int     len     = LENGTH(input);
  int     origlen = len;

  chars_t buffer = ALLOC( CHAR_SIZE*len ); // buffer for the longest word
  //chars_t bp     = buffer;
  char_t  ch;

  int_t   errpos    = -1;
  //char_t  errchar   = NULL_CHAR;

  int last_token = TOKEN_LINEEND;
  //int crlf = 0;

  // Gets the next token. Return:
  //  TOKEN_END    if there are no more tokens
  //  TOKEN_WORD   if the token is a word
  //  TOKEN_DIRTY_WORD if the token is a word with \ or |
  //  TOKEN_OPEN   if the token is [
  //  TOKEN_CLOSE  if the token is ]
  //  TOKEN_LINEEND   if the token is <nl>
  //    TOKEN_SPACE   if at least one whitespace is met


  //int co=0;
  int get_token( atom_t *token, int method )
  {
    //co++;
    //if (0 == co%1024)
    //{
    //printf("%d ",co);
    //}
    // return 1 if buffer contains number
    int is_number(chars_t bp)
    { // "E" {digit}* "." {digit}+
      //  1     2      3     4
      chars_t cp = bp;
      int num_mode = 1;
      char_t ch;

      cp = bp;

      if( bp==buffer ) return 0;

      while( cp>buffer )
      {
        cp--;
        ch = *cp;
        //printf("num_mode=%d ch=%C\n",num_mode,ch);
        switch( num_mode )
        {
        case 1:
          if( ch!=TEXT('E') && ch!=TEXT('e') ) return 0;
          num_mode = 2;
          break;
        case 2: ;
          if( ch<TEXT('0') || ch>TEXT('9') ) return 0;
          num_mode = 3;
          break;
        case 3: ;
          if( ch!=TEXT('.') ) return 0;
          num_mode = 4;
          break;
        case 4: ;
          if( ch<TEXT('0') || ch>TEXT('9') ) return 0;
          break;
        }
        //printf("num_mode=%d\n",num_mode);
      }
      return 1;
    }

    if( !len ) return TOKEN_END;

    int dirty = 0;
    int mutated = 0;
    chars_t bp = buffer; *bp=NULL_CHAR;
    chars_t sp = source;

    int mode = MODE_ENTRY; // current mode
    int code;        // action code


    int stack[MAX_MODE];   // return-to-mode for each mode
    static int mode_eof[MAX_MODE] =
    {
      /* entry */ PAT_TOKEN_END,
      /* whitespace  */ PAT_TOKEN_SPACE,
      /* word     */ PAT_TOKEN_WORD,
      /* barred   */ PAT_ERROR,
      /* backslashed */ PAT_ERROR,
      /* tilde */ PAT_ERROR,
      /* semitilde   */ PAT_ERROR,
      /* semicolon   */ PAT_RETURN,
      /* tildespace  */ PAT_ERROR,
      /* less     */ PAT_TOKEN_WORD, 
      /* greater  */ PAT_TOKEN_WORD
    };
    static int mode_eol[MAX_MODE] =
    {
      /* entry */ PAT_NEXT+PAT_TOKEN_LINE,
      /* whitespace  */ PAT_TOKEN_SPACE,
      /* word     */ PAT_TOKEN_WORD,
      /* barred   */ PAT_PUSH+PAT_NEXT,
      /* backslashed */ PAT_PUSH+PAT_NEXT+PAT_RETURN,
      /* tilde */ PAT_NEXT+PAT_RETURN,
      /* semitilde   */ PAT_NEXT+PAT_RETURN,
      /* semicolon   */ PAT_RETURN,
      /* tildespace  */ PAT_NEXT+PAT_RETURN,
      /* less     */ PAT_TOKEN_WORD, 
      /* greater  */ PAT_TOKEN_WORD
    };
    static int mode_space[MAX_MODE] =
    {
      /* entry */ PAT_NEXT+PAT_GOTO_SPACE,
      /* whitespace  */ PAT_NEXT,
      /* word     */ PAT_TOKEN_WORD,
      /* barred   */ PAT_PUSH+PAT_NEXT,
      /* backslashed */ PAT_PUSH+PAT_NEXT+PAT_RETURN,
      /* tilde */ PAT_NEXT+PAT_GOTO_TILDESPACE,
      /* semitilde   */ PAT_NEXT,
      /* semicolon   */ PAT_NEXT,
      /* tildespace  */ PAT_NEXT,
      /* less     */ PAT_TOKEN_WORD, 
      /* greater  */ PAT_TOKEN_WORD
    };
    static int mode_open[MAX_MODE] =
    {
      /* entry */ PAT_NEXT+PAT_TOKEN_OPEN,
      /* whitespace  */ PAT_TOKEN_SPACE,
      /* word     */ PAT_TOKEN_WORD,
      /* barred   */ PAT_PUSH+PAT_NEXT,
      /* backslashed */ PAT_PUSH+PAT_NEXT+PAT_RETURN,
      /* tilde */ PAT_TILDE+PAT_RETURN,
      /* semitilde   */ PAT_RETURN,
      /* semicolon   */ PAT_NEXT,
      /* tildespace  */ PAT_TILDE+PAT_TOKEN_WORD,
      /* less     */ PAT_TOKEN_WORD, 
      /* greater  */ PAT_TOKEN_WORD
    };
    static int mode_close[MAX_MODE] =
    {
      /* entry */ PAT_NEXT+PAT_TOKEN_CLOSE,
      /* whitespace  */ PAT_TOKEN_SPACE,
      /* word     */ PAT_TOKEN_WORD,
      /* barred   */ PAT_PUSH+PAT_NEXT,
      /* backslashed */ PAT_PUSH+PAT_NEXT+PAT_RETURN,
      /* tilde */ PAT_TILDE+PAT_RETURN,
      /* semitilde   */ PAT_RETURN,
      /* semicolon   */ PAT_NEXT,
      /* tildespace  */ PAT_TILDE+PAT_TOKEN_WORD,
      /* less     */ PAT_TOKEN_WORD, 
      /* greater  */ PAT_TOKEN_WORD
    };
    static int mode_bar[MAX_MODE] =
    {
      /* entry */ PAT_PUSH+PAT_NEXT+PAT_DIRTY+PAT_RETURN_TO_WORD+PAT_GOTO_BARRED+PAT_ERROR_POS,
      /* whitespace  */ PAT_TOKEN_SPACE,
      /* word     */ PAT_PUSH+PAT_NEXT+PAT_DIRTY+PAT_GOTO_BARRED+PAT_RETURN_TO_SELF+PAT_ERROR_POS,
      /* barred   */ PAT_PUSH+PAT_NEXT+PAT_RETURN,
      /* backslashed */ PAT_PUSH+PAT_NEXT+PAT_RETURN,
      /* tilde */ PAT_TILDE+PAT_RETURN,
      /* semitilde   */ PAT_RETURN,
      /* semicolon */ PAT_PUSH+PAT_NEXT+PAT_DIRTY+PAT_GOTO_BARRED+PAT_RETURN_TO_SELF+PAT_ERROR_POS,
      /* tildespace  */ PAT_TILDE+PAT_TOKEN_WORD,
      /* less     */ PAT_TOKEN_WORD, 
      /* greater  */ PAT_TOKEN_WORD
    };
    static int mode_backslash[MAX_MODE] =
    {
      /* entry */ PAT_PUSH+PAT_NEXT+PAT_DIRTY+PAT_RETURN_TO_WORD+PAT_GOTO_BACKSLASHED+PAT_ERROR_POS,
      /* whitespace  */ PAT_TOKEN_SPACE,
      /* word     */ PAT_PUSH+PAT_NEXT+PAT_DIRTY+PAT_RETURN_TO_SELF+PAT_GOTO_BACKSLASHED+PAT_ERROR_POS,
      /* barred   */ PAT_PUSH+PAT_NEXT+PAT_RETURN_TO_SELF+PAT_GOTO_BACKSLASHED,
      /* backslashed */ PAT_PUSH+PAT_NEXT+PAT_RETURN,
      /* tilde */ PAT_TILDE+PAT_RETURN,
      /* semitilde   */ PAT_RETURN,
      /* semicolon   */ PAT_NEXT2, //+PAT_RETURN_TO_SELF+PAT_GOTO_BACKSLASHED,
      /* tildespace  */ PAT_TILDE+PAT_TOKEN_WORD,
      /* less     */ PAT_TOKEN_WORD, 
      /* greater  */ PAT_TOKEN_WORD
    };
    static int mode_tilde[MAX_MODE] =
    {
      /* entry */ PAT_NEXT+PAT_RETURN_TO_SELF+PAT_GOTO_TILDE+PAT_ERROR_POS,
      /* whitespace  */ PAT_TOKEN_SPACE,
      /* word     */ PAT_NEXT+PAT_MUTATED+PAT_RETURN_TO_SELF+PAT_GOTO_TILDE+PAT_ERROR_POS,
      /* barred   */ PAT_PUSH+PAT_NEXT,
      /* backslashed */ PAT_PUSH+PAT_NEXT+PAT_RETURN,
      /* tilde */ PAT_TILDE+PAT_RETURN,
      /* semitilde   */ PAT_RETURN,
      /* semicolon   */ PAT_NEXT+PAT_MUTATED+PAT_RETURN_TO_CALLER+PAT_GOTO_SEMITILDE+PAT_ERROR_POS,
      /* tildespace  */ PAT_TILDE+PAT_TOKEN_WORD,
      /* less     */ PAT_TOKEN_WORD, 
      /* greater  */ PAT_TOKEN_WORD
    };
    static int mode_semicolon[MAX_MODE] =
    {
      /* entry */ PAT_NEXT+PAT_RETURN_TO_SELF+PAT_GOTO_SEMICOLON,
      /* whitespace  */ PAT_TOKEN_SPACE,
      /* word     */ PAT_NEXT+PAT_MUTATED+PAT_RETURN_TO_SELF+PAT_GOTO_SEMICOLON,
      /* barred   */ PAT_PUSH+PAT_NEXT,
      /* backslashed */ PAT_PUSH+PAT_NEXT+PAT_RETURN,
      /* tilde */ PAT_TILDE+PAT_RETURN,
      /* semitilde   */ PAT_RETURN,
      /* semicolon   */ PAT_NEXT+PAT_RETURN_TO_SELF+PAT_GOTO_SEMITILDE,
      /* tildespace  */ PAT_TILDE+PAT_TOKEN_WORD,
      /* less     */ PAT_TOKEN_WORD, 
      /* greater  */ PAT_TOKEN_WORD
    };
    static int mode_else[MAX_MODE] =
    {
      /* entry */ PAT_PUSH+PAT_NEXT+PAT_GOTO_WORD,
      /* whitespace  */ PAT_TOKEN_SPACE,
      /* word     */ PAT_PUSH+PAT_NEXT,
      /* barred   */ PAT_PUSH+PAT_NEXT,
      /* backslashed */ PAT_PUSH+PAT_NEXT+PAT_RETURN,
      /* tilde */ PAT_TILDE+PAT_RETURN,
      /* semitilde   */ PAT_RETURN,
      /* semicolon   */ PAT_NEXT,
      /* tildespace  */ PAT_TILDE+PAT_TOKEN_WORD,
      /* less     */ PAT_TOKEN_WORD, 
      /* greater  */ PAT_TOKEN_WORD
    };
    static int mode_parens[MAX_MODE] =
    {
      /* entry */ PAT_PUSH+PAT_NEXT+PAT_TOKEN_WORD,
      /* whitespace  */ PAT_TOKEN_SPACE,
      /* word     */ PAT_TOKEN_WORD,
      /* barred   */ PAT_PUSH+PAT_NEXT,
      /* backslashed */ PAT_PUSH+PAT_NEXT+PAT_RETURN,
      /* tilde */ PAT_TILDE+PAT_RETURN,
      /* semitilde   */ PAT_RETURN,
      /* semicolon   */ PAT_NEXT,
      /* tildespace  */ PAT_TILDE+PAT_TOKEN_WORD,
      /* less     */ PAT_TOKEN_WORD, 
      /* greater  */ PAT_TOKEN_WORD
    };
    static int mode_equal[MAX_MODE] =
    {
      /* entry */ PAT_PUSH+PAT_NEXT+PAT_TOKEN_WORD,
      /* whitespace  */ PAT_TOKEN_SPACE,
      /* word     */ PAT_TOKEN_WORD,
      /* barred   */ PAT_PUSH+PAT_NEXT,
      /* backslashed */ PAT_PUSH+PAT_NEXT+PAT_RETURN,
      /* tilde */ PAT_TILDE+PAT_RETURN,
      /* semitilde   */ PAT_RETURN,
      /* semicolon   */ PAT_NEXT,
      /* tildespace  */ PAT_TILDE+PAT_TOKEN_WORD,
      /* less     */ PAT_PUSH+PAT_NEXT+PAT_TOKEN_WORD, 
      /* greater  */ PAT_PUSH+PAT_NEXT+PAT_TOKEN_WORD
    };
    static int mode_less[MAX_MODE] =
    {
      /* entry */ PAT_PUSH+PAT_NEXT+PAT_GOTO_LESS,
      /* whitespace  */ PAT_PUSH+PAT_NEXT+PAT_GOTO_LESS, //PAT_TOKEN_WORD, @boza
      /* word     */ PAT_TOKEN_WORD,
      /* barred   */ PAT_PUSH+PAT_NEXT,
      /* backslashed */ PAT_PUSH+PAT_NEXT+PAT_RETURN,
      /* tilde */ PAT_TILDE+PAT_RETURN,
      /* semitilde   */ PAT_RETURN,
      /* semicolon   */ PAT_NEXT,
      /* tildespace  */ PAT_TILDE+PAT_TOKEN_WORD,
      /* less     */ PAT_TOKEN_WORD, 
      /* greater  */ PAT_TOKEN_WORD
    };
    static int mode_greater[MAX_MODE] =
    {
      /* entry */ PAT_PUSH+PAT_NEXT+PAT_GOTO_GREATER,
      /* whitespace  */ PAT_PUSH+PAT_NEXT+PAT_GOTO_GREATER, //PAT_TOKEN_WORD, @boza
      /* word     */ PAT_TOKEN_WORD,
      /* barred   */ PAT_PUSH+PAT_NEXT,
      /* backslashed */ PAT_PUSH+PAT_NEXT+PAT_RETURN,
      /* tilde */ PAT_TILDE+PAT_RETURN,
      /* semitilde   */ PAT_RETURN,
      /* semicolon   */ PAT_NEXT,
      /* tildespace  */ PAT_TILDE+PAT_TOKEN_WORD,
      /* less     */ PAT_PUSH+PAT_NEXT+PAT_TOKEN_WORD, 
      /* greater  */ PAT_TOKEN_WORD
    };


again:
    // get action code
    GET_CHAR;
    
    
#ifdef DEBUG_TOKENS
    printf("<TOKENS> length=%d\n",len);
    if(ch<TEXT(' '))
    printf("<TOKENS> get(#%d)\t",ch);
    else
    printf("<TOKENS> get('%c')\t",ch);
    switch( mode )
    {
    case MODE_ENTRY:    printf("     ENTRY -> "); break;
    case MODE_SPACE:    printf("     SPACE -> "); break;
    case MODE_WORD:     printf("      WORD -> "); break;
    case MODE_BARRED:      printf("    BARRED -> "); break;
    case MODE_BACKSLASHED: printf(" BACKSLASH -> "); break;
    case MODE_TILDE:    printf("     TILDE -> "); break;
    case MODE_SEMITILDE:   printf(" SEMITILDE -> "); break;
    case MODE_SEMICOLON:   printf(" SEMICOLON -> "); break;
    case MODE_TILDESPACE:  printf("TILDESPACE -> "); break;
    }
#endif
    code = mode_else[mode];
    if( !len )
    code = mode_eof[mode];
    else
    {
      if( method==TOKENIZE_READWORD )
      { // tokenize as expected by READWORD
        if( ch==TEXT('|')  ) code = mode_bar[mode];
        else if( ch==TEXT('\\') ) code = mode_backslash[mode];
      }
      else
      if( method==TOKENIZE_READLIST )
      { // tokenize as data
        if(      ch==TEXT('\n') ) code = mode_eol[mode];
        else if( ch==TEXT('\r') ) code = mode_eol[mode];
        else if( ch<=TEXT(' ') ) code = mode_space[mode];
        else if( ch==TEXT('[')  ) code = mode_open[mode];
        else if( ch==TEXT(']')  ) code = mode_close[mode];
        else if( ch==TEXT('|')  ) code = mode_bar[mode];
        else if( ch==TEXT('\\') ) code = mode_backslash[mode];
        else if( ch==TEXT('~')  ) code = mode_tilde[mode];
      }
      else
      if( method==TOKENIZE_DATA )
      { // tokenize as data
        if(      ch==TEXT('\n') ) code = mode_eol[mode];
        else if( ch==TEXT('\r') ) code = mode_eol[mode];
        else if( ch<=TEXT(' ') ) code = mode_space[mode];
        else if( ch==TEXT('[')  ) code = mode_open[mode];
        else if( ch==TEXT(']')  ) code = mode_close[mode];
        else if( ch==TEXT('|')  ) code = mode_bar[mode];
        else if( ch==TEXT('\\') ) code = mode_backslash[mode];
        else if( ch==TEXT('~')  ) code = mode_tilde[mode];
        else if( ch==TEXT(';')  ) code = mode_semicolon[mode];
      }
      else
      { // tokenize as commands
        //printf("ch=%c *buf=%c\n",ch,*buffer);
        if( ch==TEXT('(') ||
            ch==TEXT(')') ) code = mode_parens[mode];
        else if( *buffer!=TEXT('"') )
        {
          if( (ch==TEXT('+') || ch==TEXT('-')) && is_number(bp) ) { }
          else if( ch==TEXT('+') || 
              ch==TEXT('-') || 
              ch==TEXT('*') ||
              ch==TEXT('/'))  code = mode_parens[mode];
          else if( ch==TEXT('=') ) code = mode_equal[mode];
          else if( ch==TEXT('<') ) code = mode_less[mode];
          else if( ch==TEXT('>') ) code = mode_greater[mode];
        }
      }
    }
    // process action code
    int _stack = -1;

    int newmode = 0;
    if( code & PAT_GOTO )
    {
      // this is pred-processing of GOTO
      // if old mode was space, and new is not,
      // then update initial position of next token
      newmode = (code>>PAT_SHIFT) & 0xF;
      if( mode==MODE_SPACE && newmode!=MODE_SPACE ) { sp = source; }
    }


    if( code & PAT_TILDE ) 
    {
      *bp++ = TEXT('~');
      #ifdef DEBUG_TOKENS
      printf("\n<TOKENS> put('%c'/%d)\n",TEXT('~'),TEXT('~'));
      #endif //DEBUG_TOKENS
    }
    if( code & PAT_PUSH )  
    {
      // push a character only if:
      // - currently not in bars |..?..|
      // - currently in bars, but not in semicolon ;..|..?..|
      if( mode!=MODE_BARRED || 
          (stack[mode]!=MODE_SEMITILDE &&
            stack[mode]!=MODE_SEMICOLON) )
      {
        //if( mode==MODE_BARRED || mode==MODE_BACKSLASHED )
        //*bp++ = ENBAR(ch);
        //else
        *bp++ = ch;
        #ifdef DEBUG_TOKENS
        printf("\n<TOKENS> put('%c'/%d)\n",ch,ch);
        #endif //DEBUG_TOKENS
        //if( ch=='\r' && *(source+1)=='\n' ) // handle CRLF cases
        //{
        //*bp++ = '\n';
        //#ifdef DEBUG_TOKENS
        //  printf("\n<TOKENS> put('%d')\n",'\n');
        // #endif //DEBUG_TOKENS
        //}
      }
    }
    if( code & PAT_NEXT2 )
    {
      source++;
      len--;
    }
    if( code & (PAT_NEXT|PAT_NEXT2) )
    {
      if( *source=='\r' )
      {
        //crlf = 0;
        if( *(source+1)=='\n' )
        {
          //crlf = 1;
          source++;
          len--;
        }
      }
      source++;
      len--;
    }
    if( code & PAT_DIRTY ) dirty = 1;
    if( code & PAT_MUTATED )  mutated = 1;
    if( code & PAT_ERROR_POS )   errpos = origlen-len-1;
    if( code & PAT_RETURN_TO_WORD ) _stack = MODE_WORD;
    if( code & PAT_RETURN_TO_SELF ) _stack = mode;
    if( code & PAT_RETURN_TO_CALLER ) _stack = stack[mode];
    if( code & PAT_GOTO )
    {
      // this is post-processing of GOTO
      mode = newmode;
      stack[mode] = _stack;
    }
    if( code & PAT_RETURN )   mode = stack[mode];
    if( code & PAT_TOKEN ) 
    {
      int _token = (code>>PAT_SHIFT) & 0xF;
      if( _token!=TOKEN_WORD ) return _token;
      if( mutated )
      {
        *bp = NULL_CHAR;
        *token  = new_word( buffer, UNKNOWN );
        #ifdef DEBUG_TOKENS
        printf("MUTATED TOKEN "); dumpln(*token);
        printf("\n\n");
        #endif
      }
      else
      {
        *token = new_subword( input, sp, source-sp /*bp-buffer*/ );
        #ifdef DEBUG_TOKENS
        printf("NORMAL TOKEN **"); dump(*token);
        printf("** (len=%d)\n\n\n",source-sp);
        #endif
      }
      return dirty?TOKEN_DIRTY_WORD:TOKEN_WORD;
    }
    if( code & PAT_ERROR )
    {
      #ifdef DEBUG_TOKENS
      printf("ERROR\n");
      #endif
      return TOKEN_ERROR;
    }

#ifdef DEBUG_TOKENS
    switch( mode )
    {
    case MODE_ENTRY:    printf("ENTRY\n"); break;
    case MODE_SPACE:    printf("SPACE\n"); break;
    case MODE_WORD:     printf("WORD\n"); break;
    case MODE_BARRED:      printf("BARRED\n"); break;
    case MODE_BACKSLASHED: printf("BACKSLASH\n"); break;
    case MODE_TILDE:    printf("TILDE\n"); break;
    case MODE_SEMITILDE:   printf("SEMITILDE\n"); break;
    case MODE_SEMICOLON:   printf("SEMICOLON\n"); break;
    case MODE_TILDESPACE:  printf("TILDESPACE\n"); break;
    }
#endif
    goto again;
  } // get_token()


  atom_t get_sublist( int level, int full_parse, atom_t* pos ) //ex2//
  {
    atom_t result  = empty_list;
    atom_t last    = empty_list;
    if( pos ) *pos = NULL; //ex2//

    atom_t token   = NULL;
    atom_t sublist_pos = NULL; //ex2//
    int    flags;
    int bracketlen = len;  // LEN of last opening bar

    int pos_from = source-STRING(input); //ex2//
    if( pos_from ) pos_from--; //ex2//

    flags = 0;//FLAG_NEWLINE;
    while( (last_token=get_token(&token,full_parse)) )
    {
      sublist_pos = NULL;

      #ifdef DEBUG_TOKENIZATION
      switch(last_token)
      {
      case TOKEN_END:    printf("TOKEN_END\n"); break;
      case TOKEN_SPACE:  printf("TOKEN_SPACE\n"); break;
      case TOKEN_WORD:   printf("TOKEN_WORD    @"); dump(token); printf("@\n"); break;
      case TOKEN_DIRTY_WORD: printf("TOKEN_|WORD|  @"); dump(token); printf("@\n"); break;
      case TOKEN_OPEN:   printf("TOKEN_OPEN    [\n"); break;
      case TOKEN_CLOSE:  printf("TOKEN_CLOSE   ]\n"); break;
      case TOKEN_LINEEND:   printf("TOKEN_LINEEND\n"); break;
      case TOKEN_ERROR:  printf("TOKEN_ERROR\n"); break;
      }
      #endif

      if( last_token==TOKEN_ERROR ) return result;
      if( last_token==TOKEN_SPACE ) 
      {
        if( IS_NOT_EMPTY(last) ) SET_FLAGS( last, FLAG_BEFORE_SPACE );
        flags |= FLAG_AFTER_SPACE;
        continue;
      }
      if( last_token==TOKEN_DIRTY_WORD )
      {
        //printf("###BEFORE="); dumpln(token);
        atom_t x = purify( token );
        DEUSE( token );
        token = x;
        //printf("###AFTER="); dumpln(token);
      }
      if( last_token==TOKEN_CLOSE ) break;
      if( last_token==TOKEN_LINEEND )
      {
        flags |= FLAG_NEWLINE;
        continue;
      }
      if( last_token==TOKEN_OPEN)
      {
        token = get_sublist( level+1, TOKENIZE_DATA, &sublist_pos ); // recursive //ex2//

        if( last_token==TOKEN_ERROR )
        {
          DEUSE( token );
          if( sublist_pos ) DEUSE(sublist_pos);
          sublist_pos = NULL;
          break;
        }
      }

      if( method==TOKENIZE_COMMANDS )
      flags |= FLAG_TOKENIZED_COMMANDS;

      if( method==TOKENIZE_DATA )
      flags |= FLAG_TOKENIZED_DATA;

      if( method==TOKENIZE_COMMANDS &&
          last_token==TOKEN_WORD &&
          LENGTH(token)>1 &&
          *STRING(token)==TEXT('?') &&
          *(STRING(token)+1)>=TEXT('0') &&
          *(STRING(token)+1)<=TEXT('9') )
      {
        //printf(">>>%d %d\n", last_token==TOKEN_WORD, last_token==TOKEN_DIRTY_WORD);
        // process template ?nn->(? nn) for command tokenization
        //printf("append token **"); dump(token); printf("**\n");
        atom_t new_qoken = new_subword( token, STRING(token), 1 );
        atom_t new_token = new_subword( token, STRING(token)+1, LENGTH(token)-1 );
        DEUSE( token );

        append( new_word(TEXT("("),-1), &result, &last );   // (
        append( new_qoken, &result, &last );    // ?
        append( new_token, &result, &last );    // nn
        append( new_word(TEXT(")"),-1), &result, &last );   // )
      }
      else
      {
        // normal token, no more processing needed
        if( sublist_pos ) //ex//
        {
          append_ex( token, &result, &last );
          POS( last ) = sublist_pos;

          //printf("\n\nSET EXTENDED POSITION ");
          //dump_atom(sublist_pos,1); printf("\n");
          //printf("CURRENT RESULT ");
          //dump_atom(result,1); printf("\n\n");

          sublist_pos = NULL;
        }
        else
        {
          append( token, &result, &last );
        }
        SET_FLAGS( last, flags );
      }

      flags = 0;
      if( last_token==TOKEN_ERROR ) break;
    }

    // test for unmatching square brackets
    // i.e. ...[... or ...]...
    if( level )
    {
      if( last_token==TOKEN_END )
      {
        errpos = origlen-bracketlen-1;
        last_token = TOKEN_ERROR;
      }
    }
    else
    {
      if( last_token==TOKEN_CLOSE )
      {
        errpos = origlen-len-1;
        last_token = TOKEN_ERROR;
      }
    }

    int pos_to = source-STRING(input);
    if( pos_to ) pos_to--;

    if( pos && method == TOKENIZE_DATA ) //ex2//
    {
      *pos = new_subword( input, STRING(input)+pos_from, pos_to-pos_from+1 );
    }

    return result;
  } // get_sublist()

  atom_t result = get_sublist( 0, method, NULL ); //ex//
  
  // in case of error return empty list
  if( last_token==TOKEN_ERROR )
  {
    //printf("ERROR RESULT=");dumpln(result);
    DEUSE( result );
    result = new_parse_error( ERROR_INCOMPLETE_PAIR, errpos, input );
  }


  DEALLOC( buffer );
  //printf("#########");
  //dump_atom(result,1);
  //printf("######\n");

  return result;
}

atom_t trim_shell_comment ( atom_t word )

Parameters:

word	word containing source text

Returns:: atom with the source text with trimmed shell comment

Trims a shell comment from the beginning of the word. Shell comment can be only the first line if its first two characters are #!. If a shell comment is trimmed, then the result is a subword from the first character on the second line, otherwise the input word is returned as is but with increased reference count.

{
  #ifdef SAFEMODE
  assert( IS_WORD(word) || IS_SUBWORD(word) );
  #endif

  chars_t source = STRING(word);
  int_t   len    = LENGTH(word);

  // if there are no enough characters just exit
  if( LENGTH(word)<2 ) return USE(word);

  // if the first two characters are not #! then exit
  if( *source!=TEXT('#') || *(source+1)!=TEXT('!') ) return USE(word);

  // skip the line
  while( len && *source!=TEXT('\n') )
  {
    source++;
    len--;
  }

  // return a subword. Pay attention to always reference
  // the main host word because the input could be a word
  // or a subword.
  if( IS_WORD(word) )
  return new_subword( word, source, len );
  else
  return new_subword( WORD(word), source, len );
}

atom_t purify ( atom_t word )

Parameters:

word	word to be purified

Returns:: purified word

Purifies a word by processing all backslashes and bars. Returns a new word if needed. Assumes that the input needs purification.

{
  //return USE(word);
  #ifdef SAFEMODE
  assert( IS_WORD(word) || IS_SUBWORD(word) );
  #endif

  chars_t source = STRING(word);
  int_t   len    = LENGTH(word);

  chars_t buffer = alloca( CHAR_SIZE*len ); // buffer for the longest word
  chars_t bp = buffer;

  int need_enbar    = 0;
  int is_mutated    = 0; // set to 1 if the word is mutated
  int in_backslash  = 0;
  int in_bars       = 0;
  for( ; len; len--,source++ )
  {
    need_enbar = in_bars || in_backslash;
    if( in_backslash )
    {
      in_backslash = 0;
    }
    else if( *source==TEXT('\\') )
    {
      is_mutated = 1;
      in_backslash = 1;
      continue;
    }
    else if( *source==TEXT('|') )
    {
      is_mutated = 1;
      in_bars = !in_bars;
      continue;
    }
    if( need_enbar )
    *bp++ = ENBAR(*source);
    else
    *bp++ = *source;

    //if( need_enbar )
    //printf(" PURIFY %d %d\n",*source,ENBAR(*source));
    //else
    //printf(" PURIFY %d   \n",*source );
  }

  *bp = NULL_CHAR;

  if( is_mutated )
  return new_word( buffer, bp-buffer );
  else
  return USE(word);
}

atom_t parentheses ( atom_t input )

Parameters:

input flat list

Returns:: list with nested sublists

Processes all (...) of a flat list by making them as sublists. Returns ERROR_INCOMPLETE_PAIR if the parentheses are not paired well.

{
  atom_t result = empty_list;

  atom_t _paren( int level, atom_t openparen, int* last_flags )
  {
#ifdef DEBUG_PARENTHESES
    printf("<PAREN> ENTER LEVEL %d INPUT=", level);
    dumpln(input);
#endif //DEBUG_PARENTHESES

    *last_flags = 0;

    atom_t result = empty_list;
    atom_t last = empty_list;
    while( IS_NOT_EMPTY(input) )
    {
      // get current element
      atom_t elem = USE(CAR(input));
      int flags = FLAGS(input);

#ifdef DEBUG_PARENTHESES
      printf("<PAREN> READ ELEMENT ");
      dumpln(elem);
#endif //DEBUG_PARENTHESES

      // if it is ( then call _parse() recursively
      // if it is ) then exit current _parse()
      if( IS_WORD(elem)||IS_SUBWORD(elem) )
      if( LENGTH(elem)==1 )
      {
        if( *STRING(elem)==TEXT('(') ) // process (
        {
          int f;
          DEUSE( elem ); //compensate USE() when elem was retrieved
          input = CDR(input);
          elem = _paren( level+1, elem, &f );
          flags |= f;
          if( IS_ERROR(elem) )
          {
            DEUSE( result );
            return elem;
          }
        }
        else if( *STRING(elem)==TEXT(')') )// process )
        {
          DEUSE( elem ); //compensate USE() when elem was retrieved
          *last_flags = FLAGS(input);
          break;
        }
      }

#ifdef DEBUG_PARENTESES
      printf("<PAREN> APPEND ");
      dumpln(elem);
#endif //DEBUG_PARENTHESES

      // append current element
      if( IS_EXTENDED(input) )
      {
        append_ex( elem, &result, &last );
        DEUSE( POS(last) );
        POS( last ) = USE( POS( input ) );
      }
      else
      append( elem, &result, &last );
      FLAGS(last) |= flags;

      // empty list
      if( IS_EMPTY(input) ) break;

      // move to next element
      input = CDR(input);
    }

    if( level && IS_EMPTY(input) )
    {
      DEUSE( result );
      return new_error( ERROR_INCOMPLETE_PAIR, openparen );
    }
    if( !level && !IS_EMPTY(input) )
    {
      DEUSE( result );
      return new_error( ERROR_INCOMPLETE_PAIR, CAR(input) );
    }
    if( level && IS_EMPTY(result) )
    {
      return new_error( ERROR_EMPTY_EXPRESSION, openparen );
    }

#ifdef DEBUG_PARENTHESES
    printf("<PAREN> EXIT LEVEL %d RESULT=", level);
    dumpln(result);
#endif //DEBUG_PARENTHESES
    if( level ) SET_FLAGS( result, FLAG_EXPRESSION );
    return result;
  } // _paren()


  int f;
  result = _paren( 0, empty_list, &f );

  return result;
}

atom_t define_user_function	(	atom_t	source,
		atom_t	input,
		atom_t	parent
	)

Parameters:

source	word containing the source
input	contents between `TO` and `END`
parent	context var where creation is done

Returns:: var atom or error atom

Defines new user-defined command or function. The input contains the contents of the function -- these are all tokens between TO and END. Var creation is done in several steps:

a new var atom is created with some temporary name
the left inputs are determined and created as local variables to the var atom. Left inputs are all consequitive words which start with colon : and are on a single line
the first token which cannot be a left input is considered as a function name. This token must be a word which does not start with colon : or double quotes ".
the right inputs are all tokens after the name which start with colon : and are on the same line.
a "..." after the last right input (or after the procedure name if there are no right inputs) determines that the procedure has unlimited number of accpeted inputs.
the body of the function is everything after the last right input (or funtion name) till the end. The only compulsory element of a function is its name. The left and right inputs could be missing. The body could be empty.

{
  #ifdef DEBUG_TO_END
  printf("<TO-END> DEFINING=");
  dumpln( input);
  #endif

  //atom_t last;
  atom_t a = input;
  atom_t name;
  atom_t lhi; // last_header_item

  // create the function
  atom_t function = new_var( word_to, parent, 1 ); //DEUSE(to);
  need_descr2( function );
  SET_FLAGS( function, FLAG_FUNCTION );
  LARGS( function ) = 0;
  RARGS( function ) = 0;

  // collecting left parameters
  while( IS_NOT_EMPTY(a) && 
  !GET_FLAGS(a,FLAG_NEWLINE) && 
  IS_ANY_WORD(CAR(a)) &&
  (*STRING(CAR(a))==TEXT(':')) )
  {
    lhi = CAR(a);
    atom_t var = new_local_var( CAR(a), function, 1 );
    if( IS_ERROR(var) )
    {
      DEUSE( input );
      //DEUSE( function );
      LOCALS(parent) = behead( LOCALS(parent) ); // this DEUSEs function
      return var;
    }

    SET_FLAGS( var, FLAG_VARIABLE );
    LARGS( function )++;
#ifdef SAFE_MODE
    assert( LARGS(function)<255 );
#endif
    a = CDR(a);
  }

  // collecting name
  if( IS_NOT_EMPTY(a) && 
      !GET_FLAGS(a,FLAG_NEWLINE) &&
      IS_ANY_WORD(CAR(a)) &&
      (*STRING(CAR(a))!=TEXT('"')) )
  {
    lhi = CAR(a);
    name = CAR(a);
    a = CDR(a);
  }
  else
  {
    DEUSE( input );
    return new_error( ERROR_MISSING_NAME, a );
  }

  //printf("<TO-END> DEFINING="); dumpln( name );

  // collecting right parameters
  while( IS_NOT_EMPTY(a) && 
  !GET_FLAGS(a,FLAG_NEWLINE) && 
  IS_ANY_WORD(CAR(a)) &&
  (*STRING(CAR(a))==TEXT(':')) )
  {
    lhi = CAR(a);
    atom_t var = new_local_var( CAR(a), function, 1 );
    if( IS_ERROR(var) )
    {
      DEUSE( input );
      //DEUSE( function );
      LOCALS(parent) = behead( LOCALS(parent) ); // this DEUSEs function
      return var;
    }
    SET_FLAGS( var, FLAG_VARIABLE );
    RARGS( function )++;
#ifdef SAFE_MODE
    assert( RARGS(function)<255 );
#endif
    a = CDR(a);
  }

  // collect "..."
  if( IS_NOT_EMPTY(a) &&
      !GET_FLAGS(a,FLAG_NEWLINE) &&
      IS_ANY_WORD(CAR(a)) &&
      (LENGTH(CAR(a))==3) &&
      (*(STRING(CAR(a))+0)==TEXT('.')) &&
      (*(STRING(CAR(a))+1)==TEXT('.')) &&
      (*(STRING(CAR(a))+2)==TEXT('.')) )
  {
    SET_FLAGS( function, FLAG_INFINITE_ARGS );
    a = CDR(a);
  }


  // set offset of parameters
  int offset = BASE_OFFSET_PARAMS; // this is the start offset
  atom_t x;
  for( x = LOCALS(function); IS_NOT_EMPTY(x); x=CDR(x) )
  {
    //printf("set offset of "); dump(NAME(CAR(x))); printf(" to be %d\n",offset);
    OFFSET(CAR(x)) = offset;
    offset += sizeof( atom_t );
  }


  // check whether the function is already defined
  atom_t var = find_local_var( name, parent );
  if( var )
  {
    if( LARGS(var)!=LARGS(function) || RARGS(var)!=RARGS(function) )
    {
      LOCALS(parent) = behead( LOCALS(parent) ); // this DEUSEs function
      atom_t result = new_error( ERROR_INCOMPATIBLE_REDEFINITION, input );
      DEUSE( input );
      return result;
    }

    // forget data of the old function and
    // reuse data of the new function
    DEUSE( FULLSOURCE(var) );
    DEUSE( SOURCE(var) );
    DEUSE( LOCALS(var) );
    DEUSE( BODY(var) );
    DEUSE( TREE(var) );
    DEUSE( BINARY(var) );

    TREE(var)   = empty_list;
    BINARY(var) = empty_list;

    BODY(var)   = USE(a); //USE(BODY(function));
    LOCALS(var) = USE(LOCALS(function));

    LOCALS(parent) = behead(LOCALS(parent));
    //DEUSE(function);
    function = var;
  }
  else
  {
    DEUSE( NAME(function) );
    NAME( function ) = USE(name);
    BODY( function ) = USE(a);
  }

  //LEVEL( function ) = level;
  ADDRESS( function ) = 0;
  PRIORITY( function ) = PRIORITY_FUN;

  // cut header and 'END' from source
  chars_t new_src = STRING(lhi)+LENGTH(lhi);
  int new_len = LENGTH(source) - (new_src-STRING(source)) - LENGTH(word_end);
  SOURCE( function ) = new_subword( source, new_src, new_len );
  FULLSOURCE( function ) = USE( source );

  //DEUSE( input );
  #ifdef DEBUG_TO_END
  printf("<TO-END> DEFINED FUNCTION "); dumpln(NAME(function));
  printf("<TO-END>           SOURCE "); dumpln(SOURCE(function));
  printf("<TO-END>           BODY   "); dumpln(BODY(function));
  printf("<TO-END>           TREE   "); dumpln(TREE(function));
  #endif

  //printf("@@@@@@DEFINE "); dump_atom(NAME(function),1);
  //printf(" IN PARENT "); dump_atom(NAME(PARENT(function)),1);
  //printf("@@@@@@\n");


  return function;
}

atom_t preparse	(	atom_t	input,
		atom_t	parent,
		int	level
	)

Parameters:

input	list with tokenized source code
parent	context var where preparsing is done
level	current level of nesting return source without `TO` ... `END` definitions

Pre-parses a flat tokenized list by extracting all local TO ... END definitions and creating corresponding local functions. Returns the remaining source code.

{
  #ifdef DEBUG_TO_END
  printf("<TO-END> ENTER1=");
  dumpln( input);
  #endif

  atom_t a = input;
  atom_t before_a = NULL;
  atom_t before_to = NULL;
  atom_t first_to = NULL;
  int to_end_depth = 0;
  while( IS_NOT_EMPTY(a) )
  {
    if( IS_ANY_WORD(CAR(a)) )
    {
      // found TO or its synonym
      if( same_words(CAR(a),word_to) || same_words(CAR(a),word_to_syn) )
      {
        if( to_end_depth==0 )
        {
          before_to = before_a;
          first_to = a;
        }
        to_end_depth++;
      }

      // found END
      if( same_words( CAR(a), word_end ))
      {
        // END without to ?
        if( to_end_depth==0 )
        {
          atom_t result = new_error( ERROR_INCOMPLETE_PAIR, CAR(a) );
          DEUSE( input );
          return result;
        }

        to_end_depth--;
        if( to_end_depth==0 )
        {
          // found END corresponding to TO.
          // extract the whole TO..END
          if( first_to==before_a )
          {
            DEUSE( input );
            return new_error( ERROR_EMPTY_TO_END, first_to );
          }

          atom_t word = new_subword(WORD(CAR(first_to)),STRING(CAR(first_to)),(STRING(CAR(a))-STRING(CAR(first_to))+LENGTH(word_end)));
          //atom_t word = empty_list;

          first_to = behead(first_to);
          //first_to = CDR(first_to);
          CDR(before_a) = empty_list;
          before_a = before_to;
          a = behead(a);
          if( before_to )
          CDR(before_a) = a; // TO is not first item
          else
          input = a; // TO is first item

          #ifdef DEBUG_TO_END
          printf("<TO-END> EXTRACT:");
          dumpln( first_to );
          printf("<TO-END> CURRENT INPUT:");
          dumpln( input );
          printf("<TO-END> DEFINE IN PARENT:");
          dumpln( parent );
          #endif

          // create new command/function
          atom_t var = define_user_function( word, (first_to), parent );
          DEUSE( word );
          if( IS_ERROR(var) ) 
          {
            DEUSE( input );
            return var;
          }
          DEUSE( first_to );

          // process recursively the function
          // for nested to-end definitions
          BODY(var) = preparse( BODY(var), var, level+1 );

          // next item is set explicitely,
          SET_FLAGS( a, FLAG_NEWLINE );
          first_to = NULL;
          before_to = NULL;
          continue;
        }
      }
    }

    // next item
    before_a = a;
    a = CDR(a);
  }
  
  // TO without END ?
  if( first_to )
  {
    atom_t result = new_error( ERROR_INCOMPLETE_PAIR, CAR(first_to) );
    DEUSE(input);
    return result;
  }

  #ifdef DEBUG_TO_END
  printf("<TO-END> LEFTOVERS=");
  dumpln( input);
  printf("\n");
  #endif

  return input;
}

parse	(	atom_t	input,
		atom_t	parent,
		int	top_level
	)

Parameters:

input	unparsed list
parent	parent var in which parsing is done
top_level	flag whether parsing is at its top-level

Returns:: parsed abstract syntax tree

Parses a list into a tree (aka LISP notation). The list is supposed to be tokenized as commands. The parent variable provides a context of variables which can be referenced from the parsed input.

running_compiled_code && bug #3442773

{
  atom_t   vars[MAX_ELEMS];
  atom_t   data[MAX_ELEMS];
  atom_t  poses[MAX_ELEMS]; // source position (if available)
  int    pris[MAX_ELEMS];
  int    usedby[MAX_ELEMS];
  int     largs[MAX_ELEMS]; // number of unlinked left arguments
  int     rargs[MAX_ELEMS]; // number of unlinked right arguments
  int count; // count of elements in above arrays

  atom_t aux = empty_list;  // additional lists to delete

  //printf("parse==");dumpln(input);

  #ifdef DEBUG_PARSE
  void dump_arrays( )
  {
    int i;
    for( i=0; i<count; i++ ) if( pris[i]>=PRIORITY_MIN )
    {
      printf("<PARSE>   %2d.[used by %2d; free=%d:%d] v%c0 pr=%d   =",
      i, usedby[i], largs[i], rargs[i], vars[i]?'#':'=', pris[i]);
      dump_atom(data[i],1);
      if( !IS_UNBOUND(poses[i]) )
      {
        printf(" @ ");
        dump_atom(poses[i],1);
      }
      printf("\n");
    }
    printf("\n");
  }
  #endif


  // Finds the first line of input. Return a pointer
  // to the first node after the line. Sets arrays
  // vars[], data[] and prios[]. 
  atom_t get_line( atom_t input )
  {
    int has_no_infinite = 1;
    count = 0;
    while( IS_NOT_EMPTY(input) )
    {
      atom_t elem = USE(CAR(input));
      //printf("testing element ");
      //dumpln(elem);

      #ifdef SAFEMODE
      assert( count<MAX_ELEMS );
      #endif
      data[count] = elem;
      vars[count] = IS_ANY_WORD(elem) ? find_var(elem,parent) : NULL;
      if( VAR && !IS_COMMAND(VAR) && !IS_FUNCTION(VAR) )
      {
        // If we have variable called 4 (e.g. MAKE 4 "BOZA)
        // then VAR!=NULL, but this is allowed only for
        // functions and commands, so set VAR=NULL!
        VAR=NULL;
      }
      pris[count] = (VAR ? PRIORITY(VAR) : PRIORITY_MAX);
      usedby[count] = -1;
      largs[count] = VAR ? LARGS(VAR) : 0;
      rargs[count] = VAR ? RARGS(VAR) : 0;
      poses[count] = IS_EXTENDED(input)?POS(input):unbound;

      if( VAR && IS_VARIABLE(VAR) )
      {
        printf("found variable as function: ");
        dump_atom(NAME(VAR),1);
        printf("\n");
      }

      // if element is a word not starting with ":" and
      // cannot be transfered into a number and does not
      // exist as a variable, then this is unknown function
      float64_t x;
      if( IS_ANY_WORD(elem) && 
          !VAR &&
          *STRING(elem)!=TEXT(':') &&
          *STRING(elem)!=TEXT('"') &&
          !atom_to_float(elem,&x) )
      {
        //DEUSE( input );
        DEUSE( elem );
        return new_error( ERROR_DO_NOT_KNOW, elem );
      }

      if( !top_level && VAR && has_no_infinite )
      {
        if( GET_FLAGS(VAR,FLAG_INFINITE_ARGS) )
        {
          rargs[count]=-1;
          has_no_infinite = 0;
        }
      }

      // test for unary
      if( VAR && GET_FLAGS(VAR,FLAG_CAN_BE_UNARY) && largs[count]==1 )
      {
        // case 1:   <spc> op <spc> -> binary
        // case 2:  <spc> op  -> unary
        // case 3:        op <spc>  -> binary
        // case 4:        op  -> binary
        if( GET_FLAGS(input,FLAG_AFTER_SPACE|FLAG_BEFORE_SPACE)
            == FLAG_AFTER_SPACE )
        largs[count]=0;
      }

      count++;
      //next:
      input = CDR(input);

      // if the next item is on a new line AND if it is
      // not constant-list AND we are not in a sublist
      // then break the line here
      if( top_level && IS_NOT_EMPTY(input) && GET_FLAGS(input,FLAG_NEWLINE) &&
          (!IS_LIST(CAR(input)) || IS_EXPRESSION(CAR(input))) )
      break;
    } // line while

    return input;
  } //get_line()

  #ifdef DEBUG_PARSE
  printf("<PARSE> ENTER=");
  dumpln( input);
  #endif


  //-----------------------------------------
  // STEP 1: Convert (..) into subexpressions
  //-----------------------------------------
  if( IS_EMPTY(input) ) return input;
  if( IS_ERROR(input) ) return input;
  atom_t orig_input = input = parentheses( input );
  if( IS_ERROR(input) ) return input;

  #ifdef DEBUG_PARSE
  printf("<PARSE> PARENTHESISEZ=");
  dumpln( input);
  #endif

  // if there is no parent consider the root as parent
  if( !parent ) parent = root;


  //-----------------------------------------
  // STEP 2-7: Convert list into lisp line-by-line
  //-----------------------------------------
  atom_t result = empty_list;
  atom_t last = empty_list;

  atom_t err = NULL;

  int i,j,finished;
  int* args;

#define LINK \
    { \
      usedby[j] = i; \
      args[i]--; \
    }
  //printf("%d is used by %d\n",j,i);         

#define RELINK \
    { \
      if( j<usedby[j] ) { largs[usedby[j]]++; finished=0; } \
      if( j>usedby[j] ) { rargs[usedby[j]]++; finished=0; } \
      LINK; \
    }

  while( IS_NOT_EMPTY(input) ) // main while
  {
    next_line: 
    #ifdef DEBUG_PARSE
    printf("\n\n");
    printf("<PARSE>------------------\n");
    printf("<PARSE> START A NEW LINE \n");
    printf("<PARSE>------------------\n");
    #endif

    //-----------------------------------
    // STEP 2: Get a line from the input
    //-----------------------------------
    input = get_line( input );
    if( IS_ERROR(input) )
    {
      err = input;
      break;
    }
    #ifdef DEBUG_PARSE
    printf("<PARSE> ORIGINAL PARSE_LIST:\n");
    dump_arrays( );
    #endif
    //printf("<PARSE> LEFTOVER:"); dumpln(input);
    //printf("------------------------------------\n");

    
    //-----------------------------------
    // STEP 3: Analyze and set dependencies between items
    //-----------------------------------
    finished = 0;
    while( !finished )
    {
      finished = 1;
      for( i=0; i<count; i++ )
      {

        // now process right parameters
        args = rargs;
        for( j=i+1; args[i] && j<count; j++ )
        {
          if( vars[j] && !IS_FUNCTION(vars[j]) )
          break;
          if( usedby[j]==-1 )
          LINK
          else
          if( (usedby[j]>j || usedby[j]<i) && 
              (pris[usedby[j]]<=pris[i]) )
          RELINK;
        } // for largs

        // now process left parameters
        args = largs;
        for( j=i-1; args[i] && j>=0; j-- )
        {
          //if(vars[j])
          //{
          //printf("j=%d cond=%d var=",j,!IS_FUNCTION(vars[j]));
          //dumpln(vars[j]);
          //}
          if( vars[j] && !IS_FUNCTION(vars[j]) )
          break;
          if( usedby[i]==j ) break;
          if( usedby[j]==-1 )
          {
            /*if( usedby[i]!=j )*/ LINK; // skip direct circular references
          }
          else
          if( (usedby[j]<j || usedby[j]>i) && 
              (pris[usedby[j]]<pris[i]) ) 
          {
            /*if( usedby[i]!=j )*/ RELINK; // skip direct circular reference
          }
        } // for largs
      } // for each element
    } // while not finished

    #ifdef DEBUG_PARSE
    printf("<PARSE> FINAL PARSE_LIST:\n");
    dump_arrays( );
    #endif


    //-----------------------------------
    // STEP 4: Check for extra or missing arguments
    //-----------------------------------
    #ifdef DEBUG_PARSE
    printf("<PARSE> START STEP 4:\n");
    dump_arrays( );
    #endif

    args = rargs;
    int top_level_pos = -1;
    for( j=0; j<count; j++ )
    {
      // check whether there are two or more free items
      // only one is allowed for non-top levels
      if( usedby[j]==-1 )
      {
        if( !top_level && top_level_pos>=0 )
        {
          // we are ready to announce that there are extra values
          // but before doing this check whether there is function
          // that would be so polite to take any of them
          //printf(">>> top_level_pos=%d\n",top_level_pos);
          if( rargs[top_level_pos]==0 && 
              vars[top_level_pos] &&
              GET_FLAGS(vars[top_level_pos],FLAG_MAY_HAVE_EXTRA_ARG) )
          {
            i = top_level_pos;
            //printf(">>> attach %d to %d\n",j,i);
            //printf(">>> rargs
            LINK;
            //printf(">>>>>>>"); dumpln(NAME(vars[top_level_pos]));
          }
          else
          {
            err = new_error( ERROR_CROWDED_EXPRESSION, data[j] );
            break;
          }
        }
        if( top_level_pos<0 ) top_level_pos = j;
      }

      // left paramers can be skipped only for unary functions
      if( largs[j] && !GET_FLAGS(vars[j],FLAG_CAN_BE_UNARY) )
      {
        err = new_error( ERROR_MISSING_LEFTS, data[j] );
        break;
      }

      // some functions may ignore 1 missing right parameter
      if( rargs[j]==1 && GET_FLAGS(vars[j],FLAG_MAY_SKIP_LAST_ARG) )
      continue;

      // when arguments can be infinite, this also mean any number
      if( rargs[j] && !GET_FLAGS(vars[j],FLAG_INFINITE_ARGS) )
      {
        //printf("rargs=%d\n",rargs[j]);
        err = new_error( ERROR_MISSING_RIGHTS, data[j] );
        break;
      }
    }

    if( err ) break;


    //-----------------------------------
    // STEP 5: Recursivelly process nested expressions
    //     and arguments which must be lisp'ed too
    //-----------------------------------
    #ifdef DEBUG_PARSE
    printf("<PARSE> START STEP 5:\n");
    dump_arrays( );
    #endif
    for( i=0; i<count; i++ ) if( IS_LIST(data[i]) )
    {
      if( GET_FLAGS(data[i],FLAG_EXPRESSION) )
      { // nested expression
        atom_t expr = parse( data[i], parent, 0 );
        
        if( IS_ERROR(expr) ) 
        {
          err = expr;
        }
        else
        {
          DEUSE( data[i] );
          data[i] = expr;
          SET_FLAGS( data[i], FLAG_EXPRESSION );
        }
      }
      else
      { // possibly argument to be reparsed
        j=usedby[i];
        if( (j>=0) && vars[j] && GET_FLAGS(vars[j],FLAG_PROCESS_ARGS) )
        {
          atom_t expr = build_syntax_subtree( parent, data[i] );
          if( IS_ERROR(expr) ) 
          {
            err = expr;
          }
          else
          {
            DEUSE( data[i] );
            data[i] = expr;
            SET_FLAGS( data[i], FLAG_EXPRESSION+FLAG_WAS_LIST_CONST );
          }
        }
      }
    }
    if( err ) break;


    //-----------------------------------
    // STEP 6: Group items of subexpressions, process numbers
    //-----------------------------------
    #ifdef DEBUG_PARSE
    printf("<PARSE> START STEP 6:\n");
    dump_arrays( );
    #endif
    for( i=0; i<count; i++ )
    {
      /*
      // convert all numeric data into numbers
      float64_t n;
      if( IS_ANY_WORD(data[i]) && atom_to_float(data[i],&n) )
        {
          DEUSE( data[i] );
          data[i] = new_float( n );
        }

      // create a list node for each element
      // whatever the element is, this list node will be used
      data[i] = new_list( data[i], empty_list );
      SET_FLAGS( data[i], FLAG_EXPRESSION );
      */
      
      // create a list node for each element
      // whatever the element is, this list node will be used
      float64_t n;
      if( IS_ANY_WORD(data[i]) && atom_to_float(data[i],&n) )
      {
        atom_t pos = data[i]; // old source of the number
        data[i] = new_list_ex( new_float(n), empty_list );
        POS( data[i] ) = pos;
      }
      else
      {
        data[i] = new_list( data[i], empty_list );
      }
      SET_FLAGS( data[i], FLAG_EXPRESSION );
    }

    for( i=0; i<count; i++ )
    {
      if( usedby[i]>=0 )
      {
        atom_t a = data[usedby[i]];
        atom_t name = CAR(data[i]);
        atom_t function = vars[usedby[i]];
        //printf("name(%d)=",i);dumpln(name);
        //printf("function(%d)=",usedby[i]);dumpln(function);
        //printf("a(%d)=",usedby[i]);dumpln(a);

        // if the item is word constant and is used by
        // a function which creates variables, then do
        // create the variables now.
        if( GET_FLAGS(function,FLAG_SET_ALL_VARS)
            && IS_ANY_WORD(name)
            && LENGTH(name)>1
            && *STRING(name)==TEXT('"') )
        {
          //printf("create local "); dumpln(name);
          //printf("          in "); dumpln(NAME(parent));
          atom_t var = new_local_var( name, parent, 1 );
          if( IS_ERROR(var) ) err = var;
          SET_FLAGS( var, FLAG_VARIABLE );
        }

        // if the item is word constant and is used by
        // a function which creates a single variable
        // (like MAKE), then do create the variable as
        // global if it does not exist
        // 2010.06.26: create the variable only if the
        // parsing has not been activated at run-time.
        atom_t real_name = NULL;
        if( !running_compiled_code
            && GET_FLAGS(function,FLAG_SET_ONE_VAR)
            && (i==usedby[i]+1)
            && (IS_INTEGER(name) || IS_FLOAT(name)) )
        {
          real_name = atom_to_word( name );
          goto use_numeric_name;
        }

        if( 
            GET_FLAGS(function,FLAG_SET_ONE_VAR)
            && (i==usedby[i]+1)
            && IS_ANY_WORD(name)
            && LENGTH(name)>1
            && *STRING(name)==TEXT('"') )
        {
          real_name = new_subword( name, STRING(name)+1, LENGTH(name)-1 );
          use_numeric_name:
          if( !find_var( real_name, parent ) )
          {
            atom_t var = new_local_var( real_name, parent/*root*/, 0 ); //printf("!2087!");
            if( IS_ERROR(var) ) err = var;
            SET_FLAGS( var, FLAG_VARIABLE );
          }
          DEUSE( real_name );

          // patch for FOR command. If the control
          // variable is ABC then create ABC and
          // ^ABC
          if( ADDRESS(function)==(int)rt_for )
          {
            assert( IS_ANY_WORD(name) );
            atom_t real_name = new_word( STRING(name), LENGTH(name) );
            *STRING(real_name) = L'^';
            if( !find_var( real_name, parent ) )
            {
              atom_t var = new_local_var( real_name, parent/*root*/, 0 ); //printf("!2103!");
              if( IS_ERROR(var) ) err = var;
              SET_FLAGS( var, FLAG_VARIABLE );
            }
            DEUSE( real_name );
          }
          
        }

        // if the item is word constant and is used by
        // a function which load libraries, then do
        // load the library now.
        if( ADDRESS(function)==(int)rt_load
            && IS_ANY_WORD(name)
            && LENGTH(name)>1
            && *STRING(name)==TEXT('"') )
        {
          atom_t real_name = new_word( STRING(name)+1, LENGTH(name)-1 );
          atom_t word = read_word( STRING(real_name) );
          DEUSE( real_name );

          if( IS_ERROR(word) ) { err = USE(word); break; }
          atom_t trimmed = trim_shell_comment( word );
          DEUSE( word );

          atom_t tokens1 = tokenize( trimmed, TOKENIZE_DATA );
          DEUSE( trimmed );
          if( IS_ERROR(tokens1) )
          {
            err = tokens1;
            break;
          }
          
          atom_t tokens2 = tokenize( tokens1, TOKENIZE_COMMANDS );
          DEUSE( tokens1 );
          if( IS_ERROR(tokens2) ) return tokens2;

          tokens2 = preparse( tokens2, root, LEVEL(root) );
          atom_t body = parentheses( tokens2 );
          DEUSE( tokens2 );

          //printf("BODY=");dumpln(body);
          //printf("input=");dumpln(input);

          if( IS_NOT_EMPTY(body) )
          {
            atom_t a;
            for( a = body; IS_NOT_EMPTY(CDR(a)); a=CDR(a) );
            CDR(a) = USE(input);
            input = body;
          }
          
          //printf("old aux="); dumpln(aux);
          aux = new_list( body, aux );
          //printf("new aux="); dumpln(aux);
          for( i=0; i<count; i++ ) DEUSE( data[i] );
          goto next_line;
        }

        SET_FLAGS( a, FLAG_EXPRESSION );
        while( IS_NOT_EMPTY(CDR(a)) ) a=CDR(a);
        CDR(a) = vars[i] ? new_list(data[i],empty_list) : data[i];
      }
    }
    if( err ) break;


    //-----------------------------------
    // STEP 7: Group top-most items into result list
    //-----------------------------------
    #ifdef DEBUG_PARSE
    printf("<PARSE> START STEP 7:\n");
    dump_arrays( );
    #endif
    //printf("data[0]=");dumpln(data[0]);
    //printf("vars[0]=");dumpln(vars[0]);
    for( i=0; i<count; i++ )
    if( usedby[i]==-1 )
    {
      //printf("i=%d var=%x is_expr=%d\n",i,vars[i],IS_EXPRESSION(CAR(data[i])));
      //boza if( !vars[i] && !IS_EXPRESSION(CAR(data[i])))
      //boza  {
      //printf("i=%d\n",i);
      //boza err = new_error_atom( ERROR_UNUSED_VALUE, data[i] );
      //boza break;
      //boza }
      if( IS_UNBOUND(poses[i]) )
      {
        append( data[i], &result, &last );
      }
      else
      {
        append_ex( data[i], &result, &last );
        DEUSE( POS(last) );
        POS( last ) = USE( poses[i] );
      }
    }

    if( err ) break;
  } // main line


  //-----------------------------------------
  // STEP pre-8: Test for error and release unused resources
  //-----------------------------------------
  DEUSE( orig_input );
  DEUSE( aux );
  if( err )
  {
    //printf("ERRRRRRROR\n");
    for( i=0; i<count; i++ ) DEUSE( data[i] );
    DEUSE( result );
    return err;
  }

  //-----------------------------------
  // STEP 8: Remove unnecessary parentheses
  //-----------------------------------
  //printf("before=");dumpln(result);
  //
  // step 8 removed, because in some cases it removes more
  // parentheses than needed - e.g. when function body has
  // only one command
  // while( IS_EMPTY(CDR(result)) && 
  //   IS_LIST(CAR(result)) &&
  //   GET_FLAGS(CAR(result),FLAG_EXPRESSION) )
  //  {
  //    atom_t a = CAR(result);
  //    CAR(result) = empty_list;
  //    DEUSE( result );
  //    result = a;
  //  }
  //
  //printf(" after=");dumpln(result);

  if( !top_level ) SET_FLAGS( result, FLAG_EXPRESSION );

  #ifdef DEBUG_PARSE
  printf("<PARSE> EXIT=");
  dumpln(result);
  #endif

  if( OPTION_DUMP_AST )
  {
    outter( TEXT("Abstract Syntax Tree:\n\0"), UNKNOWN );
    dumpln( result );
    outter( TEXT("\n\0"), UNKNOWN );
  }

  return result;
}

atom_t build_syntax_tree ( atom_t func )

Parameters:

func	var atom for the parse context

Returns:: empty_list or an error atom

Parses completely a function. Its source is stored in its body as word, subword, data-tokenized list or command-tokenized list. Building algorithm:

tokenization of body as commands
extracting all TO ... ENDs and create them as subfunctions
parsing the func's body into abstract syntax tree
recursively build trees of subfunctions

{
  // exit is function is already treefied
  if( IS_NOT_EMPTY(TREE(func)) ) return empty_list;

  //printf("BUILD_SYNTAX_TREE(FUNC=");
  //dump(NAME(func));
  //printf(",SOURCE=");
  //dump(SOURCE(func));
  //printf(",BODY=");
  //dump(BODY(func));
  //printf(",LOCALS=");
  //dump(LOCALS(func));
  //printf(")\n\n");

  if( IS_EMPTY(BODY(func)) )
  {
    // Step 1. Tokenize
    //printf("SOURCE="); dumpln(SOURCE(func));
    atom_t tokens1 = tokenize( SOURCE(func), TOKENIZE_DATA );
    if( IS_ERROR(tokens1) ) return tokens1;
    //printf("TOKENS1="); dumpln(tokens1);

    atom_t tokens2 = tokenize( tokens1, TOKENIZE_COMMANDS );
    DEUSE( tokens1 );
    if( IS_ERROR(tokens2) ) return tokens2;
    //printf("TOKENS2="); dumpln(tokens2);

    // Step 2. Extract TO..END's
    atom_t body = preparse( tokens2, func, LEVEL(func) );
    if( IS_ERROR(body) ) return body;
    DEUSE( BODY(func) );
    BODY(func) = body;
    //printf("BODY="); dumpln(BODY(func));
  }


  // Step 3. Parse function body
  //printf("BODY="); dumpln(BODY(func));
  atom_t tree = parse( BODY(func), func, 1 );
  if( IS_ERROR(tree) ) return tree;
  DEUSE(TREE(func));
  TREE(func) = tree;
  //printf("TREE="); dumpln(TREE(func));

  // because the might be some new TO..ENDs
  // scan all locals and build those which
  // have no trees
  atom_t local;
  atom_t locals;
  for( locals=LOCALS(func); IS_NOT_EMPTY(locals); locals=CDR(locals) )
  {
    local = CAR(locals);
    if( !DESCR2(local) ) continue;
    atom_t x = build_syntax_tree( local );
    if( IS_ERROR(x) ) return x;
  }

  return empty_list;
}

Variable Documentation

char_t debar[32]

Initial value:

<>|? */

char_t enbar[128]

table for |a|->a conversions

Defines

Tokentypes

Parse modes

Microprogram defines (basic)

Microprogram defines (compound)

Special characters

Define Documentation

Function Documentation

Variable Documentation