|
Lhogho
0.0.027
|
Defines | |
| #define | GET_CHAR ch=*source |
| get current character | |
| #define | VAR vars[count] |
| shortens access to vars array | |
| #define | LINK |
| #define | RELINK |
Tokentypes | |
| #define | TOKEN_END 0 |
| no more tokens available | |
| #define | TOKEN_WORD 1 |
| normal word token | |
| #define | TOKEN_DIRTY_WORD 2 |
| word token with unhandled backslashs or bars | |
| #define | TOKEN_OPEN 3 |
| open square bracket token | |
| #define | TOKEN_CLOSE 4 |
| close square bracket token | |
| #define | TOKEN_LINEEND 5 |
| end of line token | |
| #define | TOKEN_SPACE 6 |
| whitespace token | |
| #define | TOKEN_ERROR 7 |
| error occured | |
Parse modes | |
| #define | MODE_ENTRY 0 |
| entry mode | |
| #define | MODE_SPACE 1 |
| space mode | |
| #define | MODE_WORD 2 |
| word mode | |
| #define | MODE_BARRED 3 |
| barred mode | |
| #define | MODE_BACKSLASHED 4 |
| backslashed mode | |
| #define | MODE_TILDE 5 |
| tilde mode | |
| #define | MODE_SEMITILDE 6 |
| semitilde mode | |
| #define | MODE_SEMICOLON 7 |
| semicolon mode | |
| #define | MODE_TILDESPACE 8 |
| tildespace mode | |
| #define | MODE_LESS 9 |
| less mode | |
| #define | MODE_GREATER 10 |
| greater mode | |
| #define | MAX_MODE 11 |
| greatest mode + 1 | |
Microprogram defines (basic) | |
| #define | PAT_TILDE 0x000001 |
| push tilde | |
| #define | PAT_PUSH 0x000002 |
| push last character | |
| #define | PAT_NEXT 0x000004 |
| goto next character | |
| #define | PAT_DIRTY 0x000008 |
| set dirty flag | |
| #define | PAT_MUTATED 0x000010 |
| set mutated flag | |
| #define | PAT_RETURN_TO_WORD 0x000020 |
| next return will return to word mode | |
| #define | PAT_RETURN_TO_SELF 0x000040 |
| next return will return to current mode | |
| #define | PAT_GOTO 0x000080 |
| follows 4bit go-to-mode | |
| #define | PAT_TOKEN 0x000100 |
| follows 4bit token_id | |
| #define | PAT_RETURN 0x000200 |
| return to stack-to-mode | |
| #define | PAT_ERROR_POS 0x000400 |
| remember current position as error position | |
| #define | PAT_ERROR 0x000800 |
| generate error | |
| #define | PAT_RETURN_TO_CALLER 0x001000 |
| next return will return to the caller of this mode | |
| #define | PAT_NEXT2 0x002000 |
| goto next next character | |
Microprogram defines (compound) | |
| #define | PAT_SHIFT 16 |
| shift factor | |
| #define | PAT_TOKEN_END PAT_TOKEN+(TOKEN_END<<PAT_SHIFT) |
| microprogram for end token | |
| #define | PAT_TOKEN_SPACE PAT_TOKEN+(TOKEN_SPACE<<PAT_SHIFT) |
| microprogram for space token | |
| #define | PAT_TOKEN_WORD PAT_TOKEN+(TOKEN_WORD<<PAT_SHIFT) |
| microprogram for word token | |
| #define | PAT_TOKEN_LINE PAT_TOKEN+(TOKEN_LINEEND<<PAT_SHIFT) |
| microprogram for line end token | |
| #define | PAT_TOKEN_OPEN PAT_TOKEN+(TOKEN_OPEN<<PAT_SHIFT) |
| microprogram for open token | |
| #define | PAT_TOKEN_CLOSE PAT_TOKEN+(TOKEN_CLOSE<<PAT_SHIFT) |
| microprogram for close token | |
| #define | PAT_GOTO_SPACE PAT_GOTO+(MODE_SPACE<<PAT_SHIFT) |
| microprogram for goto space mode | |
| #define | PAT_GOTO_WORD PAT_GOTO+(MODE_WORD<<PAT_SHIFT) |
| microprogram for goto word mode | |
| #define | PAT_GOTO_BACKSLASHED PAT_GOTO+(MODE_BACKSLASHED<<PAT_SHIFT) |
| microprogram for goto backslashed mode | |
| #define | PAT_GOTO_BARRED PAT_GOTO+(MODE_BARRED<<PAT_SHIFT) |
| microprogram for goto barred mode | |
| #define | PAT_GOTO_SEMICOLON PAT_GOTO+(MODE_SEMICOLON<<PAT_SHIFT) |
| microprogram for goto semicolon mode | |
| #define | PAT_GOTO_TILDE PAT_GOTO+(MODE_TILDE<<PAT_SHIFT) |
| microprogram for goto tilde mode | |
| #define | PAT_GOTO_TILDESPACE PAT_GOTO+(MODE_TILDESPACE<<PAT_SHIFT) |
| microprogram for goto tildespace mode | |
| #define | PAT_GOTO_SEMITILDE PAT_GOTO+(MODE_SEMITILDE<<PAT_SHIFT) |
| microprogram for goto semitilde mode | |
| #define | PAT_GOTO_LESS PAT_GOTO+(MODE_LESS<<PAT_SHIFT) |
| microprogram for goto less mode | |
| #define | PAT_GOTO_GREATER PAT_GOTO+(MODE_GREATER<<PAT_SHIFT) |
| microprogram for goto greater mode | |
Special characters | |
When a chracter is barrable and is actually barred then its code is changed. For example codes for space and barred space are different for Lhogho, but should appear the same for users. Barrable characters are ()+- | |
| #define | MAX_ELEMS 128 |
| maximal number of elements in a statement | |
| char_t | debar [32] |
| table for a->|a| conversions | |
| atom_t | build_syntax_subtree (atom_t function, atom_t source) |
| parses sublist of commands | |
| void | init_parser () |
| initializes parser | |
| atom_t | tokenize (atom_t input, int method) |
| tokenizes into a list | |
| atom_t | trim_shell_comment (atom_t word) |
| trims shell comment (if any) | |
| atom_t | purify (atom_t word) |
| purifies a word | |
| atom_t | parentheses (atom_t input) |
| processes parentheses | |
| atom_t | define_user_function (atom_t source, atom_t input, atom_t parent) |
| creates var atom for a user-defined function | |
| atom_t | preparse (atom_t input, atom_t parent, int level) |
extracts all TO ... END definitions | |
| atom_t | parse (atom_t input, atom_t parent, int top_level) |
| parses a list into a tree | |
| atom_t | build_syntax_tree (atom_t func) |
| parses body of user-defined function | |
| #define GET_CHAR ch=*source |
| #define TOKEN_END 0 |
| #define TOKEN_WORD 1 |
| #define TOKEN_DIRTY_WORD 2 |
| #define TOKEN_OPEN 3 |
| #define TOKEN_CLOSE 4 |
| #define TOKEN_LINEEND 5 |
| #define TOKEN_SPACE 6 |
| #define TOKEN_ERROR 7 |
| #define MODE_ENTRY 0 |
| #define MODE_SPACE 1 |
| #define MODE_WORD 2 |
| #define MODE_BARRED 3 |
| #define MODE_BACKSLASHED 4 |
| #define MODE_TILDE 5 |
| #define MODE_SEMITILDE 6 |
| #define MODE_SEMICOLON 7 |
| #define MODE_TILDESPACE 8 |
| #define MODE_LESS 9 |
| #define MODE_GREATER 10 |
| #define MAX_MODE 11 |
| #define PAT_TILDE 0x000001 |
| #define PAT_PUSH 0x000002 |
| #define PAT_NEXT 0x000004 |
| #define PAT_DIRTY 0x000008 |
| #define PAT_MUTATED 0x000010 |
| #define PAT_RETURN_TO_WORD 0x000020 |
| #define PAT_RETURN_TO_SELF 0x000040 |
| #define PAT_GOTO 0x000080 |
| #define PAT_TOKEN 0x000100 |
| #define PAT_RETURN 0x000200 |
| #define PAT_ERROR_POS 0x000400 |
| #define PAT_ERROR 0x000800 |
| #define PAT_RETURN_TO_CALLER 0x001000 |
| #define PAT_NEXT2 0x002000 |
| #define PAT_SHIFT 16 |
| #define PAT_TOKEN_END PAT_TOKEN+(TOKEN_END<<PAT_SHIFT) |
| #define PAT_TOKEN_SPACE PAT_TOKEN+(TOKEN_SPACE<<PAT_SHIFT) |
| #define PAT_TOKEN_WORD PAT_TOKEN+(TOKEN_WORD<<PAT_SHIFT) |
| #define PAT_TOKEN_LINE PAT_TOKEN+(TOKEN_LINEEND<<PAT_SHIFT) |
| #define PAT_TOKEN_OPEN PAT_TOKEN+(TOKEN_OPEN<<PAT_SHIFT) |
| #define PAT_TOKEN_CLOSE PAT_TOKEN+(TOKEN_CLOSE<<PAT_SHIFT) |
| #define PAT_GOTO_SPACE PAT_GOTO+(MODE_SPACE<<PAT_SHIFT) |
| #define PAT_GOTO_WORD PAT_GOTO+(MODE_WORD<<PAT_SHIFT) |
| #define PAT_GOTO_BACKSLASHED PAT_GOTO+(MODE_BACKSLASHED<<PAT_SHIFT) |
| #define PAT_GOTO_BARRED PAT_GOTO+(MODE_BARRED<<PAT_SHIFT) |
| #define PAT_GOTO_SEMICOLON PAT_GOTO+(MODE_SEMICOLON<<PAT_SHIFT) |
| #define PAT_GOTO_TILDE PAT_GOTO+(MODE_TILDE<<PAT_SHIFT) |
| #define PAT_GOTO_TILDESPACE PAT_GOTO+(MODE_TILDESPACE<<PAT_SHIFT) |
| #define PAT_GOTO_SEMITILDE PAT_GOTO+(MODE_SEMITILDE<<PAT_SHIFT) |
| #define PAT_GOTO_LESS PAT_GOTO+(MODE_LESS<<PAT_SHIFT) |
| #define PAT_GOTO_GREATER PAT_GOTO+(MODE_GREATER<<PAT_SHIFT) |
| #define MAX_ELEMS 128 |
| #define LINK |
{ \
usedby[j] = i; \
args[i]--; \
}
| #define RELINK |
{ \
if( j<usedby[j] ) { largs[usedby[j]]++; finished=0; } \
if( j>usedby[j] ) { rargs[usedby[j]]++; finished=0; } \
LINK; \
}
| atom_t build_syntax_subtree | ( | atom_t | function, |
| atom_t | source | ||
| ) |
| function | var atom for the parse context |
| source | word or list containing the source |
Parses a list of commands (like these in IF or REPEAT) and build an abstract syntax tree:
TO ... ENDs and create them as subfunctions{
//printf("BUILD_SYNTAX_SUBTREE(");
//dump(NAME(function));
//printf(",");
//dump(source);
//printf(")\n\n");
// tokenize
atom_t tokens1 = tokenize( source, TOKENIZE_DATA );
if( IS_ERROR(tokens1) ) return tokens1;
//printf("tokens1="); dumpln(tokens1);
atom_t tokens2 = tokenize( tokens1, TOKENIZE_COMMANDS );
DEUSE( tokens1 );
if( IS_ERROR(tokens2) ) return tokens2;
//printf("tokens2="); dumpln(tokens2);
// extract TO..END's
atom_t body = preparse( tokens2, function, LEVEL(function) );
if( IS_ERROR(body) ) return body;
//printf("body="); dumpln(body);
// parse function body
atom_t tree = parse( body, function, 1 );
if( IS_ERROR(tree) ) return tree;
//printf("tree="); dumpln(tree);
DEUSE( tokens2 );
//DEUSE( body );
return tree;
}
| void init_parser | ( | ) |
Initializes tables enbar[] and debar[] which are used to enbar and debar a character.
{
int i;
// NOTE! if the string of enbarrable characters is
// changed, also change dump_word()
chars_t s = TEXT("______________()+-*/=<>|?_______");
//locked-> x........xx..x..................
for( i=0; i<32; i++ ) debar[i]=i; // identity matrix
for( i=0; i<128; i++ ) enbar[i]=i; // identity matrix
for( i=0; *s; i++,s++ )
{
if( *s==TEXT('_') ) continue;
debar[i] = *s;
enbar[(unsigned char)*s] = i;
}
}
| input | word, subword or list to be tokenized |
| method | method of tokenization |
Tokenizes a word, a subword or a list into a list. If possible makes all words as subwords. Backslashes and bars in words are preserved (i.e. words are not purified). Comments and line continuations are ignored.
If the method is TOKENIZE_DATA then the input is tokenized as if it contains Logo data. If the method is TOKENIZE_COMMANDS then the input is tokenized as if it contains Logo commands. If the method is TOKENIZE_READWORD then the input is tokenized as expected by READWORD function.
If the input is a list then all its elements are tokenized one-by-one.
Return value is the tokenized list. In there is an error, returns an error atom which error code is ERROR_INCOMPLETE_PAIR, error position points the position in the word (0-based) and the error source is the word itself.
{
if( IS_LIST(input) )
{
if( method==TOKENIZE_DATA &&
GET_FLAGS( input, FLAG_TOKENIZED_DATA|FLAG_TOKENIZED_COMMANDS ) )
return USE(input);
if( method==TOKENIZE_COMMANDS &&
GET_FLAGS( input, FLAG_TOKENIZED_COMMANDS ) )
return USE(input);
}
if( IS_FLOAT(input) )
{
return USE(input);
}
//printf("\n");
//if(method==TOKENIZE_DATA)
// printf("ENTER TOKENIZE_DATA(");
//else
// printf("ENTER TOKENIZE_COMMANDS(");
//dump_atom(input,1);
//printf(")\n");
#ifdef SAFEMODE
assert( IS_WORD(input) || IS_SUBWORD(input) || IS_LIST(input) );
#endif
// First check whether the input is a list.
// If it is then tokenize recursively all its elements.
if( IS_LIST(input) )
{
atom_t result = empty_list;
atom_t last = empty_list;
atom_t x;
atom_t y;
for( x=input; IS_NOT_EMPTY(x); x=CDR(x) )
{
//printf("@@@@@@@@PROCESSING=|"); dump_atom(CAR(x),1); printf("|\n");
int submethod = IS_LIST(CAR(x))?TOKENIZE_DATA:method;
atom_t element = tokenize( CAR(x), submethod );
//printf("@@@@@@@@TOKENIZED_INTO=|"); dump_atom(element,1); printf("|\n");
if( IS_ERROR(element) )
{
DEUSE( result );
//DEUSE( last );
result = element;
break;
}
int initial_flags = GET_FLAGS( x, FLAG_NEWLINE|FLAG_AFTER_SPACE );
int final_flags = GET_FLAGS( x, FLAG_BEFORE_SPACE );
//printf(">>>CAR(x) = "); dump_atom(CAR(x),1); printf("\n");
//printf(">>>element = "); dump_atom(element,1); printf("\n");
if( IS_FLOAT(element) )
{
append( USE(element), &result, &last );
SET_FLAGS( last, initial_flags|final_flags );
}
else if( IS_LIST(CAR(x)) )
{
if( IS_EXTENDED(x) )
{
append_ex( USE(element), &result, &last );
DEUSE( POS(last) );
POS( last ) = USE( POS(x) );
}
else
append( USE(element), &result, &last );
SET_FLAGS( last, initial_flags|final_flags );
}
else
{
for( y=element; IS_NOT_EMPTY(y); y=CDR(y) )
{
//printf("APPEND SUBELEMENT |");
//dump_atom(CAR(y),1);
//printf("|\n");
if( IS_EXTENDED(y) )
{
append_ex( USE(CAR(y)), &result, &last );
DEUSE( POS(last) );
POS( last ) = USE( POS(y) );
}
else
append( USE(CAR(y)), &result, &last );
SET_FLAGS( last, FLAGS(y) );
if( y==element ) SET_FLAGS( last, initial_flags );
}
#ifdef SAFE_MODE
assert( IS_NOT_EMPTY(last) );
#endif
SET_FLAGS( last, final_flags);
}
DEUSE( element );
}
//printf("FINAL RESULT IS |"); dump_atom(result,1); printf("|\n");
return result;
}
// The input is a word or a subword
chars_t source = STRING(input);
int len = LENGTH(input);
int origlen = len;
chars_t buffer = ALLOC( CHAR_SIZE*len ); // buffer for the longest word
//chars_t bp = buffer;
char_t ch;
int_t errpos = -1;
//char_t errchar = NULL_CHAR;
int last_token = TOKEN_LINEEND;
//int crlf = 0;
// Gets the next token. Return:
// TOKEN_END if there are no more tokens
// TOKEN_WORD if the token is a word
// TOKEN_DIRTY_WORD if the token is a word with \ or |
// TOKEN_OPEN if the token is [
// TOKEN_CLOSE if the token is ]
// TOKEN_LINEEND if the token is <nl>
// TOKEN_SPACE if at least one whitespace is met
//int co=0;
int get_token( atom_t *token, int method )
{
//co++;
//if (0 == co%1024)
//{
//printf("%d ",co);
//}
// return 1 if buffer contains number
int is_number(chars_t bp)
{ // "E" {digit}* "." {digit}+
// 1 2 3 4
chars_t cp = bp;
int num_mode = 1;
char_t ch;
cp = bp;
if( bp==buffer ) return 0;
while( cp>buffer )
{
cp--;
ch = *cp;
//printf("num_mode=%d ch=%C\n",num_mode,ch);
switch( num_mode )
{
case 1:
if( ch!=TEXT('E') && ch!=TEXT('e') ) return 0;
num_mode = 2;
break;
case 2: ;
if( ch<TEXT('0') || ch>TEXT('9') ) return 0;
num_mode = 3;
break;
case 3: ;
if( ch!=TEXT('.') ) return 0;
num_mode = 4;
break;
case 4: ;
if( ch<TEXT('0') || ch>TEXT('9') ) return 0;
break;
}
//printf("num_mode=%d\n",num_mode);
}
return 1;
}
if( !len ) return TOKEN_END;
int dirty = 0;
int mutated = 0;
chars_t bp = buffer; *bp=NULL_CHAR;
chars_t sp = source;
int mode = MODE_ENTRY; // current mode
int code; // action code
int stack[MAX_MODE]; // return-to-mode for each mode
static int mode_eof[MAX_MODE] =
{
/* entry */ PAT_TOKEN_END,
/* whitespace */ PAT_TOKEN_SPACE,
/* word */ PAT_TOKEN_WORD,
/* barred */ PAT_ERROR,
/* backslashed */ PAT_ERROR,
/* tilde */ PAT_ERROR,
/* semitilde */ PAT_ERROR,
/* semicolon */ PAT_RETURN,
/* tildespace */ PAT_ERROR,
/* less */ PAT_TOKEN_WORD,
/* greater */ PAT_TOKEN_WORD
};
static int mode_eol[MAX_MODE] =
{
/* entry */ PAT_NEXT+PAT_TOKEN_LINE,
/* whitespace */ PAT_TOKEN_SPACE,
/* word */ PAT_TOKEN_WORD,
/* barred */ PAT_PUSH+PAT_NEXT,
/* backslashed */ PAT_PUSH+PAT_NEXT+PAT_RETURN,
/* tilde */ PAT_NEXT+PAT_RETURN,
/* semitilde */ PAT_NEXT+PAT_RETURN,
/* semicolon */ PAT_RETURN,
/* tildespace */ PAT_NEXT+PAT_RETURN,
/* less */ PAT_TOKEN_WORD,
/* greater */ PAT_TOKEN_WORD
};
static int mode_space[MAX_MODE] =
{
/* entry */ PAT_NEXT+PAT_GOTO_SPACE,
/* whitespace */ PAT_NEXT,
/* word */ PAT_TOKEN_WORD,
/* barred */ PAT_PUSH+PAT_NEXT,
/* backslashed */ PAT_PUSH+PAT_NEXT+PAT_RETURN,
/* tilde */ PAT_NEXT+PAT_GOTO_TILDESPACE,
/* semitilde */ PAT_NEXT,
/* semicolon */ PAT_NEXT,
/* tildespace */ PAT_NEXT,
/* less */ PAT_TOKEN_WORD,
/* greater */ PAT_TOKEN_WORD
};
static int mode_open[MAX_MODE] =
{
/* entry */ PAT_NEXT+PAT_TOKEN_OPEN,
/* whitespace */ PAT_TOKEN_SPACE,
/* word */ PAT_TOKEN_WORD,
/* barred */ PAT_PUSH+PAT_NEXT,
/* backslashed */ PAT_PUSH+PAT_NEXT+PAT_RETURN,
/* tilde */ PAT_TILDE+PAT_RETURN,
/* semitilde */ PAT_RETURN,
/* semicolon */ PAT_NEXT,
/* tildespace */ PAT_TILDE+PAT_TOKEN_WORD,
/* less */ PAT_TOKEN_WORD,
/* greater */ PAT_TOKEN_WORD
};
static int mode_close[MAX_MODE] =
{
/* entry */ PAT_NEXT+PAT_TOKEN_CLOSE,
/* whitespace */ PAT_TOKEN_SPACE,
/* word */ PAT_TOKEN_WORD,
/* barred */ PAT_PUSH+PAT_NEXT,
/* backslashed */ PAT_PUSH+PAT_NEXT+PAT_RETURN,
/* tilde */ PAT_TILDE+PAT_RETURN,
/* semitilde */ PAT_RETURN,
/* semicolon */ PAT_NEXT,
/* tildespace */ PAT_TILDE+PAT_TOKEN_WORD,
/* less */ PAT_TOKEN_WORD,
/* greater */ PAT_TOKEN_WORD
};
static int mode_bar[MAX_MODE] =
{
/* entry */ PAT_PUSH+PAT_NEXT+PAT_DIRTY+PAT_RETURN_TO_WORD+PAT_GOTO_BARRED+PAT_ERROR_POS,
/* whitespace */ PAT_TOKEN_SPACE,
/* word */ PAT_PUSH+PAT_NEXT+PAT_DIRTY+PAT_GOTO_BARRED+PAT_RETURN_TO_SELF+PAT_ERROR_POS,
/* barred */ PAT_PUSH+PAT_NEXT+PAT_RETURN,
/* backslashed */ PAT_PUSH+PAT_NEXT+PAT_RETURN,
/* tilde */ PAT_TILDE+PAT_RETURN,
/* semitilde */ PAT_RETURN,
/* semicolon */ PAT_PUSH+PAT_NEXT+PAT_DIRTY+PAT_GOTO_BARRED+PAT_RETURN_TO_SELF+PAT_ERROR_POS,
/* tildespace */ PAT_TILDE+PAT_TOKEN_WORD,
/* less */ PAT_TOKEN_WORD,
/* greater */ PAT_TOKEN_WORD
};
static int mode_backslash[MAX_MODE] =
{
/* entry */ PAT_PUSH+PAT_NEXT+PAT_DIRTY+PAT_RETURN_TO_WORD+PAT_GOTO_BACKSLASHED+PAT_ERROR_POS,
/* whitespace */ PAT_TOKEN_SPACE,
/* word */ PAT_PUSH+PAT_NEXT+PAT_DIRTY+PAT_RETURN_TO_SELF+PAT_GOTO_BACKSLASHED+PAT_ERROR_POS,
/* barred */ PAT_PUSH+PAT_NEXT+PAT_RETURN_TO_SELF+PAT_GOTO_BACKSLASHED,
/* backslashed */ PAT_PUSH+PAT_NEXT+PAT_RETURN,
/* tilde */ PAT_TILDE+PAT_RETURN,
/* semitilde */ PAT_RETURN,
/* semicolon */ PAT_NEXT2, //+PAT_RETURN_TO_SELF+PAT_GOTO_BACKSLASHED,
/* tildespace */ PAT_TILDE+PAT_TOKEN_WORD,
/* less */ PAT_TOKEN_WORD,
/* greater */ PAT_TOKEN_WORD
};
static int mode_tilde[MAX_MODE] =
{
/* entry */ PAT_NEXT+PAT_RETURN_TO_SELF+PAT_GOTO_TILDE+PAT_ERROR_POS,
/* whitespace */ PAT_TOKEN_SPACE,
/* word */ PAT_NEXT+PAT_MUTATED+PAT_RETURN_TO_SELF+PAT_GOTO_TILDE+PAT_ERROR_POS,
/* barred */ PAT_PUSH+PAT_NEXT,
/* backslashed */ PAT_PUSH+PAT_NEXT+PAT_RETURN,
/* tilde */ PAT_TILDE+PAT_RETURN,
/* semitilde */ PAT_RETURN,
/* semicolon */ PAT_NEXT+PAT_MUTATED+PAT_RETURN_TO_CALLER+PAT_GOTO_SEMITILDE+PAT_ERROR_POS,
/* tildespace */ PAT_TILDE+PAT_TOKEN_WORD,
/* less */ PAT_TOKEN_WORD,
/* greater */ PAT_TOKEN_WORD
};
static int mode_semicolon[MAX_MODE] =
{
/* entry */ PAT_NEXT+PAT_RETURN_TO_SELF+PAT_GOTO_SEMICOLON,
/* whitespace */ PAT_TOKEN_SPACE,
/* word */ PAT_NEXT+PAT_MUTATED+PAT_RETURN_TO_SELF+PAT_GOTO_SEMICOLON,
/* barred */ PAT_PUSH+PAT_NEXT,
/* backslashed */ PAT_PUSH+PAT_NEXT+PAT_RETURN,
/* tilde */ PAT_TILDE+PAT_RETURN,
/* semitilde */ PAT_RETURN,
/* semicolon */ PAT_NEXT+PAT_RETURN_TO_SELF+PAT_GOTO_SEMITILDE,
/* tildespace */ PAT_TILDE+PAT_TOKEN_WORD,
/* less */ PAT_TOKEN_WORD,
/* greater */ PAT_TOKEN_WORD
};
static int mode_else[MAX_MODE] =
{
/* entry */ PAT_PUSH+PAT_NEXT+PAT_GOTO_WORD,
/* whitespace */ PAT_TOKEN_SPACE,
/* word */ PAT_PUSH+PAT_NEXT,
/* barred */ PAT_PUSH+PAT_NEXT,
/* backslashed */ PAT_PUSH+PAT_NEXT+PAT_RETURN,
/* tilde */ PAT_TILDE+PAT_RETURN,
/* semitilde */ PAT_RETURN,
/* semicolon */ PAT_NEXT,
/* tildespace */ PAT_TILDE+PAT_TOKEN_WORD,
/* less */ PAT_TOKEN_WORD,
/* greater */ PAT_TOKEN_WORD
};
static int mode_parens[MAX_MODE] =
{
/* entry */ PAT_PUSH+PAT_NEXT+PAT_TOKEN_WORD,
/* whitespace */ PAT_TOKEN_SPACE,
/* word */ PAT_TOKEN_WORD,
/* barred */ PAT_PUSH+PAT_NEXT,
/* backslashed */ PAT_PUSH+PAT_NEXT+PAT_RETURN,
/* tilde */ PAT_TILDE+PAT_RETURN,
/* semitilde */ PAT_RETURN,
/* semicolon */ PAT_NEXT,
/* tildespace */ PAT_TILDE+PAT_TOKEN_WORD,
/* less */ PAT_TOKEN_WORD,
/* greater */ PAT_TOKEN_WORD
};
static int mode_equal[MAX_MODE] =
{
/* entry */ PAT_PUSH+PAT_NEXT+PAT_TOKEN_WORD,
/* whitespace */ PAT_TOKEN_SPACE,
/* word */ PAT_TOKEN_WORD,
/* barred */ PAT_PUSH+PAT_NEXT,
/* backslashed */ PAT_PUSH+PAT_NEXT+PAT_RETURN,
/* tilde */ PAT_TILDE+PAT_RETURN,
/* semitilde */ PAT_RETURN,
/* semicolon */ PAT_NEXT,
/* tildespace */ PAT_TILDE+PAT_TOKEN_WORD,
/* less */ PAT_PUSH+PAT_NEXT+PAT_TOKEN_WORD,
/* greater */ PAT_PUSH+PAT_NEXT+PAT_TOKEN_WORD
};
static int mode_less[MAX_MODE] =
{
/* entry */ PAT_PUSH+PAT_NEXT+PAT_GOTO_LESS,
/* whitespace */ PAT_PUSH+PAT_NEXT+PAT_GOTO_LESS, //PAT_TOKEN_WORD, @boza
/* word */ PAT_TOKEN_WORD,
/* barred */ PAT_PUSH+PAT_NEXT,
/* backslashed */ PAT_PUSH+PAT_NEXT+PAT_RETURN,
/* tilde */ PAT_TILDE+PAT_RETURN,
/* semitilde */ PAT_RETURN,
/* semicolon */ PAT_NEXT,
/* tildespace */ PAT_TILDE+PAT_TOKEN_WORD,
/* less */ PAT_TOKEN_WORD,
/* greater */ PAT_TOKEN_WORD
};
static int mode_greater[MAX_MODE] =
{
/* entry */ PAT_PUSH+PAT_NEXT+PAT_GOTO_GREATER,
/* whitespace */ PAT_PUSH+PAT_NEXT+PAT_GOTO_GREATER, //PAT_TOKEN_WORD, @boza
/* word */ PAT_TOKEN_WORD,
/* barred */ PAT_PUSH+PAT_NEXT,
/* backslashed */ PAT_PUSH+PAT_NEXT+PAT_RETURN,
/* tilde */ PAT_TILDE+PAT_RETURN,
/* semitilde */ PAT_RETURN,
/* semicolon */ PAT_NEXT,
/* tildespace */ PAT_TILDE+PAT_TOKEN_WORD,
/* less */ PAT_PUSH+PAT_NEXT+PAT_TOKEN_WORD,
/* greater */ PAT_TOKEN_WORD
};
again:
// get action code
GET_CHAR;
#ifdef DEBUG_TOKENS
printf("<TOKENS> length=%d\n",len);
if(ch<TEXT(' '))
printf("<TOKENS> get(#%d)\t",ch);
else
printf("<TOKENS> get('%c')\t",ch);
switch( mode )
{
case MODE_ENTRY: printf(" ENTRY -> "); break;
case MODE_SPACE: printf(" SPACE -> "); break;
case MODE_WORD: printf(" WORD -> "); break;
case MODE_BARRED: printf(" BARRED -> "); break;
case MODE_BACKSLASHED: printf(" BACKSLASH -> "); break;
case MODE_TILDE: printf(" TILDE -> "); break;
case MODE_SEMITILDE: printf(" SEMITILDE -> "); break;
case MODE_SEMICOLON: printf(" SEMICOLON -> "); break;
case MODE_TILDESPACE: printf("TILDESPACE -> "); break;
}
#endif
code = mode_else[mode];
if( !len )
code = mode_eof[mode];
else
{
if( method==TOKENIZE_READWORD )
{ // tokenize as expected by READWORD
if( ch==TEXT('|') ) code = mode_bar[mode];
else if( ch==TEXT('\\') ) code = mode_backslash[mode];
}
else
if( method==TOKENIZE_READLIST )
{ // tokenize as data
if( ch==TEXT('\n') ) code = mode_eol[mode];
else if( ch==TEXT('\r') ) code = mode_eol[mode];
else if( ch<=TEXT(' ') ) code = mode_space[mode];
else if( ch==TEXT('[') ) code = mode_open[mode];
else if( ch==TEXT(']') ) code = mode_close[mode];
else if( ch==TEXT('|') ) code = mode_bar[mode];
else if( ch==TEXT('\\') ) code = mode_backslash[mode];
else if( ch==TEXT('~') ) code = mode_tilde[mode];
}
else
if( method==TOKENIZE_DATA )
{ // tokenize as data
if( ch==TEXT('\n') ) code = mode_eol[mode];
else if( ch==TEXT('\r') ) code = mode_eol[mode];
else if( ch<=TEXT(' ') ) code = mode_space[mode];
else if( ch==TEXT('[') ) code = mode_open[mode];
else if( ch==TEXT(']') ) code = mode_close[mode];
else if( ch==TEXT('|') ) code = mode_bar[mode];
else if( ch==TEXT('\\') ) code = mode_backslash[mode];
else if( ch==TEXT('~') ) code = mode_tilde[mode];
else if( ch==TEXT(';') ) code = mode_semicolon[mode];
}
else
{ // tokenize as commands
//printf("ch=%c *buf=%c\n",ch,*buffer);
if( ch==TEXT('(') ||
ch==TEXT(')') ) code = mode_parens[mode];
else if( *buffer!=TEXT('"') )
{
if( (ch==TEXT('+') || ch==TEXT('-')) && is_number(bp) ) { }
else if( ch==TEXT('+') ||
ch==TEXT('-') ||
ch==TEXT('*') ||
ch==TEXT('/')) code = mode_parens[mode];
else if( ch==TEXT('=') ) code = mode_equal[mode];
else if( ch==TEXT('<') ) code = mode_less[mode];
else if( ch==TEXT('>') ) code = mode_greater[mode];
}
}
}
// process action code
int _stack = -1;
int newmode = 0;
if( code & PAT_GOTO )
{
// this is pred-processing of GOTO
// if old mode was space, and new is not,
// then update initial position of next token
newmode = (code>>PAT_SHIFT) & 0xF;
if( mode==MODE_SPACE && newmode!=MODE_SPACE ) { sp = source; }
}
if( code & PAT_TILDE )
{
*bp++ = TEXT('~');
#ifdef DEBUG_TOKENS
printf("\n<TOKENS> put('%c'/%d)\n",TEXT('~'),TEXT('~'));
#endif //DEBUG_TOKENS
}
if( code & PAT_PUSH )
{
// push a character only if:
// - currently not in bars |..?..|
// - currently in bars, but not in semicolon ;..|..?..|
if( mode!=MODE_BARRED ||
(stack[mode]!=MODE_SEMITILDE &&
stack[mode]!=MODE_SEMICOLON) )
{
//if( mode==MODE_BARRED || mode==MODE_BACKSLASHED )
//*bp++ = ENBAR(ch);
//else
*bp++ = ch;
#ifdef DEBUG_TOKENS
printf("\n<TOKENS> put('%c'/%d)\n",ch,ch);
#endif //DEBUG_TOKENS
//if( ch=='\r' && *(source+1)=='\n' ) // handle CRLF cases
//{
//*bp++ = '\n';
//#ifdef DEBUG_TOKENS
// printf("\n<TOKENS> put('%d')\n",'\n');
// #endif //DEBUG_TOKENS
//}
}
}
if( code & PAT_NEXT2 )
{
source++;
len--;
}
if( code & (PAT_NEXT|PAT_NEXT2) )
{
if( *source=='\r' )
{
//crlf = 0;
if( *(source+1)=='\n' )
{
//crlf = 1;
source++;
len--;
}
}
source++;
len--;
}
if( code & PAT_DIRTY ) dirty = 1;
if( code & PAT_MUTATED ) mutated = 1;
if( code & PAT_ERROR_POS ) errpos = origlen-len-1;
if( code & PAT_RETURN_TO_WORD ) _stack = MODE_WORD;
if( code & PAT_RETURN_TO_SELF ) _stack = mode;
if( code & PAT_RETURN_TO_CALLER ) _stack = stack[mode];
if( code & PAT_GOTO )
{
// this is post-processing of GOTO
mode = newmode;
stack[mode] = _stack;
}
if( code & PAT_RETURN ) mode = stack[mode];
if( code & PAT_TOKEN )
{
int _token = (code>>PAT_SHIFT) & 0xF;
if( _token!=TOKEN_WORD ) return _token;
if( mutated )
{
*bp = NULL_CHAR;
*token = new_word( buffer, UNKNOWN );
#ifdef DEBUG_TOKENS
printf("MUTATED TOKEN "); dumpln(*token);
printf("\n\n");
#endif
}
else
{
*token = new_subword( input, sp, source-sp /*bp-buffer*/ );
#ifdef DEBUG_TOKENS
printf("NORMAL TOKEN **"); dump(*token);
printf("** (len=%d)\n\n\n",source-sp);
#endif
}
return dirty?TOKEN_DIRTY_WORD:TOKEN_WORD;
}
if( code & PAT_ERROR )
{
#ifdef DEBUG_TOKENS
printf("ERROR\n");
#endif
return TOKEN_ERROR;
}
#ifdef DEBUG_TOKENS
switch( mode )
{
case MODE_ENTRY: printf("ENTRY\n"); break;
case MODE_SPACE: printf("SPACE\n"); break;
case MODE_WORD: printf("WORD\n"); break;
case MODE_BARRED: printf("BARRED\n"); break;
case MODE_BACKSLASHED: printf("BACKSLASH\n"); break;
case MODE_TILDE: printf("TILDE\n"); break;
case MODE_SEMITILDE: printf("SEMITILDE\n"); break;
case MODE_SEMICOLON: printf("SEMICOLON\n"); break;
case MODE_TILDESPACE: printf("TILDESPACE\n"); break;
}
#endif
goto again;
} // get_token()
atom_t get_sublist( int level, int full_parse, atom_t* pos ) //ex2//
{
atom_t result = empty_list;
atom_t last = empty_list;
if( pos ) *pos = NULL; //ex2//
atom_t token = NULL;
atom_t sublist_pos = NULL; //ex2//
int flags;
int bracketlen = len; // LEN of last opening bar
int pos_from = source-STRING(input); //ex2//
if( pos_from ) pos_from--; //ex2//
flags = 0;//FLAG_NEWLINE;
while( (last_token=get_token(&token,full_parse)) )
{
sublist_pos = NULL;
#ifdef DEBUG_TOKENIZATION
switch(last_token)
{
case TOKEN_END: printf("TOKEN_END\n"); break;
case TOKEN_SPACE: printf("TOKEN_SPACE\n"); break;
case TOKEN_WORD: printf("TOKEN_WORD @"); dump(token); printf("@\n"); break;
case TOKEN_DIRTY_WORD: printf("TOKEN_|WORD| @"); dump(token); printf("@\n"); break;
case TOKEN_OPEN: printf("TOKEN_OPEN [\n"); break;
case TOKEN_CLOSE: printf("TOKEN_CLOSE ]\n"); break;
case TOKEN_LINEEND: printf("TOKEN_LINEEND\n"); break;
case TOKEN_ERROR: printf("TOKEN_ERROR\n"); break;
}
#endif
if( last_token==TOKEN_ERROR ) return result;
if( last_token==TOKEN_SPACE )
{
if( IS_NOT_EMPTY(last) ) SET_FLAGS( last, FLAG_BEFORE_SPACE );
flags |= FLAG_AFTER_SPACE;
continue;
}
if( last_token==TOKEN_DIRTY_WORD )
{
//printf("###BEFORE="); dumpln(token);
atom_t x = purify( token );
DEUSE( token );
token = x;
//printf("###AFTER="); dumpln(token);
}
if( last_token==TOKEN_CLOSE ) break;
if( last_token==TOKEN_LINEEND )
{
flags |= FLAG_NEWLINE;
continue;
}
if( last_token==TOKEN_OPEN)
{
token = get_sublist( level+1, TOKENIZE_DATA, &sublist_pos ); // recursive //ex2//
if( last_token==TOKEN_ERROR )
{
DEUSE( token );
if( sublist_pos ) DEUSE(sublist_pos);
sublist_pos = NULL;
break;
}
}
if( method==TOKENIZE_COMMANDS )
flags |= FLAG_TOKENIZED_COMMANDS;
if( method==TOKENIZE_DATA )
flags |= FLAG_TOKENIZED_DATA;
if( method==TOKENIZE_COMMANDS &&
last_token==TOKEN_WORD &&
LENGTH(token)>1 &&
*STRING(token)==TEXT('?') &&
*(STRING(token)+1)>=TEXT('0') &&
*(STRING(token)+1)<=TEXT('9') )
{
//printf(">>>%d %d\n", last_token==TOKEN_WORD, last_token==TOKEN_DIRTY_WORD);
// process template ?nn->(? nn) for command tokenization
//printf("append token **"); dump(token); printf("**\n");
atom_t new_qoken = new_subword( token, STRING(token), 1 );
atom_t new_token = new_subword( token, STRING(token)+1, LENGTH(token)-1 );
DEUSE( token );
append( new_word(TEXT("("),-1), &result, &last ); // (
append( new_qoken, &result, &last ); // ?
append( new_token, &result, &last ); // nn
append( new_word(TEXT(")"),-1), &result, &last ); // )
}
else
{
// normal token, no more processing needed
if( sublist_pos ) //ex//
{
append_ex( token, &result, &last );
POS( last ) = sublist_pos;
//printf("\n\nSET EXTENDED POSITION ");
//dump_atom(sublist_pos,1); printf("\n");
//printf("CURRENT RESULT ");
//dump_atom(result,1); printf("\n\n");
sublist_pos = NULL;
}
else
{
append( token, &result, &last );
}
SET_FLAGS( last, flags );
}
flags = 0;
if( last_token==TOKEN_ERROR ) break;
}
// test for unmatching square brackets
// i.e. ...[... or ...]...
if( level )
{
if( last_token==TOKEN_END )
{
errpos = origlen-bracketlen-1;
last_token = TOKEN_ERROR;
}
}
else
{
if( last_token==TOKEN_CLOSE )
{
errpos = origlen-len-1;
last_token = TOKEN_ERROR;
}
}
int pos_to = source-STRING(input);
if( pos_to ) pos_to--;
if( pos && method == TOKENIZE_DATA ) //ex2//
{
*pos = new_subword( input, STRING(input)+pos_from, pos_to-pos_from+1 );
}
return result;
} // get_sublist()
atom_t result = get_sublist( 0, method, NULL ); //ex//
// in case of error return empty list
if( last_token==TOKEN_ERROR )
{
//printf("ERROR RESULT=");dumpln(result);
DEUSE( result );
result = new_parse_error( ERROR_INCOMPLETE_PAIR, errpos, input );
}
DEALLOC( buffer );
//printf("#########");
//dump_atom(result,1);
//printf("######\n");
return result;
}
| atom_t trim_shell_comment | ( | atom_t | word | ) |
| word | word containing source text |
Trims a shell comment from the beginning of the word. Shell comment can be only the first line if its first two characters are #!. If a shell comment is trimmed, then the result is a subword from the first character on the second line, otherwise the input word is returned as is but with increased reference count.
{
#ifdef SAFEMODE
assert( IS_WORD(word) || IS_SUBWORD(word) );
#endif
chars_t source = STRING(word);
int_t len = LENGTH(word);
// if there are no enough characters just exit
if( LENGTH(word)<2 ) return USE(word);
// if the first two characters are not #! then exit
if( *source!=TEXT('#') || *(source+1)!=TEXT('!') ) return USE(word);
// skip the line
while( len && *source!=TEXT('\n') )
{
source++;
len--;
}
// return a subword. Pay attention to always reference
// the main host word because the input could be a word
// or a subword.
if( IS_WORD(word) )
return new_subword( word, source, len );
else
return new_subword( WORD(word), source, len );
}
| word | word to be purified |
Purifies a word by processing all backslashes and bars. Returns a new word if needed. Assumes that the input needs purification.
{
//return USE(word);
#ifdef SAFEMODE
assert( IS_WORD(word) || IS_SUBWORD(word) );
#endif
chars_t source = STRING(word);
int_t len = LENGTH(word);
chars_t buffer = alloca( CHAR_SIZE*len ); // buffer for the longest word
chars_t bp = buffer;
int need_enbar = 0;
int is_mutated = 0; // set to 1 if the word is mutated
int in_backslash = 0;
int in_bars = 0;
for( ; len; len--,source++ )
{
need_enbar = in_bars || in_backslash;
if( in_backslash )
{
in_backslash = 0;
}
else if( *source==TEXT('\\') )
{
is_mutated = 1;
in_backslash = 1;
continue;
}
else if( *source==TEXT('|') )
{
is_mutated = 1;
in_bars = !in_bars;
continue;
}
if( need_enbar )
*bp++ = ENBAR(*source);
else
*bp++ = *source;
//if( need_enbar )
//printf(" PURIFY %d %d\n",*source,ENBAR(*source));
//else
//printf(" PURIFY %d \n",*source );
}
*bp = NULL_CHAR;
if( is_mutated )
return new_word( buffer, bp-buffer );
else
return USE(word);
}
| atom_t parentheses | ( | atom_t | input | ) |
| input | flat list |
Processes all (...) of a flat list by making them as sublists. Returns ERROR_INCOMPLETE_PAIR if the parentheses are not paired well.
{
atom_t result = empty_list;
atom_t _paren( int level, atom_t openparen, int* last_flags )
{
#ifdef DEBUG_PARENTHESES
printf("<PAREN> ENTER LEVEL %d INPUT=", level);
dumpln(input);
#endif //DEBUG_PARENTHESES
*last_flags = 0;
atom_t result = empty_list;
atom_t last = empty_list;
while( IS_NOT_EMPTY(input) )
{
// get current element
atom_t elem = USE(CAR(input));
int flags = FLAGS(input);
#ifdef DEBUG_PARENTHESES
printf("<PAREN> READ ELEMENT ");
dumpln(elem);
#endif //DEBUG_PARENTHESES
// if it is ( then call _parse() recursively
// if it is ) then exit current _parse()
if( IS_WORD(elem)||IS_SUBWORD(elem) )
if( LENGTH(elem)==1 )
{
if( *STRING(elem)==TEXT('(') ) // process (
{
int f;
DEUSE( elem ); //compensate USE() when elem was retrieved
input = CDR(input);
elem = _paren( level+1, elem, &f );
flags |= f;
if( IS_ERROR(elem) )
{
DEUSE( result );
return elem;
}
}
else if( *STRING(elem)==TEXT(')') )// process )
{
DEUSE( elem ); //compensate USE() when elem was retrieved
*last_flags = FLAGS(input);
break;
}
}
#ifdef DEBUG_PARENTESES
printf("<PAREN> APPEND ");
dumpln(elem);
#endif //DEBUG_PARENTHESES
// append current element
if( IS_EXTENDED(input) )
{
append_ex( elem, &result, &last );
DEUSE( POS(last) );
POS( last ) = USE( POS( input ) );
}
else
append( elem, &result, &last );
FLAGS(last) |= flags;
// empty list
if( IS_EMPTY(input) ) break;
// move to next element
input = CDR(input);
}
if( level && IS_EMPTY(input) )
{
DEUSE( result );
return new_error( ERROR_INCOMPLETE_PAIR, openparen );
}
if( !level && !IS_EMPTY(input) )
{
DEUSE( result );
return new_error( ERROR_INCOMPLETE_PAIR, CAR(input) );
}
if( level && IS_EMPTY(result) )
{
return new_error( ERROR_EMPTY_EXPRESSION, openparen );
}
#ifdef DEBUG_PARENTHESES
printf("<PAREN> EXIT LEVEL %d RESULT=", level);
dumpln(result);
#endif //DEBUG_PARENTHESES
if( level ) SET_FLAGS( result, FLAG_EXPRESSION );
return result;
} // _paren()
int f;
result = _paren( 0, empty_list, &f );
return result;
}
| atom_t define_user_function | ( | atom_t | source, |
| atom_t | input, | ||
| atom_t | parent | ||
| ) |
| source | word containing the source |
| input | contents between TO and END |
| parent | context var where creation is done |
Defines new user-defined command or function. The input contains the contents of the function -- these are all tokens between TO and END. Var creation is done in several steps:
: and are on a single line: or double quotes ".: and are on the same line.{
#ifdef DEBUG_TO_END
printf("<TO-END> DEFINING=");
dumpln( input);
#endif
//atom_t last;
atom_t a = input;
atom_t name;
atom_t lhi; // last_header_item
// create the function
atom_t function = new_var( word_to, parent, 1 ); //DEUSE(to);
need_descr2( function );
SET_FLAGS( function, FLAG_FUNCTION );
LARGS( function ) = 0;
RARGS( function ) = 0;
// collecting left parameters
while( IS_NOT_EMPTY(a) &&
!GET_FLAGS(a,FLAG_NEWLINE) &&
IS_ANY_WORD(CAR(a)) &&
(*STRING(CAR(a))==TEXT(':')) )
{
lhi = CAR(a);
atom_t var = new_local_var( CAR(a), function, 1 );
if( IS_ERROR(var) )
{
DEUSE( input );
//DEUSE( function );
LOCALS(parent) = behead( LOCALS(parent) ); // this DEUSEs function
return var;
}
SET_FLAGS( var, FLAG_VARIABLE );
LARGS( function )++;
#ifdef SAFE_MODE
assert( LARGS(function)<255 );
#endif
a = CDR(a);
}
// collecting name
if( IS_NOT_EMPTY(a) &&
!GET_FLAGS(a,FLAG_NEWLINE) &&
IS_ANY_WORD(CAR(a)) &&
(*STRING(CAR(a))!=TEXT('"')) )
{
lhi = CAR(a);
name = CAR(a);
a = CDR(a);
}
else
{
DEUSE( input );
return new_error( ERROR_MISSING_NAME, a );
}
//printf("<TO-END> DEFINING="); dumpln( name );
// collecting right parameters
while( IS_NOT_EMPTY(a) &&
!GET_FLAGS(a,FLAG_NEWLINE) &&
IS_ANY_WORD(CAR(a)) &&
(*STRING(CAR(a))==TEXT(':')) )
{
lhi = CAR(a);
atom_t var = new_local_var( CAR(a), function, 1 );
if( IS_ERROR(var) )
{
DEUSE( input );
//DEUSE( function );
LOCALS(parent) = behead( LOCALS(parent) ); // this DEUSEs function
return var;
}
SET_FLAGS( var, FLAG_VARIABLE );
RARGS( function )++;
#ifdef SAFE_MODE
assert( RARGS(function)<255 );
#endif
a = CDR(a);
}
// collect "..."
if( IS_NOT_EMPTY(a) &&
!GET_FLAGS(a,FLAG_NEWLINE) &&
IS_ANY_WORD(CAR(a)) &&
(LENGTH(CAR(a))==3) &&
(*(STRING(CAR(a))+0)==TEXT('.')) &&
(*(STRING(CAR(a))+1)==TEXT('.')) &&
(*(STRING(CAR(a))+2)==TEXT('.')) )
{
SET_FLAGS( function, FLAG_INFINITE_ARGS );
a = CDR(a);
}
// set offset of parameters
int offset = BASE_OFFSET_PARAMS; // this is the start offset
atom_t x;
for( x = LOCALS(function); IS_NOT_EMPTY(x); x=CDR(x) )
{
//printf("set offset of "); dump(NAME(CAR(x))); printf(" to be %d\n",offset);
OFFSET(CAR(x)) = offset;
offset += sizeof( atom_t );
}
// check whether the function is already defined
atom_t var = find_local_var( name, parent );
if( var )
{
if( LARGS(var)!=LARGS(function) || RARGS(var)!=RARGS(function) )
{
LOCALS(parent) = behead( LOCALS(parent) ); // this DEUSEs function
atom_t result = new_error( ERROR_INCOMPATIBLE_REDEFINITION, input );
DEUSE( input );
return result;
}
// forget data of the old function and
// reuse data of the new function
DEUSE( FULLSOURCE(var) );
DEUSE( SOURCE(var) );
DEUSE( LOCALS(var) );
DEUSE( BODY(var) );
DEUSE( TREE(var) );
DEUSE( BINARY(var) );
TREE(var) = empty_list;
BINARY(var) = empty_list;
BODY(var) = USE(a); //USE(BODY(function));
LOCALS(var) = USE(LOCALS(function));
LOCALS(parent) = behead(LOCALS(parent));
//DEUSE(function);
function = var;
}
else
{
DEUSE( NAME(function) );
NAME( function ) = USE(name);
BODY( function ) = USE(a);
}
//LEVEL( function ) = level;
ADDRESS( function ) = 0;
PRIORITY( function ) = PRIORITY_FUN;
// cut header and 'END' from source
chars_t new_src = STRING(lhi)+LENGTH(lhi);
int new_len = LENGTH(source) - (new_src-STRING(source)) - LENGTH(word_end);
SOURCE( function ) = new_subword( source, new_src, new_len );
FULLSOURCE( function ) = USE( source );
//DEUSE( input );
#ifdef DEBUG_TO_END
printf("<TO-END> DEFINED FUNCTION "); dumpln(NAME(function));
printf("<TO-END> SOURCE "); dumpln(SOURCE(function));
printf("<TO-END> BODY "); dumpln(BODY(function));
printf("<TO-END> TREE "); dumpln(TREE(function));
#endif
//printf("@@@@@@DEFINE "); dump_atom(NAME(function),1);
//printf(" IN PARENT "); dump_atom(NAME(PARENT(function)),1);
//printf("@@@@@@\n");
return function;
}
| input | list with tokenized source code |
| parent | context var where preparsing is done |
| level | current level of nesting return source without TO ... END definitions |
Pre-parses a flat tokenized list by extracting all local TO ... END definitions and creating corresponding local functions. Returns the remaining source code.
{
#ifdef DEBUG_TO_END
printf("<TO-END> ENTER1=");
dumpln( input);
#endif
atom_t a = input;
atom_t before_a = NULL;
atom_t before_to = NULL;
atom_t first_to = NULL;
int to_end_depth = 0;
while( IS_NOT_EMPTY(a) )
{
if( IS_ANY_WORD(CAR(a)) )
{
// found TO or its synonym
if( same_words(CAR(a),word_to) || same_words(CAR(a),word_to_syn) )
{
if( to_end_depth==0 )
{
before_to = before_a;
first_to = a;
}
to_end_depth++;
}
// found END
if( same_words( CAR(a), word_end ))
{
// END without to ?
if( to_end_depth==0 )
{
atom_t result = new_error( ERROR_INCOMPLETE_PAIR, CAR(a) );
DEUSE( input );
return result;
}
to_end_depth--;
if( to_end_depth==0 )
{
// found END corresponding to TO.
// extract the whole TO..END
if( first_to==before_a )
{
DEUSE( input );
return new_error( ERROR_EMPTY_TO_END, first_to );
}
atom_t word = new_subword(WORD(CAR(first_to)),STRING(CAR(first_to)),(STRING(CAR(a))-STRING(CAR(first_to))+LENGTH(word_end)));
//atom_t word = empty_list;
first_to = behead(first_to);
//first_to = CDR(first_to);
CDR(before_a) = empty_list;
before_a = before_to;
a = behead(a);
if( before_to )
CDR(before_a) = a; // TO is not first item
else
input = a; // TO is first item
#ifdef DEBUG_TO_END
printf("<TO-END> EXTRACT:");
dumpln( first_to );
printf("<TO-END> CURRENT INPUT:");
dumpln( input );
printf("<TO-END> DEFINE IN PARENT:");
dumpln( parent );
#endif
// create new command/function
atom_t var = define_user_function( word, (first_to), parent );
DEUSE( word );
if( IS_ERROR(var) )
{
DEUSE( input );
return var;
}
DEUSE( first_to );
// process recursively the function
// for nested to-end definitions
BODY(var) = preparse( BODY(var), var, level+1 );
// next item is set explicitely,
SET_FLAGS( a, FLAG_NEWLINE );
first_to = NULL;
before_to = NULL;
continue;
}
}
}
// next item
before_a = a;
a = CDR(a);
}
// TO without END ?
if( first_to )
{
atom_t result = new_error( ERROR_INCOMPLETE_PAIR, CAR(first_to) );
DEUSE(input);
return result;
}
#ifdef DEBUG_TO_END
printf("<TO-END> LEFTOVERS=");
dumpln( input);
printf("\n");
#endif
return input;
}
| input | unparsed list |
| parent | parent var in which parsing is done |
| top_level | flag whether parsing is at its top-level |
Parses a list into a tree (aka LISP notation). The list is supposed to be tokenized as commands. The parent variable provides a context of variables which can be referenced from the parsed input.
running_compiled_code && bug #3442773
{
atom_t vars[MAX_ELEMS];
atom_t data[MAX_ELEMS];
atom_t poses[MAX_ELEMS]; // source position (if available)
int pris[MAX_ELEMS];
int usedby[MAX_ELEMS];
int largs[MAX_ELEMS]; // number of unlinked left arguments
int rargs[MAX_ELEMS]; // number of unlinked right arguments
int count; // count of elements in above arrays
atom_t aux = empty_list; // additional lists to delete
//printf("parse==");dumpln(input);
#ifdef DEBUG_PARSE
void dump_arrays( )
{
int i;
for( i=0; i<count; i++ ) if( pris[i]>=PRIORITY_MIN )
{
printf("<PARSE> %2d.[used by %2d; free=%d:%d] v%c0 pr=%d =",
i, usedby[i], largs[i], rargs[i], vars[i]?'#':'=', pris[i]);
dump_atom(data[i],1);
if( !IS_UNBOUND(poses[i]) )
{
printf(" @ ");
dump_atom(poses[i],1);
}
printf("\n");
}
printf("\n");
}
#endif
// Finds the first line of input. Return a pointer
// to the first node after the line. Sets arrays
// vars[], data[] and prios[].
atom_t get_line( atom_t input )
{
int has_no_infinite = 1;
count = 0;
while( IS_NOT_EMPTY(input) )
{
atom_t elem = USE(CAR(input));
//printf("testing element ");
//dumpln(elem);
#ifdef SAFEMODE
assert( count<MAX_ELEMS );
#endif
data[count] = elem;
vars[count] = IS_ANY_WORD(elem) ? find_var(elem,parent) : NULL;
if( VAR && !IS_COMMAND(VAR) && !IS_FUNCTION(VAR) )
{
// If we have variable called 4 (e.g. MAKE 4 "BOZA)
// then VAR!=NULL, but this is allowed only for
// functions and commands, so set VAR=NULL!
VAR=NULL;
}
pris[count] = (VAR ? PRIORITY(VAR) : PRIORITY_MAX);
usedby[count] = -1;
largs[count] = VAR ? LARGS(VAR) : 0;
rargs[count] = VAR ? RARGS(VAR) : 0;
poses[count] = IS_EXTENDED(input)?POS(input):unbound;
if( VAR && IS_VARIABLE(VAR) )
{
printf("found variable as function: ");
dump_atom(NAME(VAR),1);
printf("\n");
}
// if element is a word not starting with ":" and
// cannot be transfered into a number and does not
// exist as a variable, then this is unknown function
float64_t x;
if( IS_ANY_WORD(elem) &&
!VAR &&
*STRING(elem)!=TEXT(':') &&
*STRING(elem)!=TEXT('"') &&
!atom_to_float(elem,&x) )
{
//DEUSE( input );
DEUSE( elem );
return new_error( ERROR_DO_NOT_KNOW, elem );
}
if( !top_level && VAR && has_no_infinite )
{
if( GET_FLAGS(VAR,FLAG_INFINITE_ARGS) )
{
rargs[count]=-1;
has_no_infinite = 0;
}
}
// test for unary
if( VAR && GET_FLAGS(VAR,FLAG_CAN_BE_UNARY) && largs[count]==1 )
{
// case 1: <spc> op <spc> -> binary
// case 2: <spc> op -> unary
// case 3: op <spc> -> binary
// case 4: op -> binary
if( GET_FLAGS(input,FLAG_AFTER_SPACE|FLAG_BEFORE_SPACE)
== FLAG_AFTER_SPACE )
largs[count]=0;
}
count++;
//next:
input = CDR(input);
// if the next item is on a new line AND if it is
// not constant-list AND we are not in a sublist
// then break the line here
if( top_level && IS_NOT_EMPTY(input) && GET_FLAGS(input,FLAG_NEWLINE) &&
(!IS_LIST(CAR(input)) || IS_EXPRESSION(CAR(input))) )
break;
} // line while
return input;
} //get_line()
#ifdef DEBUG_PARSE
printf("<PARSE> ENTER=");
dumpln( input);
#endif
//-----------------------------------------
// STEP 1: Convert (..) into subexpressions
//-----------------------------------------
if( IS_EMPTY(input) ) return input;
if( IS_ERROR(input) ) return input;
atom_t orig_input = input = parentheses( input );
if( IS_ERROR(input) ) return input;
#ifdef DEBUG_PARSE
printf("<PARSE> PARENTHESISEZ=");
dumpln( input);
#endif
// if there is no parent consider the root as parent
if( !parent ) parent = root;
//-----------------------------------------
// STEP 2-7: Convert list into lisp line-by-line
//-----------------------------------------
atom_t result = empty_list;
atom_t last = empty_list;
atom_t err = NULL;
int i,j,finished;
int* args;
#define LINK \
{ \
usedby[j] = i; \
args[i]--; \
}
//printf("%d is used by %d\n",j,i);
#define RELINK \
{ \
if( j<usedby[j] ) { largs[usedby[j]]++; finished=0; } \
if( j>usedby[j] ) { rargs[usedby[j]]++; finished=0; } \
LINK; \
}
while( IS_NOT_EMPTY(input) ) // main while
{
next_line:
#ifdef DEBUG_PARSE
printf("\n\n");
printf("<PARSE>------------------\n");
printf("<PARSE> START A NEW LINE \n");
printf("<PARSE>------------------\n");
#endif
//-----------------------------------
// STEP 2: Get a line from the input
//-----------------------------------
input = get_line( input );
if( IS_ERROR(input) )
{
err = input;
break;
}
#ifdef DEBUG_PARSE
printf("<PARSE> ORIGINAL PARSE_LIST:\n");
dump_arrays( );
#endif
//printf("<PARSE> LEFTOVER:"); dumpln(input);
//printf("------------------------------------\n");
//-----------------------------------
// STEP 3: Analyze and set dependencies between items
//-----------------------------------
finished = 0;
while( !finished )
{
finished = 1;
for( i=0; i<count; i++ )
{
// now process right parameters
args = rargs;
for( j=i+1; args[i] && j<count; j++ )
{
if( vars[j] && !IS_FUNCTION(vars[j]) )
break;
if( usedby[j]==-1 )
LINK
else
if( (usedby[j]>j || usedby[j]<i) &&
(pris[usedby[j]]<=pris[i]) )
RELINK;
} // for largs
// now process left parameters
args = largs;
for( j=i-1; args[i] && j>=0; j-- )
{
//if(vars[j])
//{
//printf("j=%d cond=%d var=",j,!IS_FUNCTION(vars[j]));
//dumpln(vars[j]);
//}
if( vars[j] && !IS_FUNCTION(vars[j]) )
break;
if( usedby[i]==j ) break;
if( usedby[j]==-1 )
{
/*if( usedby[i]!=j )*/ LINK; // skip direct circular references
}
else
if( (usedby[j]<j || usedby[j]>i) &&
(pris[usedby[j]]<pris[i]) )
{
/*if( usedby[i]!=j )*/ RELINK; // skip direct circular reference
}
} // for largs
} // for each element
} // while not finished
#ifdef DEBUG_PARSE
printf("<PARSE> FINAL PARSE_LIST:\n");
dump_arrays( );
#endif
//-----------------------------------
// STEP 4: Check for extra or missing arguments
//-----------------------------------
#ifdef DEBUG_PARSE
printf("<PARSE> START STEP 4:\n");
dump_arrays( );
#endif
args = rargs;
int top_level_pos = -1;
for( j=0; j<count; j++ )
{
// check whether there are two or more free items
// only one is allowed for non-top levels
if( usedby[j]==-1 )
{
if( !top_level && top_level_pos>=0 )
{
// we are ready to announce that there are extra values
// but before doing this check whether there is function
// that would be so polite to take any of them
//printf(">>> top_level_pos=%d\n",top_level_pos);
if( rargs[top_level_pos]==0 &&
vars[top_level_pos] &&
GET_FLAGS(vars[top_level_pos],FLAG_MAY_HAVE_EXTRA_ARG) )
{
i = top_level_pos;
//printf(">>> attach %d to %d\n",j,i);
//printf(">>> rargs
LINK;
//printf(">>>>>>>"); dumpln(NAME(vars[top_level_pos]));
}
else
{
err = new_error( ERROR_CROWDED_EXPRESSION, data[j] );
break;
}
}
if( top_level_pos<0 ) top_level_pos = j;
}
// left paramers can be skipped only for unary functions
if( largs[j] && !GET_FLAGS(vars[j],FLAG_CAN_BE_UNARY) )
{
err = new_error( ERROR_MISSING_LEFTS, data[j] );
break;
}
// some functions may ignore 1 missing right parameter
if( rargs[j]==1 && GET_FLAGS(vars[j],FLAG_MAY_SKIP_LAST_ARG) )
continue;
// when arguments can be infinite, this also mean any number
if( rargs[j] && !GET_FLAGS(vars[j],FLAG_INFINITE_ARGS) )
{
//printf("rargs=%d\n",rargs[j]);
err = new_error( ERROR_MISSING_RIGHTS, data[j] );
break;
}
}
if( err ) break;
//-----------------------------------
// STEP 5: Recursivelly process nested expressions
// and arguments which must be lisp'ed too
//-----------------------------------
#ifdef DEBUG_PARSE
printf("<PARSE> START STEP 5:\n");
dump_arrays( );
#endif
for( i=0; i<count; i++ ) if( IS_LIST(data[i]) )
{
if( GET_FLAGS(data[i],FLAG_EXPRESSION) )
{ // nested expression
atom_t expr = parse( data[i], parent, 0 );
if( IS_ERROR(expr) )
{
err = expr;
}
else
{
DEUSE( data[i] );
data[i] = expr;
SET_FLAGS( data[i], FLAG_EXPRESSION );
}
}
else
{ // possibly argument to be reparsed
j=usedby[i];
if( (j>=0) && vars[j] && GET_FLAGS(vars[j],FLAG_PROCESS_ARGS) )
{
atom_t expr = build_syntax_subtree( parent, data[i] );
if( IS_ERROR(expr) )
{
err = expr;
}
else
{
DEUSE( data[i] );
data[i] = expr;
SET_FLAGS( data[i], FLAG_EXPRESSION+FLAG_WAS_LIST_CONST );
}
}
}
}
if( err ) break;
//-----------------------------------
// STEP 6: Group items of subexpressions, process numbers
//-----------------------------------
#ifdef DEBUG_PARSE
printf("<PARSE> START STEP 6:\n");
dump_arrays( );
#endif
for( i=0; i<count; i++ )
{
/*
// convert all numeric data into numbers
float64_t n;
if( IS_ANY_WORD(data[i]) && atom_to_float(data[i],&n) )
{
DEUSE( data[i] );
data[i] = new_float( n );
}
// create a list node for each element
// whatever the element is, this list node will be used
data[i] = new_list( data[i], empty_list );
SET_FLAGS( data[i], FLAG_EXPRESSION );
*/
// create a list node for each element
// whatever the element is, this list node will be used
float64_t n;
if( IS_ANY_WORD(data[i]) && atom_to_float(data[i],&n) )
{
atom_t pos = data[i]; // old source of the number
data[i] = new_list_ex( new_float(n), empty_list );
POS( data[i] ) = pos;
}
else
{
data[i] = new_list( data[i], empty_list );
}
SET_FLAGS( data[i], FLAG_EXPRESSION );
}
for( i=0; i<count; i++ )
{
if( usedby[i]>=0 )
{
atom_t a = data[usedby[i]];
atom_t name = CAR(data[i]);
atom_t function = vars[usedby[i]];
//printf("name(%d)=",i);dumpln(name);
//printf("function(%d)=",usedby[i]);dumpln(function);
//printf("a(%d)=",usedby[i]);dumpln(a);
// if the item is word constant and is used by
// a function which creates variables, then do
// create the variables now.
if( GET_FLAGS(function,FLAG_SET_ALL_VARS)
&& IS_ANY_WORD(name)
&& LENGTH(name)>1
&& *STRING(name)==TEXT('"') )
{
//printf("create local "); dumpln(name);
//printf(" in "); dumpln(NAME(parent));
atom_t var = new_local_var( name, parent, 1 );
if( IS_ERROR(var) ) err = var;
SET_FLAGS( var, FLAG_VARIABLE );
}
// if the item is word constant and is used by
// a function which creates a single variable
// (like MAKE), then do create the variable as
// global if it does not exist
// 2010.06.26: create the variable only if the
// parsing has not been activated at run-time.
atom_t real_name = NULL;
if( !running_compiled_code
&& GET_FLAGS(function,FLAG_SET_ONE_VAR)
&& (i==usedby[i]+1)
&& (IS_INTEGER(name) || IS_FLOAT(name)) )
{
real_name = atom_to_word( name );
goto use_numeric_name;
}
if(
GET_FLAGS(function,FLAG_SET_ONE_VAR)
&& (i==usedby[i]+1)
&& IS_ANY_WORD(name)
&& LENGTH(name)>1
&& *STRING(name)==TEXT('"') )
{
real_name = new_subword( name, STRING(name)+1, LENGTH(name)-1 );
use_numeric_name:
if( !find_var( real_name, parent ) )
{
atom_t var = new_local_var( real_name, parent/*root*/, 0 ); //printf("!2087!");
if( IS_ERROR(var) ) err = var;
SET_FLAGS( var, FLAG_VARIABLE );
}
DEUSE( real_name );
// patch for FOR command. If the control
// variable is ABC then create ABC and
// ^ABC
if( ADDRESS(function)==(int)rt_for )
{
assert( IS_ANY_WORD(name) );
atom_t real_name = new_word( STRING(name), LENGTH(name) );
*STRING(real_name) = L'^';
if( !find_var( real_name, parent ) )
{
atom_t var = new_local_var( real_name, parent/*root*/, 0 ); //printf("!2103!");
if( IS_ERROR(var) ) err = var;
SET_FLAGS( var, FLAG_VARIABLE );
}
DEUSE( real_name );
}
}
// if the item is word constant and is used by
// a function which load libraries, then do
// load the library now.
if( ADDRESS(function)==(int)rt_load
&& IS_ANY_WORD(name)
&& LENGTH(name)>1
&& *STRING(name)==TEXT('"') )
{
atom_t real_name = new_word( STRING(name)+1, LENGTH(name)-1 );
atom_t word = read_word( STRING(real_name) );
DEUSE( real_name );
if( IS_ERROR(word) ) { err = USE(word); break; }
atom_t trimmed = trim_shell_comment( word );
DEUSE( word );
atom_t tokens1 = tokenize( trimmed, TOKENIZE_DATA );
DEUSE( trimmed );
if( IS_ERROR(tokens1) )
{
err = tokens1;
break;
}
atom_t tokens2 = tokenize( tokens1, TOKENIZE_COMMANDS );
DEUSE( tokens1 );
if( IS_ERROR(tokens2) ) return tokens2;
tokens2 = preparse( tokens2, root, LEVEL(root) );
atom_t body = parentheses( tokens2 );
DEUSE( tokens2 );
//printf("BODY=");dumpln(body);
//printf("input=");dumpln(input);
if( IS_NOT_EMPTY(body) )
{
atom_t a;
for( a = body; IS_NOT_EMPTY(CDR(a)); a=CDR(a) );
CDR(a) = USE(input);
input = body;
}
//printf("old aux="); dumpln(aux);
aux = new_list( body, aux );
//printf("new aux="); dumpln(aux);
for( i=0; i<count; i++ ) DEUSE( data[i] );
goto next_line;
}
SET_FLAGS( a, FLAG_EXPRESSION );
while( IS_NOT_EMPTY(CDR(a)) ) a=CDR(a);
CDR(a) = vars[i] ? new_list(data[i],empty_list) : data[i];
}
}
if( err ) break;
//-----------------------------------
// STEP 7: Group top-most items into result list
//-----------------------------------
#ifdef DEBUG_PARSE
printf("<PARSE> START STEP 7:\n");
dump_arrays( );
#endif
//printf("data[0]=");dumpln(data[0]);
//printf("vars[0]=");dumpln(vars[0]);
for( i=0; i<count; i++ )
if( usedby[i]==-1 )
{
//printf("i=%d var=%x is_expr=%d\n",i,vars[i],IS_EXPRESSION(CAR(data[i])));
//boza if( !vars[i] && !IS_EXPRESSION(CAR(data[i])))
//boza {
//printf("i=%d\n",i);
//boza err = new_error_atom( ERROR_UNUSED_VALUE, data[i] );
//boza break;
//boza }
if( IS_UNBOUND(poses[i]) )
{
append( data[i], &result, &last );
}
else
{
append_ex( data[i], &result, &last );
DEUSE( POS(last) );
POS( last ) = USE( poses[i] );
}
}
if( err ) break;
} // main line
//-----------------------------------------
// STEP pre-8: Test for error and release unused resources
//-----------------------------------------
DEUSE( orig_input );
DEUSE( aux );
if( err )
{
//printf("ERRRRRRROR\n");
for( i=0; i<count; i++ ) DEUSE( data[i] );
DEUSE( result );
return err;
}
//-----------------------------------
// STEP 8: Remove unnecessary parentheses
//-----------------------------------
//printf("before=");dumpln(result);
//
// step 8 removed, because in some cases it removes more
// parentheses than needed - e.g. when function body has
// only one command
// while( IS_EMPTY(CDR(result)) &&
// IS_LIST(CAR(result)) &&
// GET_FLAGS(CAR(result),FLAG_EXPRESSION) )
// {
// atom_t a = CAR(result);
// CAR(result) = empty_list;
// DEUSE( result );
// result = a;
// }
//
//printf(" after=");dumpln(result);
if( !top_level ) SET_FLAGS( result, FLAG_EXPRESSION );
#ifdef DEBUG_PARSE
printf("<PARSE> EXIT=");
dumpln(result);
#endif
if( OPTION_DUMP_AST )
{
outter( TEXT("Abstract Syntax Tree:\n\0"), UNKNOWN );
dumpln( result );
outter( TEXT("\n\0"), UNKNOWN );
}
return result;
}
| atom_t build_syntax_tree | ( | atom_t | func | ) |
| func | var atom for the parse context |
Parses completely a function. Its source is stored in its body as word, subword, data-tokenized list or command-tokenized list. Building algorithm:
TO ... ENDs and create them as subfunctions{
// exit is function is already treefied
if( IS_NOT_EMPTY(TREE(func)) ) return empty_list;
//printf("BUILD_SYNTAX_TREE(FUNC=");
//dump(NAME(func));
//printf(",SOURCE=");
//dump(SOURCE(func));
//printf(",BODY=");
//dump(BODY(func));
//printf(",LOCALS=");
//dump(LOCALS(func));
//printf(")\n\n");
if( IS_EMPTY(BODY(func)) )
{
// Step 1. Tokenize
//printf("SOURCE="); dumpln(SOURCE(func));
atom_t tokens1 = tokenize( SOURCE(func), TOKENIZE_DATA );
if( IS_ERROR(tokens1) ) return tokens1;
//printf("TOKENS1="); dumpln(tokens1);
atom_t tokens2 = tokenize( tokens1, TOKENIZE_COMMANDS );
DEUSE( tokens1 );
if( IS_ERROR(tokens2) ) return tokens2;
//printf("TOKENS2="); dumpln(tokens2);
// Step 2. Extract TO..END's
atom_t body = preparse( tokens2, func, LEVEL(func) );
if( IS_ERROR(body) ) return body;
DEUSE( BODY(func) );
BODY(func) = body;
//printf("BODY="); dumpln(BODY(func));
}
// Step 3. Parse function body
//printf("BODY="); dumpln(BODY(func));
atom_t tree = parse( BODY(func), func, 1 );
if( IS_ERROR(tree) ) return tree;
DEUSE(TREE(func));
TREE(func) = tree;
//printf("TREE="); dumpln(TREE(func));
// because the might be some new TO..ENDs
// scan all locals and build those which
// have no trees
atom_t local;
atom_t locals;
for( locals=LOCALS(func); IS_NOT_EMPTY(locals); locals=CDR(locals) )
{
local = CAR(locals);
if( !DESCR2(local) ) continue;
atom_t x = build_syntax_tree( local );
if( IS_ERROR(x) ) return x;
}
return empty_list;
}