Lhogho
0.0.027
|
Defines | |
#define | GET_CHAR ch=*source |
get current character | |
#define | VAR vars[count] |
shortens access to vars array | |
#define | LINK |
#define | RELINK |
Tokentypes | |
#define | TOKEN_END 0 |
no more tokens available | |
#define | TOKEN_WORD 1 |
normal word token | |
#define | TOKEN_DIRTY_WORD 2 |
word token with unhandled backslashs or bars | |
#define | TOKEN_OPEN 3 |
open square bracket token | |
#define | TOKEN_CLOSE 4 |
close square bracket token | |
#define | TOKEN_LINEEND 5 |
end of line token | |
#define | TOKEN_SPACE 6 |
whitespace token | |
#define | TOKEN_ERROR 7 |
error occured | |
Parse modes | |
#define | MODE_ENTRY 0 |
entry mode | |
#define | MODE_SPACE 1 |
space mode | |
#define | MODE_WORD 2 |
word mode | |
#define | MODE_BARRED 3 |
barred mode | |
#define | MODE_BACKSLASHED 4 |
backslashed mode | |
#define | MODE_TILDE 5 |
tilde mode | |
#define | MODE_SEMITILDE 6 |
semitilde mode | |
#define | MODE_SEMICOLON 7 |
semicolon mode | |
#define | MODE_TILDESPACE 8 |
tildespace mode | |
#define | MODE_LESS 9 |
less mode | |
#define | MODE_GREATER 10 |
greater mode | |
#define | MAX_MODE 11 |
greatest mode + 1 | |
Microprogram defines (basic) | |
#define | PAT_TILDE 0x000001 |
push tilde | |
#define | PAT_PUSH 0x000002 |
push last character | |
#define | PAT_NEXT 0x000004 |
goto next character | |
#define | PAT_DIRTY 0x000008 |
set dirty flag | |
#define | PAT_MUTATED 0x000010 |
set mutated flag | |
#define | PAT_RETURN_TO_WORD 0x000020 |
next return will return to word mode | |
#define | PAT_RETURN_TO_SELF 0x000040 |
next return will return to current mode | |
#define | PAT_GOTO 0x000080 |
follows 4bit go-to-mode | |
#define | PAT_TOKEN 0x000100 |
follows 4bit token_id | |
#define | PAT_RETURN 0x000200 |
return to stack-to-mode | |
#define | PAT_ERROR_POS 0x000400 |
remember current position as error position | |
#define | PAT_ERROR 0x000800 |
generate error | |
#define | PAT_RETURN_TO_CALLER 0x001000 |
next return will return to the caller of this mode | |
#define | PAT_NEXT2 0x002000 |
goto next next character | |
Microprogram defines (compound) | |
#define | PAT_SHIFT 16 |
shift factor | |
#define | PAT_TOKEN_END PAT_TOKEN+(TOKEN_END<<PAT_SHIFT) |
microprogram for end token | |
#define | PAT_TOKEN_SPACE PAT_TOKEN+(TOKEN_SPACE<<PAT_SHIFT) |
microprogram for space token | |
#define | PAT_TOKEN_WORD PAT_TOKEN+(TOKEN_WORD<<PAT_SHIFT) |
microprogram for word token | |
#define | PAT_TOKEN_LINE PAT_TOKEN+(TOKEN_LINEEND<<PAT_SHIFT) |
microprogram for line end token | |
#define | PAT_TOKEN_OPEN PAT_TOKEN+(TOKEN_OPEN<<PAT_SHIFT) |
microprogram for open token | |
#define | PAT_TOKEN_CLOSE PAT_TOKEN+(TOKEN_CLOSE<<PAT_SHIFT) |
microprogram for close token | |
#define | PAT_GOTO_SPACE PAT_GOTO+(MODE_SPACE<<PAT_SHIFT) |
microprogram for goto space mode | |
#define | PAT_GOTO_WORD PAT_GOTO+(MODE_WORD<<PAT_SHIFT) |
microprogram for goto word mode | |
#define | PAT_GOTO_BACKSLASHED PAT_GOTO+(MODE_BACKSLASHED<<PAT_SHIFT) |
microprogram for goto backslashed mode | |
#define | PAT_GOTO_BARRED PAT_GOTO+(MODE_BARRED<<PAT_SHIFT) |
microprogram for goto barred mode | |
#define | PAT_GOTO_SEMICOLON PAT_GOTO+(MODE_SEMICOLON<<PAT_SHIFT) |
microprogram for goto semicolon mode | |
#define | PAT_GOTO_TILDE PAT_GOTO+(MODE_TILDE<<PAT_SHIFT) |
microprogram for goto tilde mode | |
#define | PAT_GOTO_TILDESPACE PAT_GOTO+(MODE_TILDESPACE<<PAT_SHIFT) |
microprogram for goto tildespace mode | |
#define | PAT_GOTO_SEMITILDE PAT_GOTO+(MODE_SEMITILDE<<PAT_SHIFT) |
microprogram for goto semitilde mode | |
#define | PAT_GOTO_LESS PAT_GOTO+(MODE_LESS<<PAT_SHIFT) |
microprogram for goto less mode | |
#define | PAT_GOTO_GREATER PAT_GOTO+(MODE_GREATER<<PAT_SHIFT) |
microprogram for goto greater mode | |
Special characters | |
When a chracter is barrable and is actually barred then its code is changed. For example codes for space and barred space are different for Lhogho, but should appear the same for users. Barrable characters are ()+- | |
#define | MAX_ELEMS 128 |
maximal number of elements in a statement | |
char_t | debar [32] |
table for a->|a| conversions | |
atom_t | build_syntax_subtree (atom_t function, atom_t source) |
parses sublist of commands | |
void | init_parser () |
initializes parser | |
atom_t | tokenize (atom_t input, int method) |
tokenizes into a list | |
atom_t | trim_shell_comment (atom_t word) |
trims shell comment (if any) | |
atom_t | purify (atom_t word) |
purifies a word | |
atom_t | parentheses (atom_t input) |
processes parentheses | |
atom_t | define_user_function (atom_t source, atom_t input, atom_t parent) |
creates var atom for a user-defined function | |
atom_t | preparse (atom_t input, atom_t parent, int level) |
extracts all TO ... END definitions | |
atom_t | parse (atom_t input, atom_t parent, int top_level) |
parses a list into a tree | |
atom_t | build_syntax_tree (atom_t func) |
parses body of user-defined function |
#define GET_CHAR ch=*source |
#define TOKEN_END 0 |
#define TOKEN_WORD 1 |
#define TOKEN_DIRTY_WORD 2 |
#define TOKEN_OPEN 3 |
#define TOKEN_CLOSE 4 |
#define TOKEN_LINEEND 5 |
#define TOKEN_SPACE 6 |
#define TOKEN_ERROR 7 |
#define MODE_ENTRY 0 |
#define MODE_SPACE 1 |
#define MODE_WORD 2 |
#define MODE_BARRED 3 |
#define MODE_BACKSLASHED 4 |
#define MODE_TILDE 5 |
#define MODE_SEMITILDE 6 |
#define MODE_SEMICOLON 7 |
#define MODE_TILDESPACE 8 |
#define MODE_LESS 9 |
#define MODE_GREATER 10 |
#define MAX_MODE 11 |
#define PAT_TILDE 0x000001 |
#define PAT_PUSH 0x000002 |
#define PAT_NEXT 0x000004 |
#define PAT_DIRTY 0x000008 |
#define PAT_MUTATED 0x000010 |
#define PAT_RETURN_TO_WORD 0x000020 |
#define PAT_RETURN_TO_SELF 0x000040 |
#define PAT_GOTO 0x000080 |
#define PAT_TOKEN 0x000100 |
#define PAT_RETURN 0x000200 |
#define PAT_ERROR_POS 0x000400 |
#define PAT_ERROR 0x000800 |
#define PAT_RETURN_TO_CALLER 0x001000 |
#define PAT_NEXT2 0x002000 |
#define PAT_SHIFT 16 |
#define PAT_TOKEN_END PAT_TOKEN+(TOKEN_END<<PAT_SHIFT) |
#define PAT_TOKEN_SPACE PAT_TOKEN+(TOKEN_SPACE<<PAT_SHIFT) |
#define PAT_TOKEN_WORD PAT_TOKEN+(TOKEN_WORD<<PAT_SHIFT) |
#define PAT_TOKEN_LINE PAT_TOKEN+(TOKEN_LINEEND<<PAT_SHIFT) |
#define PAT_TOKEN_OPEN PAT_TOKEN+(TOKEN_OPEN<<PAT_SHIFT) |
#define PAT_TOKEN_CLOSE PAT_TOKEN+(TOKEN_CLOSE<<PAT_SHIFT) |
#define PAT_GOTO_SPACE PAT_GOTO+(MODE_SPACE<<PAT_SHIFT) |
#define PAT_GOTO_WORD PAT_GOTO+(MODE_WORD<<PAT_SHIFT) |
#define PAT_GOTO_BACKSLASHED PAT_GOTO+(MODE_BACKSLASHED<<PAT_SHIFT) |
#define PAT_GOTO_BARRED PAT_GOTO+(MODE_BARRED<<PAT_SHIFT) |
#define PAT_GOTO_SEMICOLON PAT_GOTO+(MODE_SEMICOLON<<PAT_SHIFT) |
#define PAT_GOTO_TILDE PAT_GOTO+(MODE_TILDE<<PAT_SHIFT) |
#define PAT_GOTO_TILDESPACE PAT_GOTO+(MODE_TILDESPACE<<PAT_SHIFT) |
#define PAT_GOTO_SEMITILDE PAT_GOTO+(MODE_SEMITILDE<<PAT_SHIFT) |
#define PAT_GOTO_LESS PAT_GOTO+(MODE_LESS<<PAT_SHIFT) |
#define PAT_GOTO_GREATER PAT_GOTO+(MODE_GREATER<<PAT_SHIFT) |
#define MAX_ELEMS 128 |
#define LINK |
{ \ usedby[j] = i; \ args[i]--; \ }
#define RELINK |
{ \ if( j<usedby[j] ) { largs[usedby[j]]++; finished=0; } \ if( j>usedby[j] ) { rargs[usedby[j]]++; finished=0; } \ LINK; \ }
atom_t build_syntax_subtree | ( | atom_t | function, |
atom_t | source | ||
) |
function | var atom for the parse context |
source | word or list containing the source |
Parses a list of commands (like these in IF or REPEAT) and build an abstract syntax tree:
TO
... ENDs
and create them as subfunctions{ //printf("BUILD_SYNTAX_SUBTREE("); //dump(NAME(function)); //printf(","); //dump(source); //printf(")\n\n"); // tokenize atom_t tokens1 = tokenize( source, TOKENIZE_DATA ); if( IS_ERROR(tokens1) ) return tokens1; //printf("tokens1="); dumpln(tokens1); atom_t tokens2 = tokenize( tokens1, TOKENIZE_COMMANDS ); DEUSE( tokens1 ); if( IS_ERROR(tokens2) ) return tokens2; //printf("tokens2="); dumpln(tokens2); // extract TO..END's atom_t body = preparse( tokens2, function, LEVEL(function) ); if( IS_ERROR(body) ) return body; //printf("body="); dumpln(body); // parse function body atom_t tree = parse( body, function, 1 ); if( IS_ERROR(tree) ) return tree; //printf("tree="); dumpln(tree); DEUSE( tokens2 ); //DEUSE( body ); return tree; }
void init_parser | ( | ) |
Initializes tables enbar[] and debar[] which are used to enbar and debar a character.
{ int i; // NOTE! if the string of enbarrable characters is // changed, also change dump_word() chars_t s = TEXT("______________()+-*/=<>|?_______"); //locked-> x........xx..x.................. for( i=0; i<32; i++ ) debar[i]=i; // identity matrix for( i=0; i<128; i++ ) enbar[i]=i; // identity matrix for( i=0; *s; i++,s++ ) { if( *s==TEXT('_') ) continue; debar[i] = *s; enbar[(unsigned char)*s] = i; } }
input | word, subword or list to be tokenized |
method | method of tokenization |
Tokenizes a word, a subword or a list into a list. If possible makes all words as subwords. Backslashes and bars in words are preserved (i.e. words are not purified). Comments and line continuations are ignored.
If the method is TOKENIZE_DATA then the input is tokenized as if it contains Logo data. If the method is TOKENIZE_COMMANDS then the input is tokenized as if it contains Logo commands. If the method is TOKENIZE_READWORD then the input is tokenized as expected by READWORD function.
If the input is a list then all its elements are tokenized one-by-one.
Return value is the tokenized list. In there is an error, returns an error atom which error code is ERROR_INCOMPLETE_PAIR, error position points the position in the word (0-based) and the error source is the word itself.
{ if( IS_LIST(input) ) { if( method==TOKENIZE_DATA && GET_FLAGS( input, FLAG_TOKENIZED_DATA|FLAG_TOKENIZED_COMMANDS ) ) return USE(input); if( method==TOKENIZE_COMMANDS && GET_FLAGS( input, FLAG_TOKENIZED_COMMANDS ) ) return USE(input); } if( IS_FLOAT(input) ) { return USE(input); } //printf("\n"); //if(method==TOKENIZE_DATA) // printf("ENTER TOKENIZE_DATA("); //else // printf("ENTER TOKENIZE_COMMANDS("); //dump_atom(input,1); //printf(")\n"); #ifdef SAFEMODE assert( IS_WORD(input) || IS_SUBWORD(input) || IS_LIST(input) ); #endif // First check whether the input is a list. // If it is then tokenize recursively all its elements. if( IS_LIST(input) ) { atom_t result = empty_list; atom_t last = empty_list; atom_t x; atom_t y; for( x=input; IS_NOT_EMPTY(x); x=CDR(x) ) { //printf("@@@@@@@@PROCESSING=|"); dump_atom(CAR(x),1); printf("|\n"); int submethod = IS_LIST(CAR(x))?TOKENIZE_DATA:method; atom_t element = tokenize( CAR(x), submethod ); //printf("@@@@@@@@TOKENIZED_INTO=|"); dump_atom(element,1); printf("|\n"); if( IS_ERROR(element) ) { DEUSE( result ); //DEUSE( last ); result = element; break; } int initial_flags = GET_FLAGS( x, FLAG_NEWLINE|FLAG_AFTER_SPACE ); int final_flags = GET_FLAGS( x, FLAG_BEFORE_SPACE ); //printf(">>>CAR(x) = "); dump_atom(CAR(x),1); printf("\n"); //printf(">>>element = "); dump_atom(element,1); printf("\n"); if( IS_FLOAT(element) ) { append( USE(element), &result, &last ); SET_FLAGS( last, initial_flags|final_flags ); } else if( IS_LIST(CAR(x)) ) { if( IS_EXTENDED(x) ) { append_ex( USE(element), &result, &last ); DEUSE( POS(last) ); POS( last ) = USE( POS(x) ); } else append( USE(element), &result, &last ); SET_FLAGS( last, initial_flags|final_flags ); } else { for( y=element; IS_NOT_EMPTY(y); y=CDR(y) ) { //printf("APPEND SUBELEMENT |"); //dump_atom(CAR(y),1); //printf("|\n"); if( IS_EXTENDED(y) ) { append_ex( USE(CAR(y)), &result, &last ); DEUSE( POS(last) ); POS( last ) = USE( POS(y) ); } else append( USE(CAR(y)), &result, &last ); SET_FLAGS( last, FLAGS(y) ); if( y==element ) SET_FLAGS( last, initial_flags ); } #ifdef SAFE_MODE assert( IS_NOT_EMPTY(last) ); #endif SET_FLAGS( last, final_flags); } DEUSE( element ); } //printf("FINAL RESULT IS |"); dump_atom(result,1); printf("|\n"); return result; } // The input is a word or a subword chars_t source = STRING(input); int len = LENGTH(input); int origlen = len; chars_t buffer = ALLOC( CHAR_SIZE*len ); // buffer for the longest word //chars_t bp = buffer; char_t ch; int_t errpos = -1; //char_t errchar = NULL_CHAR; int last_token = TOKEN_LINEEND; //int crlf = 0; // Gets the next token. Return: // TOKEN_END if there are no more tokens // TOKEN_WORD if the token is a word // TOKEN_DIRTY_WORD if the token is a word with \ or | // TOKEN_OPEN if the token is [ // TOKEN_CLOSE if the token is ] // TOKEN_LINEEND if the token is <nl> // TOKEN_SPACE if at least one whitespace is met //int co=0; int get_token( atom_t *token, int method ) { //co++; //if (0 == co%1024) //{ //printf("%d ",co); //} // return 1 if buffer contains number int is_number(chars_t bp) { // "E" {digit}* "." {digit}+ // 1 2 3 4 chars_t cp = bp; int num_mode = 1; char_t ch; cp = bp; if( bp==buffer ) return 0; while( cp>buffer ) { cp--; ch = *cp; //printf("num_mode=%d ch=%C\n",num_mode,ch); switch( num_mode ) { case 1: if( ch!=TEXT('E') && ch!=TEXT('e') ) return 0; num_mode = 2; break; case 2: ; if( ch<TEXT('0') || ch>TEXT('9') ) return 0; num_mode = 3; break; case 3: ; if( ch!=TEXT('.') ) return 0; num_mode = 4; break; case 4: ; if( ch<TEXT('0') || ch>TEXT('9') ) return 0; break; } //printf("num_mode=%d\n",num_mode); } return 1; } if( !len ) return TOKEN_END; int dirty = 0; int mutated = 0; chars_t bp = buffer; *bp=NULL_CHAR; chars_t sp = source; int mode = MODE_ENTRY; // current mode int code; // action code int stack[MAX_MODE]; // return-to-mode for each mode static int mode_eof[MAX_MODE] = { /* entry */ PAT_TOKEN_END, /* whitespace */ PAT_TOKEN_SPACE, /* word */ PAT_TOKEN_WORD, /* barred */ PAT_ERROR, /* backslashed */ PAT_ERROR, /* tilde */ PAT_ERROR, /* semitilde */ PAT_ERROR, /* semicolon */ PAT_RETURN, /* tildespace */ PAT_ERROR, /* less */ PAT_TOKEN_WORD, /* greater */ PAT_TOKEN_WORD }; static int mode_eol[MAX_MODE] = { /* entry */ PAT_NEXT+PAT_TOKEN_LINE, /* whitespace */ PAT_TOKEN_SPACE, /* word */ PAT_TOKEN_WORD, /* barred */ PAT_PUSH+PAT_NEXT, /* backslashed */ PAT_PUSH+PAT_NEXT+PAT_RETURN, /* tilde */ PAT_NEXT+PAT_RETURN, /* semitilde */ PAT_NEXT+PAT_RETURN, /* semicolon */ PAT_RETURN, /* tildespace */ PAT_NEXT+PAT_RETURN, /* less */ PAT_TOKEN_WORD, /* greater */ PAT_TOKEN_WORD }; static int mode_space[MAX_MODE] = { /* entry */ PAT_NEXT+PAT_GOTO_SPACE, /* whitespace */ PAT_NEXT, /* word */ PAT_TOKEN_WORD, /* barred */ PAT_PUSH+PAT_NEXT, /* backslashed */ PAT_PUSH+PAT_NEXT+PAT_RETURN, /* tilde */ PAT_NEXT+PAT_GOTO_TILDESPACE, /* semitilde */ PAT_NEXT, /* semicolon */ PAT_NEXT, /* tildespace */ PAT_NEXT, /* less */ PAT_TOKEN_WORD, /* greater */ PAT_TOKEN_WORD }; static int mode_open[MAX_MODE] = { /* entry */ PAT_NEXT+PAT_TOKEN_OPEN, /* whitespace */ PAT_TOKEN_SPACE, /* word */ PAT_TOKEN_WORD, /* barred */ PAT_PUSH+PAT_NEXT, /* backslashed */ PAT_PUSH+PAT_NEXT+PAT_RETURN, /* tilde */ PAT_TILDE+PAT_RETURN, /* semitilde */ PAT_RETURN, /* semicolon */ PAT_NEXT, /* tildespace */ PAT_TILDE+PAT_TOKEN_WORD, /* less */ PAT_TOKEN_WORD, /* greater */ PAT_TOKEN_WORD }; static int mode_close[MAX_MODE] = { /* entry */ PAT_NEXT+PAT_TOKEN_CLOSE, /* whitespace */ PAT_TOKEN_SPACE, /* word */ PAT_TOKEN_WORD, /* barred */ PAT_PUSH+PAT_NEXT, /* backslashed */ PAT_PUSH+PAT_NEXT+PAT_RETURN, /* tilde */ PAT_TILDE+PAT_RETURN, /* semitilde */ PAT_RETURN, /* semicolon */ PAT_NEXT, /* tildespace */ PAT_TILDE+PAT_TOKEN_WORD, /* less */ PAT_TOKEN_WORD, /* greater */ PAT_TOKEN_WORD }; static int mode_bar[MAX_MODE] = { /* entry */ PAT_PUSH+PAT_NEXT+PAT_DIRTY+PAT_RETURN_TO_WORD+PAT_GOTO_BARRED+PAT_ERROR_POS, /* whitespace */ PAT_TOKEN_SPACE, /* word */ PAT_PUSH+PAT_NEXT+PAT_DIRTY+PAT_GOTO_BARRED+PAT_RETURN_TO_SELF+PAT_ERROR_POS, /* barred */ PAT_PUSH+PAT_NEXT+PAT_RETURN, /* backslashed */ PAT_PUSH+PAT_NEXT+PAT_RETURN, /* tilde */ PAT_TILDE+PAT_RETURN, /* semitilde */ PAT_RETURN, /* semicolon */ PAT_PUSH+PAT_NEXT+PAT_DIRTY+PAT_GOTO_BARRED+PAT_RETURN_TO_SELF+PAT_ERROR_POS, /* tildespace */ PAT_TILDE+PAT_TOKEN_WORD, /* less */ PAT_TOKEN_WORD, /* greater */ PAT_TOKEN_WORD }; static int mode_backslash[MAX_MODE] = { /* entry */ PAT_PUSH+PAT_NEXT+PAT_DIRTY+PAT_RETURN_TO_WORD+PAT_GOTO_BACKSLASHED+PAT_ERROR_POS, /* whitespace */ PAT_TOKEN_SPACE, /* word */ PAT_PUSH+PAT_NEXT+PAT_DIRTY+PAT_RETURN_TO_SELF+PAT_GOTO_BACKSLASHED+PAT_ERROR_POS, /* barred */ PAT_PUSH+PAT_NEXT+PAT_RETURN_TO_SELF+PAT_GOTO_BACKSLASHED, /* backslashed */ PAT_PUSH+PAT_NEXT+PAT_RETURN, /* tilde */ PAT_TILDE+PAT_RETURN, /* semitilde */ PAT_RETURN, /* semicolon */ PAT_NEXT2, //+PAT_RETURN_TO_SELF+PAT_GOTO_BACKSLASHED, /* tildespace */ PAT_TILDE+PAT_TOKEN_WORD, /* less */ PAT_TOKEN_WORD, /* greater */ PAT_TOKEN_WORD }; static int mode_tilde[MAX_MODE] = { /* entry */ PAT_NEXT+PAT_RETURN_TO_SELF+PAT_GOTO_TILDE+PAT_ERROR_POS, /* whitespace */ PAT_TOKEN_SPACE, /* word */ PAT_NEXT+PAT_MUTATED+PAT_RETURN_TO_SELF+PAT_GOTO_TILDE+PAT_ERROR_POS, /* barred */ PAT_PUSH+PAT_NEXT, /* backslashed */ PAT_PUSH+PAT_NEXT+PAT_RETURN, /* tilde */ PAT_TILDE+PAT_RETURN, /* semitilde */ PAT_RETURN, /* semicolon */ PAT_NEXT+PAT_MUTATED+PAT_RETURN_TO_CALLER+PAT_GOTO_SEMITILDE+PAT_ERROR_POS, /* tildespace */ PAT_TILDE+PAT_TOKEN_WORD, /* less */ PAT_TOKEN_WORD, /* greater */ PAT_TOKEN_WORD }; static int mode_semicolon[MAX_MODE] = { /* entry */ PAT_NEXT+PAT_RETURN_TO_SELF+PAT_GOTO_SEMICOLON, /* whitespace */ PAT_TOKEN_SPACE, /* word */ PAT_NEXT+PAT_MUTATED+PAT_RETURN_TO_SELF+PAT_GOTO_SEMICOLON, /* barred */ PAT_PUSH+PAT_NEXT, /* backslashed */ PAT_PUSH+PAT_NEXT+PAT_RETURN, /* tilde */ PAT_TILDE+PAT_RETURN, /* semitilde */ PAT_RETURN, /* semicolon */ PAT_NEXT+PAT_RETURN_TO_SELF+PAT_GOTO_SEMITILDE, /* tildespace */ PAT_TILDE+PAT_TOKEN_WORD, /* less */ PAT_TOKEN_WORD, /* greater */ PAT_TOKEN_WORD }; static int mode_else[MAX_MODE] = { /* entry */ PAT_PUSH+PAT_NEXT+PAT_GOTO_WORD, /* whitespace */ PAT_TOKEN_SPACE, /* word */ PAT_PUSH+PAT_NEXT, /* barred */ PAT_PUSH+PAT_NEXT, /* backslashed */ PAT_PUSH+PAT_NEXT+PAT_RETURN, /* tilde */ PAT_TILDE+PAT_RETURN, /* semitilde */ PAT_RETURN, /* semicolon */ PAT_NEXT, /* tildespace */ PAT_TILDE+PAT_TOKEN_WORD, /* less */ PAT_TOKEN_WORD, /* greater */ PAT_TOKEN_WORD }; static int mode_parens[MAX_MODE] = { /* entry */ PAT_PUSH+PAT_NEXT+PAT_TOKEN_WORD, /* whitespace */ PAT_TOKEN_SPACE, /* word */ PAT_TOKEN_WORD, /* barred */ PAT_PUSH+PAT_NEXT, /* backslashed */ PAT_PUSH+PAT_NEXT+PAT_RETURN, /* tilde */ PAT_TILDE+PAT_RETURN, /* semitilde */ PAT_RETURN, /* semicolon */ PAT_NEXT, /* tildespace */ PAT_TILDE+PAT_TOKEN_WORD, /* less */ PAT_TOKEN_WORD, /* greater */ PAT_TOKEN_WORD }; static int mode_equal[MAX_MODE] = { /* entry */ PAT_PUSH+PAT_NEXT+PAT_TOKEN_WORD, /* whitespace */ PAT_TOKEN_SPACE, /* word */ PAT_TOKEN_WORD, /* barred */ PAT_PUSH+PAT_NEXT, /* backslashed */ PAT_PUSH+PAT_NEXT+PAT_RETURN, /* tilde */ PAT_TILDE+PAT_RETURN, /* semitilde */ PAT_RETURN, /* semicolon */ PAT_NEXT, /* tildespace */ PAT_TILDE+PAT_TOKEN_WORD, /* less */ PAT_PUSH+PAT_NEXT+PAT_TOKEN_WORD, /* greater */ PAT_PUSH+PAT_NEXT+PAT_TOKEN_WORD }; static int mode_less[MAX_MODE] = { /* entry */ PAT_PUSH+PAT_NEXT+PAT_GOTO_LESS, /* whitespace */ PAT_PUSH+PAT_NEXT+PAT_GOTO_LESS, //PAT_TOKEN_WORD, @boza /* word */ PAT_TOKEN_WORD, /* barred */ PAT_PUSH+PAT_NEXT, /* backslashed */ PAT_PUSH+PAT_NEXT+PAT_RETURN, /* tilde */ PAT_TILDE+PAT_RETURN, /* semitilde */ PAT_RETURN, /* semicolon */ PAT_NEXT, /* tildespace */ PAT_TILDE+PAT_TOKEN_WORD, /* less */ PAT_TOKEN_WORD, /* greater */ PAT_TOKEN_WORD }; static int mode_greater[MAX_MODE] = { /* entry */ PAT_PUSH+PAT_NEXT+PAT_GOTO_GREATER, /* whitespace */ PAT_PUSH+PAT_NEXT+PAT_GOTO_GREATER, //PAT_TOKEN_WORD, @boza /* word */ PAT_TOKEN_WORD, /* barred */ PAT_PUSH+PAT_NEXT, /* backslashed */ PAT_PUSH+PAT_NEXT+PAT_RETURN, /* tilde */ PAT_TILDE+PAT_RETURN, /* semitilde */ PAT_RETURN, /* semicolon */ PAT_NEXT, /* tildespace */ PAT_TILDE+PAT_TOKEN_WORD, /* less */ PAT_PUSH+PAT_NEXT+PAT_TOKEN_WORD, /* greater */ PAT_TOKEN_WORD }; again: // get action code GET_CHAR; #ifdef DEBUG_TOKENS printf("<TOKENS> length=%d\n",len); if(ch<TEXT(' ')) printf("<TOKENS> get(#%d)\t",ch); else printf("<TOKENS> get('%c')\t",ch); switch( mode ) { case MODE_ENTRY: printf(" ENTRY -> "); break; case MODE_SPACE: printf(" SPACE -> "); break; case MODE_WORD: printf(" WORD -> "); break; case MODE_BARRED: printf(" BARRED -> "); break; case MODE_BACKSLASHED: printf(" BACKSLASH -> "); break; case MODE_TILDE: printf(" TILDE -> "); break; case MODE_SEMITILDE: printf(" SEMITILDE -> "); break; case MODE_SEMICOLON: printf(" SEMICOLON -> "); break; case MODE_TILDESPACE: printf("TILDESPACE -> "); break; } #endif code = mode_else[mode]; if( !len ) code = mode_eof[mode]; else { if( method==TOKENIZE_READWORD ) { // tokenize as expected by READWORD if( ch==TEXT('|') ) code = mode_bar[mode]; else if( ch==TEXT('\\') ) code = mode_backslash[mode]; } else if( method==TOKENIZE_READLIST ) { // tokenize as data if( ch==TEXT('\n') ) code = mode_eol[mode]; else if( ch==TEXT('\r') ) code = mode_eol[mode]; else if( ch<=TEXT(' ') ) code = mode_space[mode]; else if( ch==TEXT('[') ) code = mode_open[mode]; else if( ch==TEXT(']') ) code = mode_close[mode]; else if( ch==TEXT('|') ) code = mode_bar[mode]; else if( ch==TEXT('\\') ) code = mode_backslash[mode]; else if( ch==TEXT('~') ) code = mode_tilde[mode]; } else if( method==TOKENIZE_DATA ) { // tokenize as data if( ch==TEXT('\n') ) code = mode_eol[mode]; else if( ch==TEXT('\r') ) code = mode_eol[mode]; else if( ch<=TEXT(' ') ) code = mode_space[mode]; else if( ch==TEXT('[') ) code = mode_open[mode]; else if( ch==TEXT(']') ) code = mode_close[mode]; else if( ch==TEXT('|') ) code = mode_bar[mode]; else if( ch==TEXT('\\') ) code = mode_backslash[mode]; else if( ch==TEXT('~') ) code = mode_tilde[mode]; else if( ch==TEXT(';') ) code = mode_semicolon[mode]; } else { // tokenize as commands //printf("ch=%c *buf=%c\n",ch,*buffer); if( ch==TEXT('(') || ch==TEXT(')') ) code = mode_parens[mode]; else if( *buffer!=TEXT('"') ) { if( (ch==TEXT('+') || ch==TEXT('-')) && is_number(bp) ) { } else if( ch==TEXT('+') || ch==TEXT('-') || ch==TEXT('*') || ch==TEXT('/')) code = mode_parens[mode]; else if( ch==TEXT('=') ) code = mode_equal[mode]; else if( ch==TEXT('<') ) code = mode_less[mode]; else if( ch==TEXT('>') ) code = mode_greater[mode]; } } } // process action code int _stack = -1; int newmode = 0; if( code & PAT_GOTO ) { // this is pred-processing of GOTO // if old mode was space, and new is not, // then update initial position of next token newmode = (code>>PAT_SHIFT) & 0xF; if( mode==MODE_SPACE && newmode!=MODE_SPACE ) { sp = source; } } if( code & PAT_TILDE ) { *bp++ = TEXT('~'); #ifdef DEBUG_TOKENS printf("\n<TOKENS> put('%c'/%d)\n",TEXT('~'),TEXT('~')); #endif //DEBUG_TOKENS } if( code & PAT_PUSH ) { // push a character only if: // - currently not in bars |..?..| // - currently in bars, but not in semicolon ;..|..?..| if( mode!=MODE_BARRED || (stack[mode]!=MODE_SEMITILDE && stack[mode]!=MODE_SEMICOLON) ) { //if( mode==MODE_BARRED || mode==MODE_BACKSLASHED ) //*bp++ = ENBAR(ch); //else *bp++ = ch; #ifdef DEBUG_TOKENS printf("\n<TOKENS> put('%c'/%d)\n",ch,ch); #endif //DEBUG_TOKENS //if( ch=='\r' && *(source+1)=='\n' ) // handle CRLF cases //{ //*bp++ = '\n'; //#ifdef DEBUG_TOKENS // printf("\n<TOKENS> put('%d')\n",'\n'); // #endif //DEBUG_TOKENS //} } } if( code & PAT_NEXT2 ) { source++; len--; } if( code & (PAT_NEXT|PAT_NEXT2) ) { if( *source=='\r' ) { //crlf = 0; if( *(source+1)=='\n' ) { //crlf = 1; source++; len--; } } source++; len--; } if( code & PAT_DIRTY ) dirty = 1; if( code & PAT_MUTATED ) mutated = 1; if( code & PAT_ERROR_POS ) errpos = origlen-len-1; if( code & PAT_RETURN_TO_WORD ) _stack = MODE_WORD; if( code & PAT_RETURN_TO_SELF ) _stack = mode; if( code & PAT_RETURN_TO_CALLER ) _stack = stack[mode]; if( code & PAT_GOTO ) { // this is post-processing of GOTO mode = newmode; stack[mode] = _stack; } if( code & PAT_RETURN ) mode = stack[mode]; if( code & PAT_TOKEN ) { int _token = (code>>PAT_SHIFT) & 0xF; if( _token!=TOKEN_WORD ) return _token; if( mutated ) { *bp = NULL_CHAR; *token = new_word( buffer, UNKNOWN ); #ifdef DEBUG_TOKENS printf("MUTATED TOKEN "); dumpln(*token); printf("\n\n"); #endif } else { *token = new_subword( input, sp, source-sp /*bp-buffer*/ ); #ifdef DEBUG_TOKENS printf("NORMAL TOKEN **"); dump(*token); printf("** (len=%d)\n\n\n",source-sp); #endif } return dirty?TOKEN_DIRTY_WORD:TOKEN_WORD; } if( code & PAT_ERROR ) { #ifdef DEBUG_TOKENS printf("ERROR\n"); #endif return TOKEN_ERROR; } #ifdef DEBUG_TOKENS switch( mode ) { case MODE_ENTRY: printf("ENTRY\n"); break; case MODE_SPACE: printf("SPACE\n"); break; case MODE_WORD: printf("WORD\n"); break; case MODE_BARRED: printf("BARRED\n"); break; case MODE_BACKSLASHED: printf("BACKSLASH\n"); break; case MODE_TILDE: printf("TILDE\n"); break; case MODE_SEMITILDE: printf("SEMITILDE\n"); break; case MODE_SEMICOLON: printf("SEMICOLON\n"); break; case MODE_TILDESPACE: printf("TILDESPACE\n"); break; } #endif goto again; } // get_token() atom_t get_sublist( int level, int full_parse, atom_t* pos ) //ex2// { atom_t result = empty_list; atom_t last = empty_list; if( pos ) *pos = NULL; //ex2// atom_t token = NULL; atom_t sublist_pos = NULL; //ex2// int flags; int bracketlen = len; // LEN of last opening bar int pos_from = source-STRING(input); //ex2// if( pos_from ) pos_from--; //ex2// flags = 0;//FLAG_NEWLINE; while( (last_token=get_token(&token,full_parse)) ) { sublist_pos = NULL; #ifdef DEBUG_TOKENIZATION switch(last_token) { case TOKEN_END: printf("TOKEN_END\n"); break; case TOKEN_SPACE: printf("TOKEN_SPACE\n"); break; case TOKEN_WORD: printf("TOKEN_WORD @"); dump(token); printf("@\n"); break; case TOKEN_DIRTY_WORD: printf("TOKEN_|WORD| @"); dump(token); printf("@\n"); break; case TOKEN_OPEN: printf("TOKEN_OPEN [\n"); break; case TOKEN_CLOSE: printf("TOKEN_CLOSE ]\n"); break; case TOKEN_LINEEND: printf("TOKEN_LINEEND\n"); break; case TOKEN_ERROR: printf("TOKEN_ERROR\n"); break; } #endif if( last_token==TOKEN_ERROR ) return result; if( last_token==TOKEN_SPACE ) { if( IS_NOT_EMPTY(last) ) SET_FLAGS( last, FLAG_BEFORE_SPACE ); flags |= FLAG_AFTER_SPACE; continue; } if( last_token==TOKEN_DIRTY_WORD ) { //printf("###BEFORE="); dumpln(token); atom_t x = purify( token ); DEUSE( token ); token = x; //printf("###AFTER="); dumpln(token); } if( last_token==TOKEN_CLOSE ) break; if( last_token==TOKEN_LINEEND ) { flags |= FLAG_NEWLINE; continue; } if( last_token==TOKEN_OPEN) { token = get_sublist( level+1, TOKENIZE_DATA, &sublist_pos ); // recursive //ex2// if( last_token==TOKEN_ERROR ) { DEUSE( token ); if( sublist_pos ) DEUSE(sublist_pos); sublist_pos = NULL; break; } } if( method==TOKENIZE_COMMANDS ) flags |= FLAG_TOKENIZED_COMMANDS; if( method==TOKENIZE_DATA ) flags |= FLAG_TOKENIZED_DATA; if( method==TOKENIZE_COMMANDS && last_token==TOKEN_WORD && LENGTH(token)>1 && *STRING(token)==TEXT('?') && *(STRING(token)+1)>=TEXT('0') && *(STRING(token)+1)<=TEXT('9') ) { //printf(">>>%d %d\n", last_token==TOKEN_WORD, last_token==TOKEN_DIRTY_WORD); // process template ?nn->(? nn) for command tokenization //printf("append token **"); dump(token); printf("**\n"); atom_t new_qoken = new_subword( token, STRING(token), 1 ); atom_t new_token = new_subword( token, STRING(token)+1, LENGTH(token)-1 ); DEUSE( token ); append( new_word(TEXT("("),-1), &result, &last ); // ( append( new_qoken, &result, &last ); // ? append( new_token, &result, &last ); // nn append( new_word(TEXT(")"),-1), &result, &last ); // ) } else { // normal token, no more processing needed if( sublist_pos ) //ex// { append_ex( token, &result, &last ); POS( last ) = sublist_pos; //printf("\n\nSET EXTENDED POSITION "); //dump_atom(sublist_pos,1); printf("\n"); //printf("CURRENT RESULT "); //dump_atom(result,1); printf("\n\n"); sublist_pos = NULL; } else { append( token, &result, &last ); } SET_FLAGS( last, flags ); } flags = 0; if( last_token==TOKEN_ERROR ) break; } // test for unmatching square brackets // i.e. ...[... or ...]... if( level ) { if( last_token==TOKEN_END ) { errpos = origlen-bracketlen-1; last_token = TOKEN_ERROR; } } else { if( last_token==TOKEN_CLOSE ) { errpos = origlen-len-1; last_token = TOKEN_ERROR; } } int pos_to = source-STRING(input); if( pos_to ) pos_to--; if( pos && method == TOKENIZE_DATA ) //ex2// { *pos = new_subword( input, STRING(input)+pos_from, pos_to-pos_from+1 ); } return result; } // get_sublist() atom_t result = get_sublist( 0, method, NULL ); //ex// // in case of error return empty list if( last_token==TOKEN_ERROR ) { //printf("ERROR RESULT=");dumpln(result); DEUSE( result ); result = new_parse_error( ERROR_INCOMPLETE_PAIR, errpos, input ); } DEALLOC( buffer ); //printf("#########"); //dump_atom(result,1); //printf("######\n"); return result; }
atom_t trim_shell_comment | ( | atom_t | word | ) |
word | word containing source text |
Trims a shell comment from the beginning of the word. Shell comment can be only the first line if its first two characters are #!
. If a shell comment is trimmed, then the result is a subword from the first character on the second line, otherwise the input word is returned as is but with increased reference count.
{ #ifdef SAFEMODE assert( IS_WORD(word) || IS_SUBWORD(word) ); #endif chars_t source = STRING(word); int_t len = LENGTH(word); // if there are no enough characters just exit if( LENGTH(word)<2 ) return USE(word); // if the first two characters are not #! then exit if( *source!=TEXT('#') || *(source+1)!=TEXT('!') ) return USE(word); // skip the line while( len && *source!=TEXT('\n') ) { source++; len--; } // return a subword. Pay attention to always reference // the main host word because the input could be a word // or a subword. if( IS_WORD(word) ) return new_subword( word, source, len ); else return new_subword( WORD(word), source, len ); }
word | word to be purified |
Purifies a word by processing all backslashes and bars. Returns a new word if needed. Assumes that the input needs purification.
{ //return USE(word); #ifdef SAFEMODE assert( IS_WORD(word) || IS_SUBWORD(word) ); #endif chars_t source = STRING(word); int_t len = LENGTH(word); chars_t buffer = alloca( CHAR_SIZE*len ); // buffer for the longest word chars_t bp = buffer; int need_enbar = 0; int is_mutated = 0; // set to 1 if the word is mutated int in_backslash = 0; int in_bars = 0; for( ; len; len--,source++ ) { need_enbar = in_bars || in_backslash; if( in_backslash ) { in_backslash = 0; } else if( *source==TEXT('\\') ) { is_mutated = 1; in_backslash = 1; continue; } else if( *source==TEXT('|') ) { is_mutated = 1; in_bars = !in_bars; continue; } if( need_enbar ) *bp++ = ENBAR(*source); else *bp++ = *source; //if( need_enbar ) //printf(" PURIFY %d %d\n",*source,ENBAR(*source)); //else //printf(" PURIFY %d \n",*source ); } *bp = NULL_CHAR; if( is_mutated ) return new_word( buffer, bp-buffer ); else return USE(word); }
atom_t parentheses | ( | atom_t | input | ) |
input | flat list |
Processes all (...) of a flat list by making them as sublists. Returns ERROR_INCOMPLETE_PAIR if the parentheses are not paired well.
{ atom_t result = empty_list; atom_t _paren( int level, atom_t openparen, int* last_flags ) { #ifdef DEBUG_PARENTHESES printf("<PAREN> ENTER LEVEL %d INPUT=", level); dumpln(input); #endif //DEBUG_PARENTHESES *last_flags = 0; atom_t result = empty_list; atom_t last = empty_list; while( IS_NOT_EMPTY(input) ) { // get current element atom_t elem = USE(CAR(input)); int flags = FLAGS(input); #ifdef DEBUG_PARENTHESES printf("<PAREN> READ ELEMENT "); dumpln(elem); #endif //DEBUG_PARENTHESES // if it is ( then call _parse() recursively // if it is ) then exit current _parse() if( IS_WORD(elem)||IS_SUBWORD(elem) ) if( LENGTH(elem)==1 ) { if( *STRING(elem)==TEXT('(') ) // process ( { int f; DEUSE( elem ); //compensate USE() when elem was retrieved input = CDR(input); elem = _paren( level+1, elem, &f ); flags |= f; if( IS_ERROR(elem) ) { DEUSE( result ); return elem; } } else if( *STRING(elem)==TEXT(')') )// process ) { DEUSE( elem ); //compensate USE() when elem was retrieved *last_flags = FLAGS(input); break; } } #ifdef DEBUG_PARENTESES printf("<PAREN> APPEND "); dumpln(elem); #endif //DEBUG_PARENTHESES // append current element if( IS_EXTENDED(input) ) { append_ex( elem, &result, &last ); DEUSE( POS(last) ); POS( last ) = USE( POS( input ) ); } else append( elem, &result, &last ); FLAGS(last) |= flags; // empty list if( IS_EMPTY(input) ) break; // move to next element input = CDR(input); } if( level && IS_EMPTY(input) ) { DEUSE( result ); return new_error( ERROR_INCOMPLETE_PAIR, openparen ); } if( !level && !IS_EMPTY(input) ) { DEUSE( result ); return new_error( ERROR_INCOMPLETE_PAIR, CAR(input) ); } if( level && IS_EMPTY(result) ) { return new_error( ERROR_EMPTY_EXPRESSION, openparen ); } #ifdef DEBUG_PARENTHESES printf("<PAREN> EXIT LEVEL %d RESULT=", level); dumpln(result); #endif //DEBUG_PARENTHESES if( level ) SET_FLAGS( result, FLAG_EXPRESSION ); return result; } // _paren() int f; result = _paren( 0, empty_list, &f ); return result; }
atom_t define_user_function | ( | atom_t | source, |
atom_t | input, | ||
atom_t | parent | ||
) |
source | word containing the source |
input | contents between TO and END |
parent | context var where creation is done |
Defines new user-defined command or function. The input
contains the contents of the function -- these are all tokens between TO
and END
. Var creation is done in several steps:
: and are on a single line
: or double quotes "
.
: and are on the same line.{ #ifdef DEBUG_TO_END printf("<TO-END> DEFINING="); dumpln( input); #endif //atom_t last; atom_t a = input; atom_t name; atom_t lhi; // last_header_item // create the function atom_t function = new_var( word_to, parent, 1 ); //DEUSE(to); need_descr2( function ); SET_FLAGS( function, FLAG_FUNCTION ); LARGS( function ) = 0; RARGS( function ) = 0; // collecting left parameters while( IS_NOT_EMPTY(a) && !GET_FLAGS(a,FLAG_NEWLINE) && IS_ANY_WORD(CAR(a)) && (*STRING(CAR(a))==TEXT(':')) ) { lhi = CAR(a); atom_t var = new_local_var( CAR(a), function, 1 ); if( IS_ERROR(var) ) { DEUSE( input ); //DEUSE( function ); LOCALS(parent) = behead( LOCALS(parent) ); // this DEUSEs function return var; } SET_FLAGS( var, FLAG_VARIABLE ); LARGS( function )++; #ifdef SAFE_MODE assert( LARGS(function)<255 ); #endif a = CDR(a); } // collecting name if( IS_NOT_EMPTY(a) && !GET_FLAGS(a,FLAG_NEWLINE) && IS_ANY_WORD(CAR(a)) && (*STRING(CAR(a))!=TEXT('"')) ) { lhi = CAR(a); name = CAR(a); a = CDR(a); } else { DEUSE( input ); return new_error( ERROR_MISSING_NAME, a ); } //printf("<TO-END> DEFINING="); dumpln( name ); // collecting right parameters while( IS_NOT_EMPTY(a) && !GET_FLAGS(a,FLAG_NEWLINE) && IS_ANY_WORD(CAR(a)) && (*STRING(CAR(a))==TEXT(':')) ) { lhi = CAR(a); atom_t var = new_local_var( CAR(a), function, 1 ); if( IS_ERROR(var) ) { DEUSE( input ); //DEUSE( function ); LOCALS(parent) = behead( LOCALS(parent) ); // this DEUSEs function return var; } SET_FLAGS( var, FLAG_VARIABLE ); RARGS( function )++; #ifdef SAFE_MODE assert( RARGS(function)<255 ); #endif a = CDR(a); } // collect "..." if( IS_NOT_EMPTY(a) && !GET_FLAGS(a,FLAG_NEWLINE) && IS_ANY_WORD(CAR(a)) && (LENGTH(CAR(a))==3) && (*(STRING(CAR(a))+0)==TEXT('.')) && (*(STRING(CAR(a))+1)==TEXT('.')) && (*(STRING(CAR(a))+2)==TEXT('.')) ) { SET_FLAGS( function, FLAG_INFINITE_ARGS ); a = CDR(a); } // set offset of parameters int offset = BASE_OFFSET_PARAMS; // this is the start offset atom_t x; for( x = LOCALS(function); IS_NOT_EMPTY(x); x=CDR(x) ) { //printf("set offset of "); dump(NAME(CAR(x))); printf(" to be %d\n",offset); OFFSET(CAR(x)) = offset; offset += sizeof( atom_t ); } // check whether the function is already defined atom_t var = find_local_var( name, parent ); if( var ) { if( LARGS(var)!=LARGS(function) || RARGS(var)!=RARGS(function) ) { LOCALS(parent) = behead( LOCALS(parent) ); // this DEUSEs function atom_t result = new_error( ERROR_INCOMPATIBLE_REDEFINITION, input ); DEUSE( input ); return result; } // forget data of the old function and // reuse data of the new function DEUSE( FULLSOURCE(var) ); DEUSE( SOURCE(var) ); DEUSE( LOCALS(var) ); DEUSE( BODY(var) ); DEUSE( TREE(var) ); DEUSE( BINARY(var) ); TREE(var) = empty_list; BINARY(var) = empty_list; BODY(var) = USE(a); //USE(BODY(function)); LOCALS(var) = USE(LOCALS(function)); LOCALS(parent) = behead(LOCALS(parent)); //DEUSE(function); function = var; } else { DEUSE( NAME(function) ); NAME( function ) = USE(name); BODY( function ) = USE(a); } //LEVEL( function ) = level; ADDRESS( function ) = 0; PRIORITY( function ) = PRIORITY_FUN; // cut header and 'END' from source chars_t new_src = STRING(lhi)+LENGTH(lhi); int new_len = LENGTH(source) - (new_src-STRING(source)) - LENGTH(word_end); SOURCE( function ) = new_subword( source, new_src, new_len ); FULLSOURCE( function ) = USE( source ); //DEUSE( input ); #ifdef DEBUG_TO_END printf("<TO-END> DEFINED FUNCTION "); dumpln(NAME(function)); printf("<TO-END> SOURCE "); dumpln(SOURCE(function)); printf("<TO-END> BODY "); dumpln(BODY(function)); printf("<TO-END> TREE "); dumpln(TREE(function)); #endif //printf("@@@@@@DEFINE "); dump_atom(NAME(function),1); //printf(" IN PARENT "); dump_atom(NAME(PARENT(function)),1); //printf("@@@@@@\n"); return function; }
input | list with tokenized source code |
parent | context var where preparsing is done |
level | current level of nesting return source without TO ... END definitions |
Pre-parses a flat tokenized list by extracting all local TO
... END
definitions and creating corresponding local functions. Returns the remaining source code.
{ #ifdef DEBUG_TO_END printf("<TO-END> ENTER1="); dumpln( input); #endif atom_t a = input; atom_t before_a = NULL; atom_t before_to = NULL; atom_t first_to = NULL; int to_end_depth = 0; while( IS_NOT_EMPTY(a) ) { if( IS_ANY_WORD(CAR(a)) ) { // found TO or its synonym if( same_words(CAR(a),word_to) || same_words(CAR(a),word_to_syn) ) { if( to_end_depth==0 ) { before_to = before_a; first_to = a; } to_end_depth++; } // found END if( same_words( CAR(a), word_end )) { // END without to ? if( to_end_depth==0 ) { atom_t result = new_error( ERROR_INCOMPLETE_PAIR, CAR(a) ); DEUSE( input ); return result; } to_end_depth--; if( to_end_depth==0 ) { // found END corresponding to TO. // extract the whole TO..END if( first_to==before_a ) { DEUSE( input ); return new_error( ERROR_EMPTY_TO_END, first_to ); } atom_t word = new_subword(WORD(CAR(first_to)),STRING(CAR(first_to)),(STRING(CAR(a))-STRING(CAR(first_to))+LENGTH(word_end))); //atom_t word = empty_list; first_to = behead(first_to); //first_to = CDR(first_to); CDR(before_a) = empty_list; before_a = before_to; a = behead(a); if( before_to ) CDR(before_a) = a; // TO is not first item else input = a; // TO is first item #ifdef DEBUG_TO_END printf("<TO-END> EXTRACT:"); dumpln( first_to ); printf("<TO-END> CURRENT INPUT:"); dumpln( input ); printf("<TO-END> DEFINE IN PARENT:"); dumpln( parent ); #endif // create new command/function atom_t var = define_user_function( word, (first_to), parent ); DEUSE( word ); if( IS_ERROR(var) ) { DEUSE( input ); return var; } DEUSE( first_to ); // process recursively the function // for nested to-end definitions BODY(var) = preparse( BODY(var), var, level+1 ); // next item is set explicitely, SET_FLAGS( a, FLAG_NEWLINE ); first_to = NULL; before_to = NULL; continue; } } } // next item before_a = a; a = CDR(a); } // TO without END ? if( first_to ) { atom_t result = new_error( ERROR_INCOMPLETE_PAIR, CAR(first_to) ); DEUSE(input); return result; } #ifdef DEBUG_TO_END printf("<TO-END> LEFTOVERS="); dumpln( input); printf("\n"); #endif return input; }
input | unparsed list |
parent | parent var in which parsing is done |
top_level | flag whether parsing is at its top-level |
Parses a list into a tree (aka LISP notation). The list is supposed to be tokenized as commands. The parent
variable provides a context of variables which can be referenced from the parsed input.
running_compiled_code && bug #3442773
{ atom_t vars[MAX_ELEMS]; atom_t data[MAX_ELEMS]; atom_t poses[MAX_ELEMS]; // source position (if available) int pris[MAX_ELEMS]; int usedby[MAX_ELEMS]; int largs[MAX_ELEMS]; // number of unlinked left arguments int rargs[MAX_ELEMS]; // number of unlinked right arguments int count; // count of elements in above arrays atom_t aux = empty_list; // additional lists to delete //printf("parse==");dumpln(input); #ifdef DEBUG_PARSE void dump_arrays( ) { int i; for( i=0; i<count; i++ ) if( pris[i]>=PRIORITY_MIN ) { printf("<PARSE> %2d.[used by %2d; free=%d:%d] v%c0 pr=%d =", i, usedby[i], largs[i], rargs[i], vars[i]?'#':'=', pris[i]); dump_atom(data[i],1); if( !IS_UNBOUND(poses[i]) ) { printf(" @ "); dump_atom(poses[i],1); } printf("\n"); } printf("\n"); } #endif // Finds the first line of input. Return a pointer // to the first node after the line. Sets arrays // vars[], data[] and prios[]. atom_t get_line( atom_t input ) { int has_no_infinite = 1; count = 0; while( IS_NOT_EMPTY(input) ) { atom_t elem = USE(CAR(input)); //printf("testing element "); //dumpln(elem); #ifdef SAFEMODE assert( count<MAX_ELEMS ); #endif data[count] = elem; vars[count] = IS_ANY_WORD(elem) ? find_var(elem,parent) : NULL; if( VAR && !IS_COMMAND(VAR) && !IS_FUNCTION(VAR) ) { // If we have variable called 4 (e.g. MAKE 4 "BOZA) // then VAR!=NULL, but this is allowed only for // functions and commands, so set VAR=NULL! VAR=NULL; } pris[count] = (VAR ? PRIORITY(VAR) : PRIORITY_MAX); usedby[count] = -1; largs[count] = VAR ? LARGS(VAR) : 0; rargs[count] = VAR ? RARGS(VAR) : 0; poses[count] = IS_EXTENDED(input)?POS(input):unbound; if( VAR && IS_VARIABLE(VAR) ) { printf("found variable as function: "); dump_atom(NAME(VAR),1); printf("\n"); } // if element is a word not starting with ":" and // cannot be transfered into a number and does not // exist as a variable, then this is unknown function float64_t x; if( IS_ANY_WORD(elem) && !VAR && *STRING(elem)!=TEXT(':') && *STRING(elem)!=TEXT('"') && !atom_to_float(elem,&x) ) { //DEUSE( input ); DEUSE( elem ); return new_error( ERROR_DO_NOT_KNOW, elem ); } if( !top_level && VAR && has_no_infinite ) { if( GET_FLAGS(VAR,FLAG_INFINITE_ARGS) ) { rargs[count]=-1; has_no_infinite = 0; } } // test for unary if( VAR && GET_FLAGS(VAR,FLAG_CAN_BE_UNARY) && largs[count]==1 ) { // case 1: <spc> op <spc> -> binary // case 2: <spc> op -> unary // case 3: op <spc> -> binary // case 4: op -> binary if( GET_FLAGS(input,FLAG_AFTER_SPACE|FLAG_BEFORE_SPACE) == FLAG_AFTER_SPACE ) largs[count]=0; } count++; //next: input = CDR(input); // if the next item is on a new line AND if it is // not constant-list AND we are not in a sublist // then break the line here if( top_level && IS_NOT_EMPTY(input) && GET_FLAGS(input,FLAG_NEWLINE) && (!IS_LIST(CAR(input)) || IS_EXPRESSION(CAR(input))) ) break; } // line while return input; } //get_line() #ifdef DEBUG_PARSE printf("<PARSE> ENTER="); dumpln( input); #endif //----------------------------------------- // STEP 1: Convert (..) into subexpressions //----------------------------------------- if( IS_EMPTY(input) ) return input; if( IS_ERROR(input) ) return input; atom_t orig_input = input = parentheses( input ); if( IS_ERROR(input) ) return input; #ifdef DEBUG_PARSE printf("<PARSE> PARENTHESISEZ="); dumpln( input); #endif // if there is no parent consider the root as parent if( !parent ) parent = root; //----------------------------------------- // STEP 2-7: Convert list into lisp line-by-line //----------------------------------------- atom_t result = empty_list; atom_t last = empty_list; atom_t err = NULL; int i,j,finished; int* args; #define LINK \ { \ usedby[j] = i; \ args[i]--; \ } //printf("%d is used by %d\n",j,i); #define RELINK \ { \ if( j<usedby[j] ) { largs[usedby[j]]++; finished=0; } \ if( j>usedby[j] ) { rargs[usedby[j]]++; finished=0; } \ LINK; \ } while( IS_NOT_EMPTY(input) ) // main while { next_line: #ifdef DEBUG_PARSE printf("\n\n"); printf("<PARSE>------------------\n"); printf("<PARSE> START A NEW LINE \n"); printf("<PARSE>------------------\n"); #endif //----------------------------------- // STEP 2: Get a line from the input //----------------------------------- input = get_line( input ); if( IS_ERROR(input) ) { err = input; break; } #ifdef DEBUG_PARSE printf("<PARSE> ORIGINAL PARSE_LIST:\n"); dump_arrays( ); #endif //printf("<PARSE> LEFTOVER:"); dumpln(input); //printf("------------------------------------\n"); //----------------------------------- // STEP 3: Analyze and set dependencies between items //----------------------------------- finished = 0; while( !finished ) { finished = 1; for( i=0; i<count; i++ ) { // now process right parameters args = rargs; for( j=i+1; args[i] && j<count; j++ ) { if( vars[j] && !IS_FUNCTION(vars[j]) ) break; if( usedby[j]==-1 ) LINK else if( (usedby[j]>j || usedby[j]<i) && (pris[usedby[j]]<=pris[i]) ) RELINK; } // for largs // now process left parameters args = largs; for( j=i-1; args[i] && j>=0; j-- ) { //if(vars[j]) //{ //printf("j=%d cond=%d var=",j,!IS_FUNCTION(vars[j])); //dumpln(vars[j]); //} if( vars[j] && !IS_FUNCTION(vars[j]) ) break; if( usedby[i]==j ) break; if( usedby[j]==-1 ) { /*if( usedby[i]!=j )*/ LINK; // skip direct circular references } else if( (usedby[j]<j || usedby[j]>i) && (pris[usedby[j]]<pris[i]) ) { /*if( usedby[i]!=j )*/ RELINK; // skip direct circular reference } } // for largs } // for each element } // while not finished #ifdef DEBUG_PARSE printf("<PARSE> FINAL PARSE_LIST:\n"); dump_arrays( ); #endif //----------------------------------- // STEP 4: Check for extra or missing arguments //----------------------------------- #ifdef DEBUG_PARSE printf("<PARSE> START STEP 4:\n"); dump_arrays( ); #endif args = rargs; int top_level_pos = -1; for( j=0; j<count; j++ ) { // check whether there are two or more free items // only one is allowed for non-top levels if( usedby[j]==-1 ) { if( !top_level && top_level_pos>=0 ) { // we are ready to announce that there are extra values // but before doing this check whether there is function // that would be so polite to take any of them //printf(">>> top_level_pos=%d\n",top_level_pos); if( rargs[top_level_pos]==0 && vars[top_level_pos] && GET_FLAGS(vars[top_level_pos],FLAG_MAY_HAVE_EXTRA_ARG) ) { i = top_level_pos; //printf(">>> attach %d to %d\n",j,i); //printf(">>> rargs LINK; //printf(">>>>>>>"); dumpln(NAME(vars[top_level_pos])); } else { err = new_error( ERROR_CROWDED_EXPRESSION, data[j] ); break; } } if( top_level_pos<0 ) top_level_pos = j; } // left paramers can be skipped only for unary functions if( largs[j] && !GET_FLAGS(vars[j],FLAG_CAN_BE_UNARY) ) { err = new_error( ERROR_MISSING_LEFTS, data[j] ); break; } // some functions may ignore 1 missing right parameter if( rargs[j]==1 && GET_FLAGS(vars[j],FLAG_MAY_SKIP_LAST_ARG) ) continue; // when arguments can be infinite, this also mean any number if( rargs[j] && !GET_FLAGS(vars[j],FLAG_INFINITE_ARGS) ) { //printf("rargs=%d\n",rargs[j]); err = new_error( ERROR_MISSING_RIGHTS, data[j] ); break; } } if( err ) break; //----------------------------------- // STEP 5: Recursivelly process nested expressions // and arguments which must be lisp'ed too //----------------------------------- #ifdef DEBUG_PARSE printf("<PARSE> START STEP 5:\n"); dump_arrays( ); #endif for( i=0; i<count; i++ ) if( IS_LIST(data[i]) ) { if( GET_FLAGS(data[i],FLAG_EXPRESSION) ) { // nested expression atom_t expr = parse( data[i], parent, 0 ); if( IS_ERROR(expr) ) { err = expr; } else { DEUSE( data[i] ); data[i] = expr; SET_FLAGS( data[i], FLAG_EXPRESSION ); } } else { // possibly argument to be reparsed j=usedby[i]; if( (j>=0) && vars[j] && GET_FLAGS(vars[j],FLAG_PROCESS_ARGS) ) { atom_t expr = build_syntax_subtree( parent, data[i] ); if( IS_ERROR(expr) ) { err = expr; } else { DEUSE( data[i] ); data[i] = expr; SET_FLAGS( data[i], FLAG_EXPRESSION+FLAG_WAS_LIST_CONST ); } } } } if( err ) break; //----------------------------------- // STEP 6: Group items of subexpressions, process numbers //----------------------------------- #ifdef DEBUG_PARSE printf("<PARSE> START STEP 6:\n"); dump_arrays( ); #endif for( i=0; i<count; i++ ) { /* // convert all numeric data into numbers float64_t n; if( IS_ANY_WORD(data[i]) && atom_to_float(data[i],&n) ) { DEUSE( data[i] ); data[i] = new_float( n ); } // create a list node for each element // whatever the element is, this list node will be used data[i] = new_list( data[i], empty_list ); SET_FLAGS( data[i], FLAG_EXPRESSION ); */ // create a list node for each element // whatever the element is, this list node will be used float64_t n; if( IS_ANY_WORD(data[i]) && atom_to_float(data[i],&n) ) { atom_t pos = data[i]; // old source of the number data[i] = new_list_ex( new_float(n), empty_list ); POS( data[i] ) = pos; } else { data[i] = new_list( data[i], empty_list ); } SET_FLAGS( data[i], FLAG_EXPRESSION ); } for( i=0; i<count; i++ ) { if( usedby[i]>=0 ) { atom_t a = data[usedby[i]]; atom_t name = CAR(data[i]); atom_t function = vars[usedby[i]]; //printf("name(%d)=",i);dumpln(name); //printf("function(%d)=",usedby[i]);dumpln(function); //printf("a(%d)=",usedby[i]);dumpln(a); // if the item is word constant and is used by // a function which creates variables, then do // create the variables now. if( GET_FLAGS(function,FLAG_SET_ALL_VARS) && IS_ANY_WORD(name) && LENGTH(name)>1 && *STRING(name)==TEXT('"') ) { //printf("create local "); dumpln(name); //printf(" in "); dumpln(NAME(parent)); atom_t var = new_local_var( name, parent, 1 ); if( IS_ERROR(var) ) err = var; SET_FLAGS( var, FLAG_VARIABLE ); } // if the item is word constant and is used by // a function which creates a single variable // (like MAKE), then do create the variable as // global if it does not exist // 2010.06.26: create the variable only if the // parsing has not been activated at run-time. atom_t real_name = NULL; if( !running_compiled_code && GET_FLAGS(function,FLAG_SET_ONE_VAR) && (i==usedby[i]+1) && (IS_INTEGER(name) || IS_FLOAT(name)) ) { real_name = atom_to_word( name ); goto use_numeric_name; } if( GET_FLAGS(function,FLAG_SET_ONE_VAR) && (i==usedby[i]+1) && IS_ANY_WORD(name) && LENGTH(name)>1 && *STRING(name)==TEXT('"') ) { real_name = new_subword( name, STRING(name)+1, LENGTH(name)-1 ); use_numeric_name: if( !find_var( real_name, parent ) ) { atom_t var = new_local_var( real_name, parent/*root*/, 0 ); //printf("!2087!"); if( IS_ERROR(var) ) err = var; SET_FLAGS( var, FLAG_VARIABLE ); } DEUSE( real_name ); // patch for FOR command. If the control // variable is ABC then create ABC and // ^ABC if( ADDRESS(function)==(int)rt_for ) { assert( IS_ANY_WORD(name) ); atom_t real_name = new_word( STRING(name), LENGTH(name) ); *STRING(real_name) = L'^'; if( !find_var( real_name, parent ) ) { atom_t var = new_local_var( real_name, parent/*root*/, 0 ); //printf("!2103!"); if( IS_ERROR(var) ) err = var; SET_FLAGS( var, FLAG_VARIABLE ); } DEUSE( real_name ); } } // if the item is word constant and is used by // a function which load libraries, then do // load the library now. if( ADDRESS(function)==(int)rt_load && IS_ANY_WORD(name) && LENGTH(name)>1 && *STRING(name)==TEXT('"') ) { atom_t real_name = new_word( STRING(name)+1, LENGTH(name)-1 ); atom_t word = read_word( STRING(real_name) ); DEUSE( real_name ); if( IS_ERROR(word) ) { err = USE(word); break; } atom_t trimmed = trim_shell_comment( word ); DEUSE( word ); atom_t tokens1 = tokenize( trimmed, TOKENIZE_DATA ); DEUSE( trimmed ); if( IS_ERROR(tokens1) ) { err = tokens1; break; } atom_t tokens2 = tokenize( tokens1, TOKENIZE_COMMANDS ); DEUSE( tokens1 ); if( IS_ERROR(tokens2) ) return tokens2; tokens2 = preparse( tokens2, root, LEVEL(root) ); atom_t body = parentheses( tokens2 ); DEUSE( tokens2 ); //printf("BODY=");dumpln(body); //printf("input=");dumpln(input); if( IS_NOT_EMPTY(body) ) { atom_t a; for( a = body; IS_NOT_EMPTY(CDR(a)); a=CDR(a) ); CDR(a) = USE(input); input = body; } //printf("old aux="); dumpln(aux); aux = new_list( body, aux ); //printf("new aux="); dumpln(aux); for( i=0; i<count; i++ ) DEUSE( data[i] ); goto next_line; } SET_FLAGS( a, FLAG_EXPRESSION ); while( IS_NOT_EMPTY(CDR(a)) ) a=CDR(a); CDR(a) = vars[i] ? new_list(data[i],empty_list) : data[i]; } } if( err ) break; //----------------------------------- // STEP 7: Group top-most items into result list //----------------------------------- #ifdef DEBUG_PARSE printf("<PARSE> START STEP 7:\n"); dump_arrays( ); #endif //printf("data[0]=");dumpln(data[0]); //printf("vars[0]=");dumpln(vars[0]); for( i=0; i<count; i++ ) if( usedby[i]==-1 ) { //printf("i=%d var=%x is_expr=%d\n",i,vars[i],IS_EXPRESSION(CAR(data[i]))); //boza if( !vars[i] && !IS_EXPRESSION(CAR(data[i]))) //boza { //printf("i=%d\n",i); //boza err = new_error_atom( ERROR_UNUSED_VALUE, data[i] ); //boza break; //boza } if( IS_UNBOUND(poses[i]) ) { append( data[i], &result, &last ); } else { append_ex( data[i], &result, &last ); DEUSE( POS(last) ); POS( last ) = USE( poses[i] ); } } if( err ) break; } // main line //----------------------------------------- // STEP pre-8: Test for error and release unused resources //----------------------------------------- DEUSE( orig_input ); DEUSE( aux ); if( err ) { //printf("ERRRRRRROR\n"); for( i=0; i<count; i++ ) DEUSE( data[i] ); DEUSE( result ); return err; } //----------------------------------- // STEP 8: Remove unnecessary parentheses //----------------------------------- //printf("before=");dumpln(result); // // step 8 removed, because in some cases it removes more // parentheses than needed - e.g. when function body has // only one command // while( IS_EMPTY(CDR(result)) && // IS_LIST(CAR(result)) && // GET_FLAGS(CAR(result),FLAG_EXPRESSION) ) // { // atom_t a = CAR(result); // CAR(result) = empty_list; // DEUSE( result ); // result = a; // } // //printf(" after=");dumpln(result); if( !top_level ) SET_FLAGS( result, FLAG_EXPRESSION ); #ifdef DEBUG_PARSE printf("<PARSE> EXIT="); dumpln(result); #endif if( OPTION_DUMP_AST ) { outter( TEXT("Abstract Syntax Tree:\n\0"), UNKNOWN ); dumpln( result ); outter( TEXT("\n\0"), UNKNOWN ); } return result; }
atom_t build_syntax_tree | ( | atom_t | func | ) |
func | var atom for the parse context |
Parses completely a function. Its source is stored in its body as word, subword, data-tokenized list or command-tokenized list. Building algorithm:
TO
... ENDs
and create them as subfunctions{ // exit is function is already treefied if( IS_NOT_EMPTY(TREE(func)) ) return empty_list; //printf("BUILD_SYNTAX_TREE(FUNC="); //dump(NAME(func)); //printf(",SOURCE="); //dump(SOURCE(func)); //printf(",BODY="); //dump(BODY(func)); //printf(",LOCALS="); //dump(LOCALS(func)); //printf(")\n\n"); if( IS_EMPTY(BODY(func)) ) { // Step 1. Tokenize //printf("SOURCE="); dumpln(SOURCE(func)); atom_t tokens1 = tokenize( SOURCE(func), TOKENIZE_DATA ); if( IS_ERROR(tokens1) ) return tokens1; //printf("TOKENS1="); dumpln(tokens1); atom_t tokens2 = tokenize( tokens1, TOKENIZE_COMMANDS ); DEUSE( tokens1 ); if( IS_ERROR(tokens2) ) return tokens2; //printf("TOKENS2="); dumpln(tokens2); // Step 2. Extract TO..END's atom_t body = preparse( tokens2, func, LEVEL(func) ); if( IS_ERROR(body) ) return body; DEUSE( BODY(func) ); BODY(func) = body; //printf("BODY="); dumpln(BODY(func)); } // Step 3. Parse function body //printf("BODY="); dumpln(BODY(func)); atom_t tree = parse( BODY(func), func, 1 ); if( IS_ERROR(tree) ) return tree; DEUSE(TREE(func)); TREE(func) = tree; //printf("TREE="); dumpln(TREE(func)); // because the might be some new TO..ENDs // scan all locals and build those which // have no trees atom_t local; atom_t locals; for( locals=LOCALS(func); IS_NOT_EMPTY(locals); locals=CDR(locals) ) { local = CAR(locals); if( !DESCR2(local) ) continue; atom_t x = build_syntax_tree( local ); if( IS_ERROR(x) ) return x; } return empty_list; }