/*
 *  SPL - The SPL Programming Language
 *  Copyright (C) 2004, 2005  Clifford Wolf
 *
 *  This program is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; either version 2 of the License, or
 *  (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with this program; if not, write to the Free Software
 *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 *
 *  compiler.y: Compiler for the high-level language
 *
 *  +----------------+
 *  |  \|/ ____ \|/  |   WARNING: In the epilog of this bison parser you will
 *  |  "@'/ ,. \`@"  |   find a hand-written lexer which does a lot of nasty
 *  |  /_| \__/ |_\  |   things. Reading it may cause collywobbles and changing
 *  |     \__U_/     |   it might break the whole compiler.
 *  |      |  |      |                                   You have been warned!!
 *  +----------------+
 */

%{

/*
 * NOTE(review): in the copy under review the header names of the six
 * unconditional #include directives (and of the pthread one) were missing.
 * <stdio.h> (snprintf) and <stdlib.h> (free) are required by the code
 * visible in this file; the remaining four are a reconstruction and must be
 * confirmed against the upstream source.
 */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
#include <assert.h>
#include <unistd.h>

#ifdef ENABLE_PTHREAD_SUPPORT
#include <pthread.h>
#endif

#include "spl.h"

#define COMPAT_H_NO_WIN_INCL
#include "compat.h"

static int spl_yylex (void);
static void spl_yyerror (char const *);

/* Assembler object that all grammar actions emit instructions into. */
static struct spl_asm *as;

static int gen_debug_info;

/* While non-zero, CHECKPOINT() emits nothing. */
static int no_checkp_insn;

static void create_debug_op(int force);

/* Sequence number used to build unique "#rvtolv<N>" temporary names. */
static int rvtolv_counter;
static int import_asm_label_counter;

/*
 * Label scope stack.  Every push stores a fresh id taken from
 * label_stack_counter; the id on top of the stack is embedded in the
 * label names generated below ("L<id>:<name>", "B<id>:<n>", "C<id>:<n>").
 *
 * NOTE(review): none of the three stacks in this prologue checks its
 * 1024-entry bound, so sufficiently deep nesting in the input writes
 * outside the arrays.
 */
static int label_stack[1024];
static int label_stack_index;
static int label_stack_counter;

static void lbstack_push(void)
{
	label_stack[++label_stack_index] = ++label_stack_counter;
}

static void lbstack_pop(void)
{
	label_stack_index--;
}

static int lbstack_value(void)
{
	return label_stack[label_stack_index];
}

/*
 * Break/continue stack.  Each loop construct brackets its code with
 * breakcont_begin()/breakcont_end(); the id on top identifies the loop
 * whose 'B' and 'C' labels are defined and referenced.
 */
static int breakcont_stack[1024];
static int breakcont_stack_index;
static int breakcont_stack_counter;

static void breakcont_begin(void)
{
	breakcont_stack[++breakcont_stack_index] = ++breakcont_stack_counter;
}

static void breakcont_end(void)
{
	breakcont_stack_index--;
}

static int breakcont_value(void)
{
	return breakcont_stack[breakcont_stack_index];
}

/*
 * Emit a NOP and bind the label "<type><scope id>:<loop id>" to it.
 * Reports "Assembler error" and returns 1 if the label cannot be set,
 * returns 0 otherwise.
 */
static int breakcont_label(char type)
{
	char label[100];

	snprintf(label, sizeof(label), "%c%d:%d", type,
			lbstack_value(), breakcont_value());

	if ( spl_asm_setlabel(as, label, spl_asm_add(as, SPL_OP_NOP, 0)) < 0 ) {
		spl_yyerror("Assembler error");
		return 1;
	}

	return 0;
}

/* Emit a GOTO that references the label "<type><scope id>:<loop id>". */
static void breakcont_goto(char type)
{
	char label[100];

	snprintf(label, sizeof(label), "%c%d:%d", type,
			lbstack_value(), breakcont_value());

	spl_asm_reflabel(as, label, spl_asm_add(as, SPL_OP_GOTO, 0));
}

/*
 * Stack of instruction positions (return values of spl_asm_add()) that
 * later actions pop again in order to patch jump targets or to reorder
 * already emitted code.
 */
static int packpatch_stack[1024];
static int packpatch_stack_index;

/* Push v and return it. */
static int pbstack_push(int v)
{
	return packpatch_stack[packpatch_stack_index++] = v;
}

/* Pop and return the most recently pushed value. */
static int pbstack_pop(void)
{
	return packpatch_stack[--packpatch_stack_index];
}

static int php_like_tags_active;
static int php_like_tags_indenting_delim;
static char *php_like_tags_term;

#ifdef ENABLE_PTHREAD_SUPPORT
static pthread_mutex_t compiler_lck = PTHREAD_MUTEX_INITIALIZER;
#endif

/* Emit a CHECKP instruction unless checkpoints are currently suppressed. */
#define CHECKPOINT() \
	do { if (!no_checkp_insn) spl_asm_add(as, SPL_OP_CHECKP, 0); } while (0)

/* Bit flags carried as the semantic value of regex_eval_mode. */
#define REGEX_EVAL_SUBST  1
#define REGEX_EVAL_RETURN 2
#define REGEX_EVAL_NEG    4

%}

/* there are 3 shift/reduce conflicts in this grammar. search  *
 * for "shift/reduce conflict" in the sources below for details. */
%expect 3

%name-prefix="spl_yy"
%token-table
%debug

%union {
	char *text;
	char ch;
}

/* These tokens carry a heap-allocated string that the actions free(). */
%token <text> ID FUNC_ID VALUE SPECIALREF

%token NOTOKEN

%token TRANSLATE_PREFIX
%token TRANSLATE_SEPERATOR
%token TRANSLATE_END

%token STRING_EOL
%token STRING_EOL_S
%token STRING_LABEL
%token STRING_LABEL_S

%token DBLCOMMA

%token DEBUG
%token WARNING
%token ERROR
%token DELETE
%token FUNCTION
%token METHOD
%token IMPORT
%token LOAD
%token OBJECT
%token VAR
%token STATIC
%token NEW
%token THIS
%token IF
%token ELSE
%token DO
%token WHILE
%token FOR
%token FOREACH
%token ASM
%token RETURN
%token EXIT
%token DECLARED
%token UNDEF
%token GOTO
%token BREAK
%token CONTINUE
%token ARRAYREF
%token TRY
%token CATCH
%token THROW
%token SWITCH
%token DEFAULT
%token CASE

%left '.'
%token EF_BEGIN EF_END BLK_BEGIN_NOCTX
%token INSERT_PROG_BEGIN INSERT_PROG_END
%token RVALUE_CONTEXT_BEGIN RVALUE_CONTEXT_END

%right '=' SETCOPY
	ADDEQ SUBEQ MULEQ DIVEQ MODEQ POWEQ
	IADDEQ ISUBEQ IMULEQ IDIVEQ IMODEQ IPOWEQ
	FADDEQ FSUBEQ FMULEQ FDIVEQ FMODEQ FPOWEQ
	OADDEQ OSUBEQ OMULEQ ODIVEQ OMODEQ OPOWEQ
	SAPPEND XCHG

%left LOR
%left LAND

%nonassoc REGEX NREGEX REGEX_SUBST NREGEX_SUBST REGEX_SUBST_R NREGEX_SUBST_R
	REGEX_EVAL NREGEX_EVAL REGEX_EVAL_R NREGEX_EVAL_R REGEX_SEP

%nonassoc PEQ PNE
	EQ NE LT GT LE GE
	IEQ INE ILT IGT ILE IGE
	FEQ FNE FLT FGT FLE FGE
	OEQ ONE OLT OGT OLE OGE
	SEQ SNE SLT SGT SLE SGE

%right POP SHIFT PUSH UNSHIFT NEXT PREV EVAL LENGTHOF ELEMENTSOF

%left SUB ADD ISUB IADD FSUB FADD OSUB OADD
%left MUL DIV MOD IMUL IDIV IMOD FMUL FDIV FMOD OMUL ODIV OMOD
%left POW IPOW FPOW OPOW

%left LNOT
%left INC DEC
%left NEG

%right DEFINED
%right ENC
%left CAT

/* These nonterminals yield a single character / small flag set. */
%type <ch> function_method
%type <ch> foreach_type
%type <ch> regex_eval_mode

%%

/* A program is a possibly empty sequence of commands; create_debug_op(0)
 * is called before each command. */
prog:
	/* empty */
|	prog { create_debug_op(0); } cmd
	;

/* One statement.  Actions emit code into `as`; jump instructions whose
 * target is not yet known are remembered with pbstack_push() and patched
 * with spl_asm_setaddr() once the target has been emitted. */
cmd:
	'{' { spl_asm_add(as, SPL_OP_BEGIN, 0); }
	prog '}' { spl_asm_add(as, SPL_OP_END, 0); }
|	complex_expr ';' {
		spl_asm_add(as, SPL_OP_DROP, 0);
		CHECKPOINT();
	}
|	lvalue XCHG lvalue ';' { spl_asm_add(as, SPL_OP_LXCHG, 0); }
|	';' { CHECKPOINT(); }
|	BLK_BEGIN_NOCTX prog ']' '}'
|	ID ':' {
		/* label definition: "L<scope id>:<name>" */
		char *label;
		my_asprintf(&label, "L%d:%s", lbstack_value(), $1);
		if ( spl_asm_setlabel(as, label, spl_asm_add(as, SPL_OP_NOP, 0)) < 0 ) {
			spl_yyerror("Assembler error");
			free(label);
			/* the identifier must be released on the abort path too */
			free($1);
			YYABORT;
		}
		free(label);
		free($1);
	} cmd
|	GOTO ID ';' {
		char *label;
		my_asprintf(&label, "L%d:%s", lbstack_value(), $2);
		spl_asm_reflabel(as, label, spl_asm_add(as, SPL_OP_GOTO, 0));
		free(label);
		free($2);
	}
	/* NOTE(review): unlike GOTO/RETURN/EXIT, BREAK and CONTINUE do not
	 * consume a ';' themselves; a following ';' is parsed as a separate
	 * empty command. Confirm this is intended. */
|	BREAK { breakcont_goto('B'); }
|	CONTINUE { breakcont_goto('C'); }
|	VAR var_decl ';' { CHECKPOINT(); }
|	STATIC static_decl ';' { CHECKPOINT(); }
|	IF '(' complex_expr ')' {
		/* the pushed JUMP is patched by else_branch */
		spl_asm_add(as, SPL_OP_UNLESS, 0);
		pbstack_push(spl_asm_add(as, SPL_OP_JUMP, 0));
	} cmd else_branch { CHECKPOINT(); }
|	WHILE {
		/* loop start; doubles as the 'continue' target */
		pbstack_push(spl_asm_add(as, SPL_OP_NOP, 0));
		breakcont_begin();
		if (breakcont_label('C')) YYABORT;
	} '(' complex_expr ')' {
		spl_asm_add(as, SPL_OP_UNLESS, 0);
		pbstack_push(spl_asm_add(as, SPL_OP_JUMP, 0));
	} cmd {
		int label_end = pbstack_pop();
		int label_begin = pbstack_pop();
		spl_asm_setaddr(as, spl_asm_add(as, SPL_OP_JUMP, 0), label_begin);
		spl_asm_setaddr(as, label_end, spl_asm_add(as, SPL_OP_NOP, 0));
		if (breakcont_label('B')) YYABORT;
		breakcont_end();
	} { CHECKPOINT(); }
|	DO {
		pbstack_push(spl_asm_add(as, SPL_OP_NOP, 0));
		breakcont_begin();
	} cmd {
		/* 'continue' lands right before the condition */
		if (breakcont_label('C')) YYABORT;
	} WHILE '(' complex_expr ')' ';' {
		int label_begin = pbstack_pop();
		spl_asm_add(as, SPL_OP_IF, 0);
		spl_asm_setaddr(as, spl_asm_add(as, SPL_OP_JUMP, 0), label_begin);
		if (breakcont_label('B')) YYABORT;
		breakcont_end();
	} { CHECKPOINT(); }
|	FOR '(' {
		breakcont_begin();
		spl_asm_add(as, SPL_OP_BEGIN, 0);
	} for_cmds ';' {
		/* start of the condition */
		pbstack_push(spl_asm_add(as, SPL_OP_NOP, 0));
	} complex_expr ';' {
		/* jump taken when the loop finishes */
		spl_asm_add(as, SPL_OP_UNLESS, 0);
		pbstack_push(spl_asm_add(as, SPL_OP_JUMP, 0));
	} {
		/* start of the per-iteration commands */
		pbstack_push(spl_asm_add(as, SPL_OP_NOP, 0));
	} for_cmds ')' {
		/* start of the loop body */
		pbstack_push(spl_asm_add(as, SPL_OP_NOP, 0));
	} cmd {
		if (breakcont_label('C')) YYABORT;
		int label_body_start = pbstack_pop();
		int label_extra_start = pbstack_pop();
		int label_finish_jump = pbstack_pop();
		int label_cond_start = pbstack_pop();
		int label_body_end = spl_asm_add(as, SPL_OP_JUMP, 0);
		spl_asm_setaddr(as, label_body_end, label_cond_start);
		spl_asm_setaddr(as, label_finish_jump, spl_asm_add(as, SPL_OP_NOP, 0));
		/* The per-iteration commands were emitted before the body;
		 * presumably this swaps the two regions so the body runs
		 * first -- confirm against spl_asm_shuffle(). */
		spl_asm_shuffle(as,
			label_body_end - label_body_start, label_body_start, label_extra_start,
			label_body_start - label_extra_start, label_extra_start,
				label_extra_start + (label_body_end - label_body_start),
			-1);
		spl_asm_add(as, SPL_OP_END, 0);
		if (breakcont_label('B')) YYABORT;
		breakcont_end();
	} { CHECKPOINT(); }
|	FOREACH foreach_type ID {
		breakcont_begin();
		spl_asm_add(as, SPL_OP_BEGIN, 0);
		spl_asm_add(as, SPL_OP_PUSHC, "#index");
		spl_asm_add(as, SPL_OP_UNDEF, 0);
		spl_asm_add(as, SPL_OP_POPL, 0);
		spl_asm_add(as, SPL_OP_PUSHC, "#array");
	} '(' complex_expr ')' {
		spl_asm_add(as, SPL_OP_POPL, 0);
		if (breakcont_label('C')) YYABORT;
		pbstack_push(spl_asm_add(as, SPL_OP_NOP, 0));
		spl_asm_add(as, SPL_OP_PUSHC, "#index");
		spl_asm_add(as, SPL_OP_PUSH, "#array");
		spl_asm_add(as, SPL_OP_PUSH, "#index");
		spl_asm_add(as, SPL_OP_NEXT, 0);
		spl_asm_add(as, SPL_OP_COPY, 0);
		spl_asm_add(as, SPL_OP_DEFINED, 0);
		spl_asm_add(as, SPL_OP_UNLESS, 0);
		pbstack_push(spl_asm_add(as, SPL_OP_JUMP, 0));
		spl_asm_add(as, SPL_OP_POPL, 0);
		if ($2 == 'V') {
			/* "foreach[]": the loop variable receives the element value */
			spl_asm_add(as, SPL_OP_PUSHC, $3);
			spl_asm_add(as, SPL_OP_PUSHC, "#array");
			spl_asm_add(as, SPL_OP_PUSH, "#index");
			spl_asm_add(as, SPL_OP_HENC, 0);
			spl_asm_add(as, SPL_OP_DOTCAT, 0);
			spl_asm_add(as, SPL_OP_GETVAL, 0);
			spl_asm_add(as, SPL_OP_POPL, 0);
		} else {
			/* plain "foreach": the loop variable receives the key */
			spl_asm_add(as, SPL_OP_PUSHC, $3);
			spl_asm_add(as, SPL_OP_PUSH, "#index");
			spl_asm_add(as, SPL_OP_POPL, 0);
		}
	} cmd {
		int finish_jump = pbstack_pop();
		spl_asm_setaddr(as, spl_asm_add(as, SPL_OP_JUMP, 0), pbstack_pop());
		spl_asm_setaddr(as, finish_jump, spl_asm_add(as, SPL_OP_DROP, 0));
		spl_asm_add(as, SPL_OP_DROP, 0);
		spl_asm_add(as, SPL_OP_END, 0);
		if (breakcont_label('B')) YYABORT;
		breakcont_end();
		CHECKPOINT();
		free($3);
	}
|	function_method ID '(' {
		/* named function or method; opens a new label scope */
		lbstack_push();
		spl_asm_add(as, SPL_OP_PUSHC, $2);
		pbstack_push(spl_asm_add(as, $1 == 'F' ? SPL_OP_REGF : SPL_OP_REGM, 0));
	} arglist_def optional_list_tail ')' {
		spl_asm_add(as, SPL_OP_CLEARA, 0);
		if (gen_debug_info) {
			spl_asm_add(as, SPL_OP_PUSHC, "#func");
			spl_asm_add(as, SPL_OP_PUSHC, $2);
			spl_asm_add(as, SPL_OP_POPL, 0);
		}
	} '{' prog '}' {
		/* implicit "return undef" at the end of the body */
		spl_asm_add(as, SPL_OP_UNDEF, 0);
		spl_asm_add(as, SPL_OP_RETURN, 0);
		spl_asm_setaddr(as, pbstack_pop(), spl_asm_add(as, SPL_OP_POPL, 0));
		CHECKPOINT();
		lbstack_pop();
		free($2);
	}
|	OBJECT lvalue lvalue '{' {
		lbstack_push();
		spl_asm_add(as, SPL_OP_OBJECT, 0);
	} prog '}' {
		spl_asm_add(as, SPL_OP_ENDOBJ, 0);
		CHECKPOINT();
		lbstack_pop();
	}
|	OBJECT lvalue '{' {
		/* form with a single lvalue: UNDEF is pushed in place of the
		 * second one and an "init" method returning !THIS is emitted */
		lbstack_push();
		spl_asm_add(as, SPL_OP_UNDEF, 0);
		spl_asm_add(as, SPL_OP_OBJECT, 0);
		spl_asm_add(as, SPL_OP_PUSHC, "init");
		pbstack_push(spl_asm_add(as, SPL_OP_REGM, 0));
		spl_asm_add(as, SPL_OP_CLEARA, 0);
		spl_asm_add(as, SPL_OP_PUSH, "!THIS");
		spl_asm_add(as, SPL_OP_RETURN, 0);
		spl_asm_setaddr(as, pbstack_pop(), spl_asm_add(as, SPL_OP_POPL, 0));
	} prog '}' {
		spl_asm_add(as, SPL_OP_ENDOBJ, 0);
		CHECKPOINT();
		lbstack_pop();
	}
|	TRY '(' ID ')' '{' {
		lbstack_push();
		spl_asm_add(as, SPL_OP_BEGIN, 0);
		spl_asm_add(as, SPL_OP_PUSHC, $3);
		spl_asm_add(as, SPL_OP_TRY, 0);
		free($3);
		int j = spl_asm_add(as, SPL_OP_JUMP, 0);
		int n = spl_asm_add(as, SPL_OP_NOP, 0);
		pbstack_push(n);
		pbstack_push(j);
	} prog {
		pbstack_push(spl_asm_add(as, SPL_OP_JUMP, 0));
	} catch_list '}' {
		spl_asm_setaddr(as, pbstack_pop(), spl_asm_add(as, SPL_OP_NOP, 0));
		int j = pbstack_pop();
		int n = pbstack_pop();
		spl_asm_setaddr(as, j, n);
		spl_asm_add(as, SPL_OP_END, 0);
		lbstack_pop();
	}
|	THROW complex_expr ';' { spl_asm_add(as, SPL_OP_THROW, 0); }
|	SWITCH '{' { spl_asm_add(as, SPL_OP_BEGIN, 0); } prog {
		int case_begin_jump = spl_asm_add(as, SPL_OP_JUMP, 0);
		int fin_jump = spl_asm_add(as, SPL_OP_JUMP, 0);
		spl_asm_setaddr(as, case_begin_jump, spl_asm_add(as, SPL_OP_NOP, 0));
		pbstack_push(fin_jump);
	} case_list '}' {
		spl_asm_setaddr(as, pbstack_pop(), spl_asm_add(as, SPL_OP_NOP, 0));
		spl_asm_add(as, SPL_OP_END, 0);
	}
|	ASM asm_list ';' { CHECKPOINT(); }
|	RETURN complex_expr ';' { spl_asm_add(as, SPL_OP_RETURN, 0); }
|	RETURN ';' { spl_asm_add(as, SPL_OP_UNDEF, 0); }
		{ spl_asm_add(as, SPL_OP_RETURN, 0); }
|	EXIT ';' { spl_asm_add(as, SPL_OP_HALT, 0); }
|	DELETE lvalue ';' { spl_asm_add(as, SPL_OP_DELETE, 0); }
		{ CHECKPOINT(); }
|	DEBUG complex_expr ';' { spl_asm_add(as, SPL_OP_DEBUG, 0); }
|	WARNING complex_expr ';' { spl_asm_add(as, SPL_OP_WARNING, 0); }
|	ERROR complex_expr ';' { spl_asm_add(as, SPL_OP_ERROR, 0); }
|	IMPORT complex_expr ';' { spl_asm_add(as, SPL_OP_IMPORT, 0); }
|	LOAD complex_expr ';' { spl_asm_add(as, SPL_OP_LOAD, 0); }
	;

/* Each VALUE is handed to the assembler as one line of assembler text. */
asm_list:
	/* empty */
|	asm_list VALUE {
		spl_asm_parse_line(as, $2);
		free($2);
	}
	;

/** shift/reduce conflict **
 *
 * In a statement such as:
 *
 *	for (var x=1, y=2; .....
 *	            ^
 * there is a shift/reduce conflict whether the comma separates two commands
 * or two var_decl_entries. It is resolved by shifting, so the DBLCOMMA (,,)
 * operator is needed for adding additional commands.
*/

/* Comma (or ",,") separated commands in the head of a for-loop.  The value
 * of every expression is dropped. */
for_cmds:
	/* empty */
|	VAR var_decl
|	complex_expr {
		spl_asm_add(as, SPL_OP_DROP, 0);
		CHECKPOINT();
	}
|	for_cmds ',' complex_expr {
		spl_asm_add(as, SPL_OP_DROP, 0);
		CHECKPOINT();
	}
|	for_cmds DBLCOMMA complex_expr {
		spl_asm_add(as, SPL_OP_DROP, 0);
		CHECKPOINT();
	}
	;

var_decl:
	var_decl_entry
|	var_decl ',' var_decl_entry
	;

/* One entry of a "var" declaration; the variable is stored with POPL. */
var_decl_entry:
	lvalue {
		spl_asm_add(as, SPL_OP_UNDEF, 0);
		spl_asm_add(as, SPL_OP_POPL, 0);
	}
|	lvalue '=' complex_expr {
		spl_asm_add(as, SPL_OP_POPL, 0);
	}
|	lvalue SETCOPY {
		/* declare the variable as undef first, then assign with POPU */
		spl_asm_add(as, SPL_OP_COPY, 0);
		spl_asm_add(as, SPL_OP_UNDEF, 0);
		spl_asm_add(as, SPL_OP_POPL, 0);
	} complex_expr {
		spl_asm_add(as, SPL_OP_POPU, 0);
	}
	;

/* Every entry of a "static" declaration must be a static_decl_entry.
 * Using var_decl_entry for the entries after the first comma would emit
 * POPL instead of POPS for them, i.e. declare them like "var". */
static_decl:
	static_decl_entry
|	static_decl ',' static_decl_entry
	;

/* One entry of a "static" declaration; same shape as var_decl_entry but
 * the variable is stored with POPS. */
static_decl_entry:
	lvalue {
		spl_asm_add(as, SPL_OP_UNDEF, 0);
		spl_asm_add(as, SPL_OP_POPS, 0);
	}
|	lvalue '=' complex_expr {
		spl_asm_add(as, SPL_OP_POPS, 0);
	}
|	lvalue SETCOPY {
		spl_asm_add(as, SPL_OP_COPY, 0);
		spl_asm_add(as, SPL_OP_UNDEF, 0);
		spl_asm_add(as, SPL_OP_POPS, 0);
	} complex_expr {
		spl_asm_add(as, SPL_OP_POPU, 0);
	}
	;

/** shift/reduce conflict **
 *
 * This is the typical else-branch shift/reduce conflict which can be found in
 * almost every programming language grammar.
*/

/* Patches the conditional JUMP that the IF rule left on the patch stack. */
else_branch:
	/* no else */ {
		spl_asm_setaddr(as, pbstack_pop(), spl_asm_add(as, SPL_OP_NOP, 0));
	}
|	ELSE {
		/* end of the "then" part: jump over the else part, and let
		 * the failed condition continue right here */
		int cond_jump = pbstack_pop();
		pbstack_push(spl_asm_add(as, SPL_OP_JUMP, 0));
		spl_asm_setaddr(as, cond_jump, spl_asm_add(as, SPL_OP_NOP, 0));
	} cmd {
		spl_asm_setaddr(as, pbstack_pop(), spl_asm_add(as, SPL_OP_NOP, 0));
	}
	;

catch_list:
	catch_list_entry
|	catch_list catch_list_entry
	;

/* The patch stack holds two positions here (top first): the jump emitted
 * after the try body and the still unresolved jump of the preceding
 * TRY/CATCH. */
catch_list_entry:
	CATCH ID ':' {
		int after_body = pbstack_pop();
		int pending = pbstack_pop();
		spl_asm_setaddr(as, pending, spl_asm_add(as, SPL_OP_NOP, 0));
		spl_asm_add(as, SPL_OP_PUSHC, $2);
		int catch_insn = spl_asm_add(as, SPL_OP_CATCH, 0);
		free($2);
		pending = spl_asm_add(as, SPL_OP_JUMP, 0);
		pbstack_push(pending);
		pbstack_push(after_body);
		spl_asm_setaddr(as, catch_insn, spl_asm_add(as, SPL_OP_NOP, 0));
	} prog {
		int after_body = pbstack_pop();
		int pending = pbstack_pop();
		spl_asm_setaddr(as, spl_asm_add(as, SPL_OP_JUMP, 0), after_body);
		pbstack_push(pending);
		pbstack_push(after_body);
	}
	;

case_list:
	case_list_entry
|	case_list case_list_entry
	;

/* A case runs its prog when the condition holds and then jumps to the
 * final jump of the switch; otherwise control continues after the prog. */
case_list_entry:
	case_list_entry_cond {
		spl_asm_add(as, SPL_OP_UNLESS, 0);
		pbstack_push(spl_asm_add(as, SPL_OP_JUMP, 0));
	} prog {
		int skip_jump = pbstack_pop();
		int switch_end = pbstack_pop();
		pbstack_push(switch_end);
		spl_asm_setaddr(as, spl_asm_add(as, SPL_OP_JUMP, 0), switch_end);
		spl_asm_setaddr(as, skip_jump, spl_asm_add(as, SPL_OP_NOP, 0));
	}
	;

case_list_entry_cond:
	CASE complex_expr ':'
|	DEFAULT ':' { spl_asm_add(as, SPL_OP_ONE, 0); }
	;

/* 'V' when "[]" is given, 'K' otherwise. */
foreach_type:
	'[' ']'     { $$ = 'V'; }
|	/* empty */ { $$ = 'K'; }
	;

/* 'F' for FUNCTION, 'M' for METHOD. */
function_method:
	FUNCTION { $$ = 'F'; }
|	METHOD   { $$ = 'M'; }
	;

/* Left operand plus operator of a regex-eval expression; the value is a
 * combination of the REGEX_EVAL_* flags. */
regex_eval_mode:
	basic_expr REGEX_EVAL_R  { $$ = REGEX_EVAL_RETURN; }
|	basic_expr NREGEX_EVAL_R { $$ = REGEX_EVAL_RETURN|REGEX_EVAL_NEG; }
|	lvalue REGEX_EVAL        { $$ = REGEX_EVAL_SUBST; }
|	lvalue NREGEX_EVAL       { $$ = REGEX_EVAL_SUBST|REGEX_EVAL_NEG; }
	;

lvalue:
	lvalue_head lvalue_tail_list
|	rvalue_to_lvalue_head '.' {
		/* store the computed value in a uniquely named "#rvtolv<N>"
		 * local and continue with that name as the lvalue head */
		char tmp_name[32];
		snprintf(tmp_name, 32, "#rvtolv%d", rvtolv_counter++);
		spl_asm_add(as, SPL_OP_PUSHC, tmp_name);
		spl_asm_add(as, SPL_OP_XCHG, 0);
		spl_asm_add(as, SPL_OP_POPL, 0);
		spl_asm_add(as, SPL_OP_PUSHC, tmp_name);
	} lvalue_tail lvalue_tail_list {
		spl_asm_add(as, SPL_OP_DOTCAT, 0);
	}
	;

rvalue_to_lvalue_head:
	simple_call
|	'(' complex_expr ')'
	;

lvalue_head:
	ID {
		spl_asm_add(as, SPL_OP_PUSHC, $1);
		free($1);
	}
|	THIS {
		spl_asm_add(as, SPL_OP_PUSHC, "!THIS");
	}
|	SPECIALREF {
		spl_asm_add(as, SPL_OP_PUSHC, $1);
		free($1);
	}
	;

/* Each further ".component" is joined to what precedes it with DOTCAT. */
lvalue_tail_list:
	/* empty */
|	'.' lvalue_tail lvalue_tail_list {
		spl_asm_add(as, SPL_OP_DOTCAT, 0);
	}
	;

lvalue_tail:
	ID {
		spl_asm_add(as, SPL_OP_PUSHC, $1);
		free($1);
	}
|	'[' complex_expr ']' {
		spl_asm_add(as, SPL_OP_HENC, 0);
	}
|	SPECIALREF {
		spl_asm_add(as, SPL_OP_PUSHC, $1);
		free($1);
	}
	;

val_list_entry:
	VALUE {
		spl_asm_add(as, SPL_OP_PUSHC, $1);
		free($1);
	}
	;

/* Adjacent VALUE tokens are concatenated. */
val_list:
	val_list_entry
|	val_list val_list_entry {
		spl_asm_add(as, SPL_OP_CAT, 0);
	}
	;

/** shift/reduce conflict **
 *
 * The 'rvalue CAT rvalue' creates another simple shift/reduce conflict.
 * Shifting is ok - so we have no problem here..
*/ rvalue: rvalue_primitives | lvalue INC { spl_asm_add(as, SPL_OP_POSTINC, 0); } | lvalue DEC { spl_asm_add(as, SPL_OP_POSTDEC, 0); } | INC lvalue { spl_asm_add(as, SPL_OP_PREINC, 0); } | DEC lvalue { spl_asm_add(as, SPL_OP_PREDEC, 0); } | LENGTHOF rvalue { spl_asm_add(as, SPL_OP_LENGTH, 0); } | ELEMENTSOF rvalue { spl_asm_add(as, SPL_OP_ELEMENTS, 0); } | rvalue CAT rvalue { spl_asm_add(as, SPL_OP_CAT, 0); } | ID ENC { spl_asm_add(as, SPL_OP_ZERO, 0); } basic_expr { char *fname; my_asprintf(&fname, "encode_%s", $1); spl_asm_add(as, SPL_OP_PUSHAV, 0); spl_asm_add(as, SPL_OP_DCALL, fname); free(fname); free($1); } | ADD basic_expr %prec NEG | SUB basic_expr %prec NEG { spl_asm_add(as, SPL_OP_NEG, 0); } | LNOT basic_expr { spl_asm_add(as, SPL_OP_LNOT, 0); } | UNDEF { spl_asm_add(as, SPL_OP_UNDEF, 0); } | DEFINED rvalue { spl_asm_add(as, SPL_OP_DEFINED, 0); } | DECLARED lvalue { spl_asm_add(as, SPL_OP_DECLARED, 0); } | '[' { spl_asm_add(as, SPL_OP_UNDEF, 0); } array_list optional_list_tail ']' | function_method '(' { lbstack_push(); pbstack_push(spl_asm_add(as, $1 == 'F' ? 
SPL_OP_REGF : SPL_OP_REGM, 0)); } arglist_def optional_list_tail ')' { spl_asm_add(as, SPL_OP_CLEARA, 0); } '{' prog '}' { spl_asm_add(as, SPL_OP_UNDEF, 0); spl_asm_add(as, SPL_OP_RETURN, 0); spl_asm_setaddr(as, pbstack_pop(), spl_asm_add(as, SPL_OP_NOP, 0)); lbstack_pop(); } | EF_BEGIN { lbstack_push(); pbstack_push(spl_asm_add(as, SPL_OP_ENTER, 0)); } prog EF_END { spl_asm_add(as, SPL_OP_UNDEF, 0); spl_asm_add(as, SPL_OP_RETURN, 0); spl_asm_setaddr(as, pbstack_pop(), spl_asm_add(as, SPL_OP_NOP, 0)); lbstack_pop(); } | INSERT_PROG_BEGIN asm_list ';' { no_checkp_insn++; } prog INSERT_PROG_END asm_list ';' { no_checkp_insn--; } | RVALUE_CONTEXT_BEGIN { no_checkp_insn++; } { spl_asm_add(as, SPL_OP_BEGIN, 0); } basic_expr RVALUE_CONTEXT_END { spl_asm_add(as, SPL_OP_END, 0); } { no_checkp_insn--; } | func_expr '(' { pbstack_push(spl_asm_add(as, SPL_OP_ZERO, 0)); } arglist_call optional_list_tail ')' { pbstack_pop(); spl_asm_add(as, SPL_OP_LIFTCALL, 0); spl_asm_add(as, SPL_OP_CALL, 0); } | TRANSLATE_PREFIX { pbstack_push(spl_asm_add(as, SPL_OP_ZERO, 0)); } translate_args TRANSLATE_END { pbstack_pop(); spl_asm_add(as, SPL_OP_DCALL, "_"); } | simple_call ; simple_call: NEW { pbstack_push(spl_asm_add(as, SPL_OP_NOP, 0)); } rvalue_primitives '(' { pbstack_push(spl_asm_add(as, SPL_OP_ZERO, 0)); } arglist_call optional_list_tail ')' { int call_pos = spl_asm_add(as, SPL_OP_NEW, 0); int arg_pos = pbstack_pop(); int fp_pos = pbstack_pop(); int fp_size = arg_pos - fp_pos; int arg_size = call_pos - arg_pos; spl_asm_shuffle(as, fp_size, fp_pos, fp_pos + arg_size, arg_size, arg_pos, fp_pos, -1); } | MUL { pbstack_push(spl_asm_add(as, SPL_OP_NOP, 0)); } func_expr '(' { spl_asm_add(as, SPL_OP_SETCTX, 0); } { pbstack_push(spl_asm_add(as, SPL_OP_ZERO, 0)); } arglist_call optional_list_tail ')' { int call_pos = spl_asm_add(as, SPL_OP_CALL, 0); int arg_pos = pbstack_pop(); int fp_pos = pbstack_pop(); int fp_size = arg_pos - fp_pos; int arg_size = call_pos - arg_pos; spl_asm_shuffle(as, 
fp_size, fp_pos, fp_pos + arg_size, arg_size, arg_pos, fp_pos, -1); } | '$' ID '(' { pbstack_push(spl_asm_add(as, SPL_OP_ZERO, 0)); } arglist_call optional_list_tail ')' { pbstack_pop(); spl_asm_add(as, SPL_OP_CLIB, $2); free($2); } | FUNC_ID '(' { pbstack_push(spl_asm_add(as, SPL_OP_ZERO, 0)); } arglist_call optional_list_tail ')' { pbstack_pop(); spl_asm_add(as, SPL_OP_DCALL, $1); free($1); } ; rvalue_primitives: val_list | '(' complex_expr ')' | '#' '(' complex_expr ')' { spl_asm_add(as, SPL_OP_TOINT, 0); } | '.' '(' complex_expr ')' { spl_asm_add(as, SPL_OP_TOFLOAT, 0); } | lvalue { spl_asm_add(as, SPL_OP_GETVAL, 0); } ; complex_expr: lvalue '=' complex_expr { spl_asm_add(as, SPL_OP_POPIC, 0); } | lvalue SETCOPY complex_expr { spl_asm_add(as, SPL_OP_POPUC, 0); } | lvalue ADDEQ { spl_asm_add(as, SPL_OP_COPY, 0); spl_asm_add(as, SPL_OP_GETVAL, 0); } complex_expr { spl_asm_add(as, SPL_OP_ADD, 0); spl_asm_add(as, SPL_OP_POPIC, 0); } | lvalue SUBEQ { spl_asm_add(as, SPL_OP_COPY, 0); spl_asm_add(as, SPL_OP_GETVAL, 0); } complex_expr { spl_asm_add(as, SPL_OP_SUB, 0); spl_asm_add(as, SPL_OP_POPIC, 0); } | lvalue MULEQ { spl_asm_add(as, SPL_OP_COPY, 0); spl_asm_add(as, SPL_OP_GETVAL, 0); } complex_expr { spl_asm_add(as, SPL_OP_MUL, 0); spl_asm_add(as, SPL_OP_POPIC, 0); } | lvalue DIVEQ { spl_asm_add(as, SPL_OP_COPY, 0); spl_asm_add(as, SPL_OP_GETVAL, 0); } complex_expr { spl_asm_add(as, SPL_OP_DIV, 0); spl_asm_add(as, SPL_OP_POPIC, 0); } | lvalue MODEQ { spl_asm_add(as, SPL_OP_COPY, 0); spl_asm_add(as, SPL_OP_GETVAL, 0); } complex_expr { spl_asm_add(as, SPL_OP_MOD, 0); spl_asm_add(as, SPL_OP_POPIC, 0); } | lvalue POWEQ { spl_asm_add(as, SPL_OP_COPY, 0); spl_asm_add(as, SPL_OP_GETVAL, 0); } complex_expr { spl_asm_add(as, SPL_OP_POW, 0); spl_asm_add(as, SPL_OP_POPIC, 0); } | lvalue IADDEQ { spl_asm_add(as, SPL_OP_COPY, 0); spl_asm_add(as, SPL_OP_GETVAL, 0); } complex_expr { spl_asm_add(as, SPL_OP_IADD, 0); spl_asm_add(as, SPL_OP_POPIC, 0); } | lvalue ISUBEQ { 
spl_asm_add(as, SPL_OP_COPY, 0); spl_asm_add(as, SPL_OP_GETVAL, 0); } complex_expr { spl_asm_add(as, SPL_OP_ISUB, 0); spl_asm_add(as, SPL_OP_POPIC, 0); } | lvalue IMULEQ { spl_asm_add(as, SPL_OP_COPY, 0); spl_asm_add(as, SPL_OP_GETVAL, 0); } complex_expr { spl_asm_add(as, SPL_OP_IMUL, 0); spl_asm_add(as, SPL_OP_POPIC, 0); } | lvalue IDIVEQ { spl_asm_add(as, SPL_OP_COPY, 0); spl_asm_add(as, SPL_OP_GETVAL, 0); } complex_expr { spl_asm_add(as, SPL_OP_IDIV, 0); spl_asm_add(as, SPL_OP_POPIC, 0); } | lvalue IMODEQ { spl_asm_add(as, SPL_OP_COPY, 0); spl_asm_add(as, SPL_OP_GETVAL, 0); } complex_expr { spl_asm_add(as, SPL_OP_IMOD, 0); spl_asm_add(as, SPL_OP_POPIC, 0); } | lvalue IPOWEQ { spl_asm_add(as, SPL_OP_COPY, 0); spl_asm_add(as, SPL_OP_GETVAL, 0); } complex_expr { spl_asm_add(as, SPL_OP_IPOW, 0); spl_asm_add(as, SPL_OP_POPIC, 0); } | lvalue FADDEQ { spl_asm_add(as, SPL_OP_COPY, 0); spl_asm_add(as, SPL_OP_GETVAL, 0); } complex_expr { spl_asm_add(as, SPL_OP_FADD, 0); spl_asm_add(as, SPL_OP_POPIC, 0); } | lvalue FSUBEQ { spl_asm_add(as, SPL_OP_COPY, 0); spl_asm_add(as, SPL_OP_GETVAL, 0); } complex_expr { spl_asm_add(as, SPL_OP_FSUB, 0); spl_asm_add(as, SPL_OP_POPIC, 0); } | lvalue FMULEQ { spl_asm_add(as, SPL_OP_COPY, 0); spl_asm_add(as, SPL_OP_GETVAL, 0); } complex_expr { spl_asm_add(as, SPL_OP_FMUL, 0); spl_asm_add(as, SPL_OP_POPIC, 0); } | lvalue FDIVEQ { spl_asm_add(as, SPL_OP_COPY, 0); spl_asm_add(as, SPL_OP_GETVAL, 0); } complex_expr { spl_asm_add(as, SPL_OP_FDIV, 0); spl_asm_add(as, SPL_OP_POPIC, 0); } | lvalue FMODEQ { spl_asm_add(as, SPL_OP_COPY, 0); spl_asm_add(as, SPL_OP_GETVAL, 0); } complex_expr { spl_asm_add(as, SPL_OP_FMOD, 0); spl_asm_add(as, SPL_OP_POPIC, 0); } | lvalue FPOWEQ { spl_asm_add(as, SPL_OP_COPY, 0); spl_asm_add(as, SPL_OP_GETVAL, 0); } complex_expr { spl_asm_add(as, SPL_OP_FPOW, 0); spl_asm_add(as, SPL_OP_POPIC, 0); } | lvalue OADDEQ { spl_asm_add(as, SPL_OP_COPY, 0); spl_asm_add(as, SPL_OP_GETVAL, 0); } complex_expr { spl_asm_add(as, 
SPL_OP_OBJOP, "add"); spl_asm_add(as, SPL_OP_POPIC, 0); } | lvalue OSUBEQ { spl_asm_add(as, SPL_OP_COPY, 0); spl_asm_add(as, SPL_OP_GETVAL, 0); } complex_expr { spl_asm_add(as, SPL_OP_OBJOP, "sub"); spl_asm_add(as, SPL_OP_POPIC, 0); } | lvalue OMULEQ { spl_asm_add(as, SPL_OP_COPY, 0); spl_asm_add(as, SPL_OP_GETVAL, 0); } complex_expr { spl_asm_add(as, SPL_OP_OBJOP, "mul"); spl_asm_add(as, SPL_OP_POPIC, 0); } | lvalue ODIVEQ { spl_asm_add(as, SPL_OP_COPY, 0); spl_asm_add(as, SPL_OP_GETVAL, 0); } complex_expr { spl_asm_add(as, SPL_OP_OBJOP, "div"); spl_asm_add(as, SPL_OP_POPIC, 0); } | lvalue OMODEQ { spl_asm_add(as, SPL_OP_COPY, 0); spl_asm_add(as, SPL_OP_GETVAL, 0); } complex_expr { spl_asm_add(as, SPL_OP_OBJOP, "mod"); spl_asm_add(as, SPL_OP_POPIC, 0); } | lvalue OPOWEQ { spl_asm_add(as, SPL_OP_COPY, 0); spl_asm_add(as, SPL_OP_GETVAL, 0); } complex_expr { spl_asm_add(as, SPL_OP_OBJOP, "pow"); spl_asm_add(as, SPL_OP_POPIC, 0); } | lvalue SAPPEND { spl_asm_add(as, SPL_OP_COPY, 0); spl_asm_add(as, SPL_OP_GETVAL, 0); } complex_expr { spl_asm_add(as, SPL_OP_CAT, 0); spl_asm_add(as, SPL_OP_POPIC, 0); } | basic_expr '?' 
{ spl_asm_add(as, SPL_OP_UNLESS, 0); pbstack_push(spl_asm_add(as, SPL_OP_JUMP, 0)); } complex_expr ':' { int bp_pos = pbstack_pop(); pbstack_push(spl_asm_add(as, SPL_OP_JUMP, 0)); spl_asm_setaddr(as, bp_pos, spl_asm_add(as, SPL_OP_NOP, 0)); } complex_expr { spl_asm_setaddr(as, pbstack_pop(), spl_asm_add(as, SPL_OP_NOP, 0)); } | POP rvalue { spl_asm_add(as, SPL_OP_APOP, 0); } | SHIFT rvalue { spl_asm_add(as, SPL_OP_ASHIFT, 0); } | PUSH lvalue ',' complex_expr { spl_asm_add(as, SPL_OP_APUSH, 0); } | UNSHIFT lvalue ',' complex_expr { spl_asm_add(as, SPL_OP_AUNSHIFT, 0); } | NEXT basic_expr ',' rvalue { spl_asm_add(as, SPL_OP_NEXT, 0); } | PREV basic_expr ',' rvalue { spl_asm_add(as, SPL_OP_PREV, 0); } | EVAL rvalue { spl_asm_add(as, SPL_OP_EVAL, 0); } | basic_expr ; basic_expr: rvalue | basic_expr PEQ basic_expr { spl_asm_add(as, SPL_OP_PEQ, 0); } | basic_expr PNE basic_expr { spl_asm_add(as, SPL_OP_PEQ, 0); spl_asm_add(as, SPL_OP_LNOT, 0); } | basic_expr ADD basic_expr { spl_asm_add(as, SPL_OP_ADD, 0); } | basic_expr SUB basic_expr { spl_asm_add(as, SPL_OP_SUB, 0); } | basic_expr MUL basic_expr { spl_asm_add(as, SPL_OP_MUL, 0); } | basic_expr DIV basic_expr { spl_asm_add(as, SPL_OP_DIV, 0); } | basic_expr MOD basic_expr { spl_asm_add(as, SPL_OP_MOD, 0); } | basic_expr POW basic_expr { spl_asm_add(as, SPL_OP_POW, 0); } | basic_expr EQ basic_expr { spl_asm_add(as, SPL_OP_EQ, 0); } | basic_expr NE basic_expr { spl_asm_add(as, SPL_OP_NE, 0); } | basic_expr LT basic_expr { spl_asm_add(as, SPL_OP_LT, 0); } | basic_expr GE basic_expr { spl_asm_add(as, SPL_OP_GE, 0); } | basic_expr LE basic_expr { spl_asm_add(as, SPL_OP_LE, 0); } | basic_expr GT basic_expr { spl_asm_add(as, SPL_OP_GT, 0); } | basic_expr IADD basic_expr { spl_asm_add(as, SPL_OP_IADD, 0); } | basic_expr ISUB basic_expr { spl_asm_add(as, SPL_OP_ISUB, 0); } | basic_expr IMUL basic_expr { spl_asm_add(as, SPL_OP_IMUL, 0); } | basic_expr IDIV basic_expr { spl_asm_add(as, SPL_OP_IDIV, 0); } | basic_expr IMOD 
basic_expr { spl_asm_add(as, SPL_OP_IMOD, 0); } | basic_expr IPOW basic_expr { spl_asm_add(as, SPL_OP_IPOW, 0); } | basic_expr IEQ basic_expr { spl_asm_add(as, SPL_OP_IEQ, 0); } | basic_expr INE basic_expr { spl_asm_add(as, SPL_OP_INE, 0); } | basic_expr ILT basic_expr { spl_asm_add(as, SPL_OP_ILT, 0); } | basic_expr IGE basic_expr { spl_asm_add(as, SPL_OP_IGE, 0); } | basic_expr ILE basic_expr { spl_asm_add(as, SPL_OP_ILE, 0); } | basic_expr IGT basic_expr { spl_asm_add(as, SPL_OP_IGT, 0); } | basic_expr FADD basic_expr { spl_asm_add(as, SPL_OP_FADD, 0); } | basic_expr FSUB basic_expr { spl_asm_add(as, SPL_OP_FSUB, 0); } | basic_expr FMUL basic_expr { spl_asm_add(as, SPL_OP_FMUL, 0); } | basic_expr FDIV basic_expr { spl_asm_add(as, SPL_OP_FDIV, 0); } | basic_expr FMOD basic_expr { spl_asm_add(as, SPL_OP_FMOD, 0); } | basic_expr FPOW basic_expr { spl_asm_add(as, SPL_OP_FPOW, 0); } | basic_expr FEQ basic_expr { spl_asm_add(as, SPL_OP_FEQ, 0); } | basic_expr FNE basic_expr { spl_asm_add(as, SPL_OP_FNE, 0); } | basic_expr FLT basic_expr { spl_asm_add(as, SPL_OP_FLT, 0); } | basic_expr FGE basic_expr { spl_asm_add(as, SPL_OP_FGE, 0); } | basic_expr FLE basic_expr { spl_asm_add(as, SPL_OP_FLE, 0); } | basic_expr FGT basic_expr { spl_asm_add(as, SPL_OP_FGT, 0); } | basic_expr OADD basic_expr { spl_asm_add(as, SPL_OP_OBJOP, "add"); } | basic_expr OSUB basic_expr { spl_asm_add(as, SPL_OP_OBJOP, "sub"); } | basic_expr OMUL basic_expr { spl_asm_add(as, SPL_OP_OBJOP, "mul"); } | basic_expr ODIV basic_expr { spl_asm_add(as, SPL_OP_OBJOP, "div"); } | basic_expr OMOD basic_expr { spl_asm_add(as, SPL_OP_OBJOP, "mod"); } | basic_expr OPOW basic_expr { spl_asm_add(as, SPL_OP_OBJOP, "pow"); } | basic_expr OEQ basic_expr { spl_asm_add(as, SPL_OP_OBJOP, "eq"); } | basic_expr ONE basic_expr { spl_asm_add(as, SPL_OP_OBJOP, "ne"); } | basic_expr OLT basic_expr { spl_asm_add(as, SPL_OP_OBJOP, "lt"); } | basic_expr OGE basic_expr { spl_asm_add(as, SPL_OP_OBJOP, "ge"); } | basic_expr OLE 
basic_expr { spl_asm_add(as, SPL_OP_OBJOP, "le"); } | basic_expr OGT basic_expr { spl_asm_add(as, SPL_OP_OBJOP, "gt"); } | basic_expr SEQ basic_expr { spl_asm_add(as, SPL_OP_SEQ, 0); } | basic_expr SNE basic_expr { spl_asm_add(as, SPL_OP_SNE, 0); } | basic_expr SLT basic_expr { spl_asm_add(as, SPL_OP_SLT, 0); } | basic_expr SGE basic_expr { spl_asm_add(as, SPL_OP_SGE, 0); } | basic_expr SLE basic_expr { spl_asm_add(as, SPL_OP_SLE, 0); } | basic_expr SGT basic_expr { spl_asm_add(as, SPL_OP_SGT, 0); } /*** OR and AND with logical ops *** | basic_expr LOR basic_expr { spl_asm_add(as, SPL_OP_LOR, 0); } | basic_expr LAND basic_expr { spl_asm_add(as, SPL_OP_LAND, 0); } ***********************************/ | basic_expr LOR { spl_asm_add(as, SPL_OP_COPY, 0); spl_asm_add(as, SPL_OP_IF, 0); pbstack_push(spl_asm_add(as, SPL_OP_JUMP, 0)); spl_asm_add(as, SPL_OP_DROP, 0); } basic_expr { spl_asm_setaddr(as, pbstack_pop(), spl_asm_add(as, SPL_OP_NOP, 0)); } | basic_expr LAND { spl_asm_add(as, SPL_OP_COPY, 0); spl_asm_add(as, SPL_OP_UNLESS, 0); pbstack_push(spl_asm_add(as, SPL_OP_JUMP, 0)); spl_asm_add(as, SPL_OP_DROP, 0); } basic_expr { spl_asm_setaddr(as, pbstack_pop(), spl_asm_add(as, SPL_OP_NOP, 0)); } | basic_expr REGEX basic_expr REGEX_SEP basic_expr REGEX_SEP { spl_asm_add(as, SPL_OP_REMATCH, 0); } | basic_expr REGEX_SUBST_R basic_expr REGEX_SEP basic_expr REGEX_SEP basic_expr REGEX_SEP { spl_asm_add(as, SPL_OP_RESUBST, 0); } | lvalue REGEX_SUBST basic_expr REGEX_SEP basic_expr REGEX_SEP basic_expr REGEX_SEP { spl_asm_add(as, SPL_OP_RESUBST, 0); } | basic_expr NREGEX basic_expr REGEX_SEP basic_expr REGEX_SEP { spl_asm_add(as, SPL_OP_REMATCH, 0); } { spl_asm_add(as, SPL_OP_LNOT, 0); } | basic_expr NREGEX_SUBST_R basic_expr REGEX_SEP basic_expr REGEX_SEP basic_expr REGEX_SEP { spl_asm_add(as, SPL_OP_RESUBST, 0); } { spl_asm_add(as, SPL_OP_LNOT, 0); } | lvalue NREGEX_SUBST basic_expr REGEX_SEP basic_expr REGEX_SEP basic_expr REGEX_SEP { spl_asm_add(as, SPL_OP_RESUBST, 0); } { 
spl_asm_add(as, SPL_OP_LNOT, 0); } | regex_eval_mode { spl_asm_add(as, SPL_OP_BEGIN, 0); if ($1 & REGEX_EVAL_SUBST) { spl_asm_add(as, SPL_OP_COPY, 0); spl_asm_add(as, SPL_OP_PUSHC, "#name"); spl_asm_add(as, SPL_OP_XCHG, 0); spl_asm_add(as, SPL_OP_POPL, 0); spl_asm_add(as, SPL_OP_GETVAL, 0); } spl_asm_add(as, SPL_OP_PUSHC, "#text"); spl_asm_add(as, SPL_OP_XCHG, 0); spl_asm_add(as, SPL_OP_POPL, 0); spl_asm_add(as, SPL_OP_PUSHC, "#array"); spl_asm_add(as, SPL_OP_PUSH, "#text"); } basic_expr REGEX_SEP basic_expr REGEX_SEP { spl_asm_add(as, SPL_OP_REMATCH, 0); spl_asm_add(as, SPL_OP_POPL, 0); spl_asm_add(as, SPL_OP_PUSHC, "#index"); spl_asm_add(as, SPL_OP_UNDEF, 0); spl_asm_add(as, SPL_OP_POPL, 0); spl_asm_add(as, SPL_OP_PUSHC, "#newtext"); spl_asm_add(as, SPL_OP_UNDEF, 0); spl_asm_add(as, SPL_OP_POPL, 0); pbstack_push(spl_asm_add(as, SPL_OP_NOP, 0)); spl_asm_add(as, SPL_OP_PUSHC, "#index"); spl_asm_add(as, SPL_OP_PUSH, "#array"); spl_asm_add(as, SPL_OP_PUSH, "#index"); spl_asm_add(as, SPL_OP_NEXT, 0); spl_asm_add(as, SPL_OP_COPY, 0); spl_asm_add(as, SPL_OP_DEFINED, 0); spl_asm_add(as, SPL_OP_UNLESS, 0); pbstack_push(spl_asm_add(as, SPL_OP_JUMP, 0)); spl_asm_add(as, SPL_OP_POPL, 0); spl_asm_add(as, SPL_OP_PUSHC, "#reres"); spl_asm_add(as, SPL_OP_PUSHC, "#array"); spl_asm_add(as, SPL_OP_PUSH, "#index"); spl_asm_add(as, SPL_OP_HENC, 0); spl_asm_add(as, SPL_OP_DOTCAT, 0); spl_asm_add(as, SPL_OP_GETVAL, 0); spl_asm_add(as, SPL_OP_POPL, 0); spl_asm_add(as, SPL_OP_PUSHC, "#newtext"); spl_asm_add(as, SPL_OP_PUSH, "#newtext"); spl_asm_add(as, SPL_OP_PUSH, "#reres.=NC"); spl_asm_add(as, SPL_OP_CAT, 0); spl_asm_add(as, SPL_OP_BEGIN, 0); } basic_expr { spl_asm_add(as, SPL_OP_END, 0); int finish_jump = pbstack_pop(); spl_asm_add(as, SPL_OP_CAT, 0); spl_asm_add(as, SPL_OP_POPL, 0); spl_asm_setaddr(as, spl_asm_add(as, SPL_OP_JUMP, 0), pbstack_pop()); spl_asm_setaddr(as, finish_jump, spl_asm_add(as, SPL_OP_DROP, 0)); spl_asm_add(as, SPL_OP_DROP, 0); spl_asm_add(as, SPL_OP_PUSH, 
"#newtext"); spl_asm_add(as, SPL_OP_DEFINED, 0); spl_asm_add(as, SPL_OP_UNLESS, 0); int p1 = spl_asm_add(as, SPL_OP_JUMP, 0); if ($1 & REGEX_EVAL_SUBST) spl_asm_add(as, SPL_OP_PUSH, "#name"); spl_asm_add(as, SPL_OP_PUSH, "#newtext"); spl_asm_add(as, SPL_OP_PUSHC, "#array"); spl_asm_add(as, SPL_OP_PUSH, "#index"); spl_asm_add(as, SPL_OP_HENC, 0); spl_asm_add(as, SPL_OP_DOTCAT, 0); spl_asm_add(as, SPL_OP_PUSHC, "=LC"); spl_asm_add(as, SPL_OP_DOTCAT, 0); spl_asm_add(as, SPL_OP_GETVAL, 0); spl_asm_add(as, SPL_OP_CAT, 0); if ($1 & REGEX_EVAL_SUBST) { spl_asm_add(as, SPL_OP_POPI, 0); spl_asm_setaddr(as, p1, spl_asm_add(as, SPL_OP_NOP, 0)); } if ($1 & REGEX_EVAL_RETURN) { int p2 = spl_asm_add(as, SPL_OP_JUMP, 0); spl_asm_setaddr(as, p1, spl_asm_add(as, SPL_OP_PUSH, "#text")); spl_asm_setaddr(as, p2, spl_asm_add(as, SPL_OP_NOP, 0)); } if ($1 & REGEX_EVAL_SUBST) { spl_asm_add(as, SPL_OP_PUSH, "#array"); spl_asm_add(as, SPL_OP_ZERO, 0); spl_asm_add(as, SPL_OP_IADD, 0); } spl_asm_add(as, SPL_OP_END, 0); if ($1 & REGEX_EVAL_NEG) spl_asm_add(as, SPL_OP_LNOT, 0); CHECKPOINT(); } ; optional_list_tail: | ',' optional_list_tail ; array_list: | array_element | array_list ',' array_element ; array_element: complex_expr ARRAYREF { spl_asm_add(as, SPL_OP_HENC, 0); } complex_expr { spl_asm_add(as, SPL_OP_APUSHREFID, 0); } | hash_index_lvalue ':' complex_expr { spl_asm_add(as, SPL_OP_APUSHREFID, 0); } | complex_expr { spl_asm_add(as, SPL_OP_APUSHREF, 0); } ; func_expr: '(' complex_expr ')' | lvalue { spl_asm_add(as, SPL_OP_GETVAL, 0); } ; arglist_def: | arglist_def_entry | arglist_def ',' arglist_def_entry ; arglist_def_entry: ID { spl_asm_add(as, SPL_OP_POPA, $1); free($1); } | '@' ID { spl_asm_add(as, SPL_OP_PUSHC, $2); spl_asm_add(as, SPL_OP_APOPA, 0); free($2); } | MOD ID { spl_asm_add(as, SPL_OP_PUSHC, $2); spl_asm_add(as, SPL_OP_UNDEF, 0); spl_asm_add(as, SPL_OP_POPLC, 0); spl_asm_add(as, SPL_OP_HGETA, 0); free($2); } ; arglist_call: | arglist_call_entry | arglist_call ',' 
arglist_call_entry
	;

/*
 * A list of zero or more call arguments separated by TRANSLATE_SEPERATOR
 * tokens.  NOTE(review): presumably the operand list of the
 * TRANSLATE_PREFIX ... TRANSLATE_END token sequence that spl_lex_string()
 * builds -- the rule that uses it is not part of this section.
 */
translate_args:
	/* empty */
	| arglist_call_entry
	| translate_args TRANSLATE_SEPERATOR arglist_call_entry
	;

/*
 * One call argument.  The mid-rule action appends a NOP and remembers the
 * value returned by spl_asm_add() on the patch stack before the code of the
 * argument itself is generated; arglist_call_entry_tail pops it again as
 * "begin_this_param".
 */
arglist_call_entry:
	{ pbstack_push(spl_asm_add(as, SPL_OP_NOP, 0)); } arglist_call_entry_tail
	;

/*
 * The four argument forms differ only in the opcode appended after the
 * expression:
 *
 *	expr			SPL_OP_PUSHAV
 *	'@' expr		SPL_OP_APUSHA
 *	MOD expr		SPL_OP_HSETA
 *	key ':' expr		SPL_OP_APUSHREFID
 *
 * Every action then does the same bookkeeping:
 *   - pop the start of this argument (pushed by arglist_call_entry) and the
 *     entry below it, which is used as "start of the argument list - 1"
 *     (presumably pushed by the enclosing call rule, which is not visible
 *     here),
 *   - derive the size of the code generated so far for the list and for
 *     this argument,
 *   - hand both ranges to spl_asm_shuffle() (NOTE(review): this looks like it
 *     moves the new argument's code in front of the earlier arguments --
 *     confirm against spl_asm_shuffle()),
 *   - push the list entry back so that the next argument finds it again.
 */
arglist_call_entry_tail:
	complex_expr
		{
			int begin_this_param = pbstack_pop();
			int begin_param_list = pbstack_pop() + 1;
			int size_list = begin_this_param - begin_param_list;
			int size_this = spl_asm_add(as, SPL_OP_PUSHAV, 0) + 1 - begin_this_param;
			spl_asm_shuffle(as, size_list, begin_param_list, begin_param_list+size_this,
					size_this, begin_this_param, begin_param_list, -1);
			pbstack_push(begin_param_list - 1);
		}
	| '@' complex_expr
		{
			int begin_this_param = pbstack_pop();
			int begin_param_list = pbstack_pop() + 1;
			int size_list = begin_this_param - begin_param_list;
			int size_this = spl_asm_add(as, SPL_OP_APUSHA, 0) + 1 - begin_this_param;
			spl_asm_shuffle(as, size_list, begin_param_list, begin_param_list+size_this,
					size_this, begin_this_param, begin_param_list, -1);
			pbstack_push(begin_param_list - 1);
		}
	| MOD complex_expr
		{
			int begin_this_param = pbstack_pop();
			int begin_param_list = pbstack_pop() + 1;
			int size_list = begin_this_param - begin_param_list;
			int size_this = spl_asm_add(as, SPL_OP_HSETA, 0) + 1 - begin_this_param;
			spl_asm_shuffle(as, size_list, begin_param_list, begin_param_list+size_this,
					size_this, begin_this_param, begin_param_list, -1);
			pbstack_push(begin_param_list - 1);
		}
	| hash_index_lvalue ':' complex_expr
		{
			int begin_this_param = pbstack_pop();
			int begin_param_list = pbstack_pop() + 1;
			int size_list = begin_this_param - begin_param_list;
			int size_this = spl_asm_add(as, SPL_OP_APUSHREFID, 0) + 1 - begin_this_param;
			spl_asm_shuffle(as, size_list, begin_param_list, begin_param_list+size_this,
					size_this, begin_this_param, begin_param_list, -1);
			pbstack_push(begin_param_list - 1);
		}
	;

/*
 * The key of a "key: value" pair: either a VALUE token, which is pushed as a
 * constant (the token text is freed afterwards), or an lvalue.
 */
hash_index_lvalue:
	VALUE
		{
			spl_asm_add(as, SPL_OP_PUSHC, $1);
			free($1);
		}
	| lvalue
	;

%%

/*
 * Operator spellings and the token each of them produces.  The table ends
 * with a { 0, 0 } entry and lists longer spellings before the shorter ones
 * they start with (NOTE(review): presumably so that a first-match scan finds
 * the longest operator -- the scanning code is not part of this section).
 */
static struct { char *text; int lex; } lex_ops[] = { { "(**)=", OPOWEQ }, {
"#**=", IPOWEQ }, { ".**=", FPOWEQ }, { "(**)", OPOW }, { "(+)=", OADDEQ }, { "(-)=", OSUBEQ }, { "(*)=", OMULEQ }, { "(/)=", ODIVEQ }, { "(%)=", OMODEQ }, { "(==)", OEQ }, { "(!=)", ONE }, { "(<=)", OLE }, { "(>=)", OGE }, { "(<)", OLT }, { "(>)", OGT }, { "(+)", OADD }, { "(-)", OSUB }, { "(*)", OMUL }, { "(/)", ODIV }, { "(%)", OMOD }, { "[*]", SPECIALREF }, { "[+]", SPECIALREF }, { "[/]", SPECIALREF }, { "[.]", SPECIALREF }, { ">>>", STRING_EOL }, { "<<<", STRING_EOL_S }, { "<=>", XCHG }, { "^==", PEQ }, { "^!=", PNE }, { "#==", IEQ }, { "#!=", INE }, { "#<=", ILE }, { "#>=", IGE }, { ".==", FEQ }, { ".!=", FNE }, { ".<=", FLE }, { ".>=", FGE }, { "~==", SEQ }, { "~!=", SNE }, { "~<=", SLE }, { "~>=", SGE }, { "#+=", IADDEQ }, { "#-=", ISUBEQ }, { "#*=", IMULEQ }, { "#/=", IDIVEQ }, { "#%=", IMODEQ }, { ".+=", FADDEQ }, { ".-=", FSUBEQ }, { ".*=", FMULEQ }, { "./=", FDIVEQ }, { ".%=", FMODEQ }, { "**=", POWEQ }, { "#**", IPOW }, { ".**", FPOW }, { ">>", STRING_LABEL }, { "<<", STRING_LABEL_S }, { "#<", ILT }, { "#>", IGT }, { ".<", FLT }, { ".>", FGT }, { "~<", SLT }, { "~>", SGT }, { ",,", DBLCOMMA }, { ":=", SETCOPY }, { "||", LOR }, { "&&", LAND }, { "++", INC }, { "--", DEC }, { "==", EQ }, { "!=", NE }, { "<=", LE }, { ">=", GE }, { "+=", ADDEQ }, { "-=", SUBEQ }, { "*=", MULEQ }, { "/=", DIVEQ }, { "%=", MODEQ }, { "#+", IADD }, { "#-", ISUB }, { "#*", IMUL }, { "#/", IDIV }, { "#%", IMOD }, { ".+", FADD }, { ".-", FSUB }, { ".*", FMUL }, { "./", FDIV }, { ".%", FMOD }, { "**", POW }, { "::", ENC }, { "~=", SAPPEND }, { "=>", ARRAYREF }, { "<", LT }, { ">", GT }, { "!", LNOT }, { "+", ADD }, { "-", SUB }, { "*", MUL }, { "/", DIV }, { "%", MOD }, { "~", CAT }, { 0, 0 } }; static struct { char *text; int lex; } lex_keywords[] = { { "debug", DEBUG }, { "warning", WARNING }, { "panic", ERROR }, { "delete", DELETE }, { "function", FUNCTION }, { "method", METHOD }, { "import", IMPORT }, { "load", LOAD }, { "new", NEW }, { "this", THIS }, { "object", OBJECT }, 
{ "var", VAR }, { "static", STATIC }, { "if", IF }, { "else", ELSE }, { "do", DO }, { "while", WHILE }, { "for", FOR }, { "foreach", FOREACH }, { "asm", ASM }, { "return", RETURN }, { "exit", EXIT }, { "defined", DEFINED }, { "declared", DECLARED }, { "undef", UNDEF }, { "goto", GOTO }, { "break", BREAK }, { "continue", CONTINUE }, { "pop", POP }, { "shift", SHIFT }, { "push", PUSH }, { "unshift", UNSHIFT }, { "next", NEXT }, { "prev", PREV }, { "eval", EVAL }, { "not", LNOT }, { "and", LAND }, { "or", LOR }, { "try", TRY }, { "catch", CATCH }, { "throw", THROW }, { "switch", SWITCH }, { "default", DEFAULT }, { "case", CASE }, { "lengthof", LENGTHOF }, { "elementsof", ELEMENTSOF }, { 0, 0 } }; struct lex_srcfile; struct lex_srcfile { struct lex_srcfile *next; const char *text; const char *name; }; struct lex_item; struct lex_item { char *text; int lex; struct lex_item *next, *prev; struct lex_srcfile *src; const char *pos; }; static const char *input; static char *encoding; static spl_malloc_file_function *malloc_file_func; static struct lex_item *lex_item_list_first; static struct lex_item *lex_item_list_last; static struct lex_item *lex_last_item; static struct lex_item *lex_translate_ptr; int lex_translate_count; static struct lex_srcfile *lex_srcfile_list; static struct lex_srcfile *lex_srcfile_current; static jmp_buf lex_goterr; static char *lex_errstr; struct define { int active, isarg; char *name, *text, *args; struct define *next; }; static int in_pragma_arg; static struct define *define_list; static char *current_define; static int define_recursion; static struct lex_item *lex_insert(struct lex_item *insertafter, int lex, char *text) { if (in_pragma_arg) { if (text) free(text); return 0; } struct lex_item *item = calloc(1, sizeof(struct lex_item)); if (spl_yydebug) fprintf(stderr, "[Lexer: new symbol %s (%d) with (%s) value]\n", yytname[YYTRANSLATE(lex)], lex, text ? 
text : ""); item->text = text; item->lex = lex; item->src = lex_srcfile_current; item->pos = input; if (insertafter == lex_item_list_last) insertafter = 0; if (insertafter) { struct lex_item *next = insertafter->next; next->prev = insertafter->next = item; item->prev = insertafter; item->next = next; } else { if (lex_item_list_first) { lex_item_list_last->next = item; item->prev = lex_item_list_last; } else lex_item_list_first = item; lex_item_list_last = item; } return item; } static struct lex_item *lex_new(int lex, char *text) { return lex_insert(0, lex, text); } static struct lex_item *lex_new_translate(int lex, char *text) { if (!lex_translate_ptr) return lex_insert(0, lex, text); lex_translate_ptr = lex_insert(lex_translate_ptr, lex, text); return lex_translate_ptr; } static int spl_yylex() { if ( !lex_item_list_first ) return 0; int lex = lex_item_list_first->lex; spl_yylval.text = lex_item_list_first->text; if (spl_yydebug) fprintf(stderr, "[Lexer: shifting symbol %s (%d) with (%s) value] ", yytname[YYTRANSLATE(lex)], lex, spl_yylval.text ? 
spl_yylval.text : ""); if (lex_last_item) free(lex_last_item); lex_last_item = lex_item_list_first; lex_item_list_first = lex_item_list_first->next; return lex; } static void spl_lex_pragma(); static void spl_lex_string(char *term, int flags, int indenting_delim); static void spl_lex_prog_string(char *text); static void spl_lex_prog(char *term); #define LEX_STRING_BACKSLASHES 0x0001 #define LEX_STRING_SPLTAGS 0x0002 #define LEX_STRING_REGEX 0x0004 static char *malloc_file_wrapper(const char *filename, int is_embedded) { if ( is_embedded ) { char *token; my_asprintf(&token, "#embedded-file %s ", filename); const char *begin = strstr(lex_srcfile_current->text, token); int token_size = strlen(token); free(token); if ( !begin ) return 0; begin += token_size; token_size = strcspn(begin, " \r\n\t"); if ( token_size <= 0 ) return 0; token = my_strndup(begin, token_size); begin = begin + token_size; if ( *begin == ' ' || *begin == '\t' || *begin == '\r' || *begin == '\n' ) begin++; const char *end = strstr(begin, token); free(token); if ( !end ) return 0; return my_strndup(begin, end-begin); } if ( malloc_file_func ) { char *retval; if (*filename != '/' && as->vm && as->vm->current_dir_name) { char fullname[strlen(filename) + strlen(as->vm->current_dir_name) + 2]; sprintf(fullname, "%s/%s", as->vm->current_dir_name, filename); retval = malloc_file_func(fullname, 0); } else retval = malloc_file_func(filename, 0); if (retval) { if (encoding) { char *orig_retval = retval; retval = spl_utf8_import(orig_retval, encoding); free(orig_retval); if (!retval) { spl_report(SPL_REPORT_COMPILER, as, "Character set (#encoding) '%s' is unknown!\n", encoding); return 0; } } if ( spl_utf8_check(retval) ) { spl_report(SPL_REPORT_COMPILER, as, "Compiler tried to load non-utf8 encoded text!\n" "A decoder bug or just a missing '#encoding' pragma?\n"); free(retval); return 0; } } return retval; } return 0; } static char *get_pragma_body() { int data_len = strcspn(input, "\n"); while 
(input[data_len] == '\n' && input[data_len+1] == '\\') data_len += strcspn(input+data_len+1, "\n")+1; char *ret = my_strndup(input, data_len); input += data_len; for (char *in=ret, *out=ret; (*out = *in) != 0; in++, out++) if (in[0] == '\n' && in[1] == '\\') in++; return ret; } static void import_asm(int mode, unsigned char *bytecode, unsigned char *map, int offset, int bytecode_len) { int next_condit = 0; next_op: if (offset < 16 || offset >= bytecode_len) { my_asprintf(&lex_errstr, "Error while importing bytecode file."); longjmp(lex_goterr, __LINE__); } unsigned char op = bytecode[offset]; int arg_size = 0, arg = 0; char *txtarg = 0; if (op == SPL_OP_HALT || (mode == 0 && map[offset])) return; int this_offset = offset++; if (op < 0x60) { arg_size = 4 - (op & 3); op = op & ~3; arg = spl_bytes_to_int(arg_size, bytecode + offset); offset += arg_size; if (op >= 0x20) { if (offset + arg >= bytecode_len || offset + arg < 16) { my_asprintf(&lex_errstr, "Error while importing bytecode file."); longjmp(lex_goterr, __LINE__); } txtarg = (char*)(bytecode + offset + arg); } } if (mode == 0) { int optype = spl_optype(op); map[this_offset] = 1; int this_condit = next_condit; next_condit = optype == 'C'; if (optype == 'J' && !this_condit) { offset += arg; goto next_op; } if (optype == 'J' || optype == 'B') import_asm(mode, bytecode, map, offset + arg, bytecode_len); if (optype == 'E') return; } if (mode == 1) { char buffer[100 + (txtarg ? 
strlen(txtarg)*2 : 0)]; int pos = sprintf(buffer, ":BCI_%d_%d %s", import_asm_label_counter, this_offset, spl_asm_op2txt(op)); if (txtarg) { buffer[pos++] = ' '; buffer[pos++] = '\"'; for (int i=0; txtarg[i]; i++) { if (txtarg[i] == '\n') { buffer[pos++] = '\\'; buffer[pos++] = 'n'; continue; } if (txtarg[i] == '\"') { buffer[pos++] = '\\'; buffer[pos++] = '\"'; continue; } if (txtarg[i] == '\\') { buffer[pos++] = '\\'; buffer[pos++] = '\\'; continue; } buffer[pos++] = txtarg[i]; } buffer[pos++] = '\"'; } if (op < 0x20) pos += sprintf(buffer+pos, " :BCI_%d_%d", import_asm_label_counter, offset + arg); buffer[pos++] = 0; lex_new(VALUE, strdup(buffer)); while (offset < bytecode_len && !map[offset]) offset++; if (offset >= bytecode_len) return; } goto next_op; } static void spl_lex_pragma() { static const char *stop_string = " \t\r\n;,(){}[]*-+~!\'\"@#$%^&=?"; static const char *hard_pragmas[] = { "define", "undef", "encoding", "file-as-const", "file-as-code", "file-as-template", "file-as-bytecode", "embedded-file", 0 }; // ignore '#!...\n' strings if (*input == '!') { while (*input && *input != '\n') input++; return; } int stmt_len = strcspn(input, stop_string); for (int i=0; hard_pragmas[i]; i++) { int len = strlen(hard_pragmas[i]); if (!strncmp(input, hard_pragmas[i], len) && strcspn(input+len, stop_string) == 0) { stmt_len = len; break; } } char *stmt = my_strndupa(input, stmt_len); input += stmt_len; input += strspn(input, " \t\r\n"); int arg_has_asterisk_prefix = 0; char *arg = 0; if (!strncmp(stmt, "file-as-", 8)) if ( *input == '*' ) { arg_has_asterisk_prefix=1; input++; } if (!strncmp(stmt, "file-as-", 8) || !strcmp(stmt, "encoding") || !strcmp(stmt, "define") || !strcmp(stmt, "undef") || !strcmp(stmt, "embedded-file")) { int arg_len = strcspn(input, stop_string); arg = my_strndupa(input, arg_len); input += arg_len; } if ( !strcmp(stmt, "encoding") ) { return; } if ( !strcmp(stmt, "define") ) { if (in_pragma_arg) { free(get_pragma_body()); return; } struct 
define *def = calloc(1, sizeof(struct define)); if (*input == '(') { int len = strcspn(input+1, ")"); def->args = my_strndup(input+1, len); for (char *in=def->args, *out=def->args; (*out = *in) != 0; in++, out++) { while (in[1] == '\n' || in[1] == '\r' || in[1] == '\t' || in[1] == ' ') in++; } input += len+2; } def->name = strdup(arg); def->text = get_pragma_body(); def->next = define_list; define_list = def; return; } if ( !strcmp(stmt, "embedded-file") && *input == ' ' ) { int token_size = strcspn(++input, " \r\n\t"); if ( token_size <= 0 ) goto unknown_pragma; char *token = my_strndup(input, token_size); input = input + token_size; const char *end = strstr(input, token); free(token); if ( !end ) { my_asprintf(&lex_errstr, "Can't find end label for #%s.", stmt); longjmp(lex_goterr, __LINE__); } input = end + token_size; return; } if (in_pragma_arg) return; if ( !strcmp(stmt, "undef") ) { struct define **lastp = &define_list; struct define *def = define_list; while (def) { if (!def->isarg && !strcmp(def->name, arg)) { *lastp = def->next; free(def->name); free(def->text); if (def->args) free(def->args); free(def); def = *lastp; } else { lastp = &def->next; def = def->next; } } return; } if ( !strcmp(stmt, "file-as-const") ) { char *text = malloc_file_wrapper(arg, arg_has_asterisk_prefix); if ( !text ) { my_asprintf(&lex_errstr, "Can't open #file-as-const file (%s%s).", arg_has_asterisk_prefix ? "*" : "", arg); longjmp(lex_goterr, __LINE__); } lex_new(VALUE, text); return; } if ( !strcmp(stmt, "file-as-code") ) { char *text = malloc_file_wrapper(arg, arg_has_asterisk_prefix); if ( !text ) { my_asprintf(&lex_errstr, "Can't open #file-as-code file (%s%s).", arg_has_asterisk_prefix ? 
"*" : "", arg); longjmp(lex_goterr, __LINE__); } struct lex_srcfile *oldsrc = lex_srcfile_current; const char *oldinput = input; lex_srcfile_current = malloc(sizeof(struct lex_srcfile)); lex_srcfile_current->text = input = text; lex_srcfile_current->name = strdup(arg); lex_srcfile_current->next = lex_srcfile_list; lex_srcfile_list = lex_srcfile_current; spl_lex_prog(0); lex_srcfile_current = oldsrc; input = oldinput; return; } if ( !strcmp(stmt, "file-as-template") ) { char *text = malloc_file_wrapper(arg, arg_has_asterisk_prefix); if ( !text ) { my_asprintf(&lex_errstr, "Can't open #file-as-template file (%s%s).", arg_has_asterisk_prefix ? "*" : "", arg); longjmp(lex_goterr, __LINE__); } struct lex_srcfile *oldsrc = lex_srcfile_current; const char *oldinput = input; lex_srcfile_current = malloc(sizeof(struct lex_srcfile)); lex_srcfile_current->text = input = text; lex_srcfile_current->name = strdup(arg); lex_srcfile_current->next = lex_srcfile_list; lex_srcfile_list = lex_srcfile_current; spl_lex_string(0, LEX_STRING_SPLTAGS, 0); lex_srcfile_current = oldsrc; input = oldinput; return; } if ( !strcmp(stmt, "file-as-bytecode") ) { unsigned char *bytecode = 0; int bytecode_len = 0; if (malloc_file_func) { if (*arg != '/' && as->vm && as->vm->current_dir_name) { char fullname[strlen(arg) + strlen(as->vm->current_dir_name) + 2]; sprintf(fullname, "%s/%s", as->vm->current_dir_name, arg); bytecode = malloc_file_func(fullname, &bytecode_len); } else bytecode = malloc_file_func(arg, &bytecode_len); } if (!bytecode) { my_asprintf(&lex_errstr, "Can't open #file-as-bytecode file (%s).", arg); longjmp(lex_goterr, __LINE__); } if (bytecode_len < 16 || memcmp(bytecode, SPL_SIGNATURE, 16)) { my_asprintf(&lex_errstr, "Bytecode from #file-as-bytecode file (%s) has invalid signature.", arg); longjmp(lex_goterr, __LINE__); } lex_new(ASM, 0); import_asm_label_counter++; unsigned char *map = calloc(1, bytecode_len); import_asm(0, bytecode, map, 16, bytecode_len); import_asm(1, 
bytecode, map, 16, bytecode_len); lex_new(';', 0); free(bytecode); free(map); return; } for (struct define *def = define_list; def; def = def->next) if (!strcmp(def->name, stmt)) { lex_new(VALUE, strdup(def->text)); return; } unknown_pragma: my_asprintf(&lex_errstr, "Unknown compiler-pragma #%s.", stmt); longjmp(lex_goterr, __LINE__); } static void spl_lex_new_string(char *text, int flags) { if ( flags & LEX_STRING_BACKSLASHES ) { int i, j; for (i=0, j=0; text[i]; i++, j++) if ( text[i] == '\\' && text[i+1] ) { switch ( text[++i] ) { case 'a': text[j] = '\a'; break; case 'b': text[j] = '\b'; break; case 't': text[j] = '\t'; break; case 'n': text[j] = '\n'; break; case 'v': text[j] = '\v'; break; case 'f': text[j] = '\f'; break; case 'r': text[j] = '\r'; break; default: text[j] = input[i]; } } else text[j] = text[i]; text[j] = text[i]; } else { int i, j; for (i=0, j=0; text[i]; i++, j++) { text[j] = text[i]; } text[j] = text[i]; } if (lex_translate_ptr) { int newtext_len = 0; char *newtext; for (int i=0; text[i]; i++, newtext_len++) if (text[i] == '{') newtext_len++; newtext = malloc(newtext_len+1); for (int i=0, j=0; text[i]; i++) { newtext[j++] = text[i]; if (text[i] == '{') newtext[j++] = '}'; } newtext[newtext_len] = 0; free(text); text = newtext; } lex_new_translate(VALUE, text); } static int spl_lex_findelsetag() { int offset = strspn(input, " \t\r\n"); if (!strncmp(input + offset, "", 10)) { input += offset + 10; return 1; } return 0; } static void spl_lex_splifcalltag(int flags, int indenting_delim, int isiftag) { int len = strcspn(input, " \t\r\n>"); char *funcname, *endtag; int hasbody = 1; my_asprintf(&funcname, "%s_%.*s", isiftag ? "splif" : "splcall", len, input); my_asprintf(&endtag, "", isiftag ? 
"splif" : "splcall", len, input); input += len; lex_new('(', 0); lex_new(ID, funcname); lex_new('(', 0); while (1) { input += strspn(input, " \t\r\n"); if (!isiftag && input[0] == '/' && input[1] == '>') { hasbody = 0; input++; } if (*input == '>' || *input == 0) { if (*input) input++; break; } len = strcspn(input, " \t\r\n=\"\'(>"); lex_new(ID, my_strndup(input, len)); lex_new(':', 0); input += len; input += strcspn(input, "\'\"(>"); if (*input == '"') { input++; spl_lex_string("\"", LEX_STRING_BACKSLASHES, 0); if (*input == '"') input++; } if (*input == '\'') { input++; spl_lex_string("'", LEX_STRING_BACKSLASHES, 0); if (*input == '"') input++; } if (*input == '(') { input++; spl_lex_prog(")"); if (*input == ')') input++; } lex_new(',', 0); } if (!isiftag && hasbody) { lex_new(FUNCTION, 0); lex_new('(', 0); lex_new(')', 0); lex_new('{', 0); lex_new(RETURN, 0); spl_lex_string(endtag, flags, indenting_delim); lex_new(';', 0); lex_new('}', 0); } lex_new(')', 0); if (isiftag) { lex_new('?', 0); spl_lex_string(endtag, flags, indenting_delim); lex_new(':', 0); if (spl_lex_findelsetag()) spl_lex_string("", flags, indenting_delim); else lex_new(VALUE, strdup("")); } lex_new(')', 0); free(endtag); } static void spl_lex_spltag(int flags, int indenting_delim) { int len = strcspn(input, " \t\r\n>"); char *tag = my_strndupa(input, len); char *attr_var = 0; char *attr_list = 0; char *attr_code = 0; char *attr_char = 0; input += len; input += strspn(input, " \t\r\n"); while ( *input && *input != '>' ) { int inner_len = strcspn(input, " \t\r\n\"=>"); char *attr = my_strndupa(input, inner_len); input += inner_len; if ( *input == '=' ) { input++; if ( *input == '"' ) { input++; inner_len = strcspn(input, "\""); } else inner_len = strcspn(input, " \t\r\n\">"); char *value = my_strndupa(input, inner_len); input += inner_len; if (*input == '"') input++; input += strspn(input, " \t\r\n"); if ( !strcmp(attr, "var") ) attr_var = value; if ( !strcmp(attr, "list") ) attr_list = value; if 
( !strcmp(attr, "code") ) attr_code = value; if ( !strcmp(attr, "char") ) attr_char = value; } } if (*input == '>') input++; if ( tag[0] == '_' && tag[strlen(tag)-1] == '_' ) { int endtag_len = strlen(tag) + 10; char endtag[endtag_len]; snprintf(endtag, endtag_len, "", tag); lex_new('(', 0); lex_new(ID, strdup(tag)); spl_lex_string(endtag, flags, indenting_delim); lex_new(')', 0); return; } if ( !strcmp(tag, "comment") ) { char *end = strstr(input, ""); if (!end) { my_asprintf(&lex_errstr, "missing ."); longjmp(lex_goterr, __LINE__); } input = end + strlen(""); lex_new(VALUE, strdup("")); return; } if ( !strcmp(tag, "inline") ) { lbstack_push(); lex_new(INSERT_PROG_BEGIN, 0); lex_new(';', 0); spl_lex_prog(""); input += strlen(""); lex_new(INSERT_PROG_END, 0); lex_new(VALUE, strdup("undef")); lex_new(';', 0); lbstack_pop(); return; } if ( !strcmp(tag, "code") ) { if ( !attr_code ) { lex_new(EF_BEGIN, 0); spl_lex_prog(""); input += strlen(""); lex_new(EF_END, 0); } else { lex_new(EF_BEGIN, 0); lex_new(VAR, 0); lex_new(ID, strdup("_data_")); lex_new('=', 0); spl_lex_string("", flags, indenting_delim); lex_new(';', 0); lex_new(RETURN, 0); spl_lex_prog_string(strdup(attr_code)); lex_new(';', 0); lex_new(EF_END, 0); } return; } if ( !strcmp(tag, "var") ) { if ( !attr_var ) { my_asprintf(&lex_errstr, " requires attribute 'var'."); longjmp(lex_goterr, __LINE__); } lex_new(EF_BEGIN, 0); char *var_name_dup; my_asprintf(&var_name_dup, "[*].%s", attr_var); if ( !attr_code ) { lex_new(VAR, 0); spl_lex_prog_string(var_name_dup); lex_new('=', 0); spl_lex_string("", flags, indenting_delim); lex_new(';', 0); } else { lex_new(VAR, 0); lex_new(ID, strdup("_data_")); lex_new('=', 0); spl_lex_string("", flags, indenting_delim); lex_new(';', 0); lex_new(VAR, 0); spl_lex_prog_string(var_name_dup); lex_new('=', 0); spl_lex_prog_string(strdup(attr_code)); lex_new(';', 0); } lex_new(EF_END, 0); return; } if ( !strcmp(tag, "if") ) { if ( !attr_code ) { my_asprintf(&lex_errstr, " requires 
attribute 'code'."); longjmp(lex_goterr, __LINE__); } lex_new('(', 0); spl_lex_prog_string(strdup(attr_code)); lex_new('?', 0); spl_lex_string("", flags, indenting_delim); lex_new(':', 0); if (spl_lex_findelsetag()) spl_lex_string("", flags, indenting_delim); else lex_new(VALUE, strdup("")); lex_new(')', 0); return; } if ( !strcmp(tag, "foreach") ) { if ( !attr_var || !attr_list ) { my_asprintf(&lex_errstr, " requires attributes 'var' and 'list'."); longjmp(lex_goterr, __LINE__); } lex_new(EF_BEGIN, 0); lex_new(VAR, 0); lex_new(ID, strdup("#iret")); lex_new(';', 0); lex_new(FOREACH, 0); spl_lex_prog_string(strdup(attr_var)); lex_new('(', 0); spl_lex_prog_string(strdup(attr_list)); lex_new(')', 0); lex_new(ID, strdup("#iret")); lex_new('=', 0); lex_new(ID, strdup("#iret")); lex_new(CAT, 0); spl_lex_string("", flags, indenting_delim); lex_new(';', 0); lex_new(RETURN, 0); lex_new(ID, strdup("#iret")); lex_new(';', 0); lex_new(EF_END, 0); return; } if ( !strcmp(tag, "indent") ) { lex_new('(', 0); spl_lex_string("", flags, attr_char ? *attr_char : 0); lex_new(')', 0); return; } if ( !strcmp(tag, "else") ) { my_asprintf(&lex_errstr, "Found tag without prior or tag."); longjmp(lex_goterr, __LINE__); } my_asprintf(&lex_errstr, "Unknown tag: %s", tag); longjmp(lex_goterr, __LINE__); } static char *strndup_with_indenting_delim(const char *str, int len, int indenting_delim, int *id_status) { if (!indenting_delim) return my_strndup(str, len); char *text = malloc(len + 1); int i=0, j=0; while (str[i] && i < len) { if (!*id_status) { if (!strchr(" \t\n\r", str[i])) { if (str[i] == indenting_delim) { if (str[++i] == ' ') i++; } *id_status = 1; continue; } } else { if (str[i] == '\n') *id_status = 0; text[j++] = str[i]; } i++; } text[j++] = 0; return realloc(text, j); } static void spl_lex_string(char *term, int flags, int indenting_delim) { int len, tlen = term ? 
strlen(term) : 0; int old_php_like_tags_active; int old_php_like_tags_indenting_delim; int old_lex_translate_count; struct lex_item *old_lex_translate_ptr; int called_in_php_like_tags_context = 0; int term_is_newline = 0; int id_status = 0; old_lex_translate_count = lex_translate_count; old_lex_translate_ptr = lex_translate_ptr; if (lex_item_list_last && lex_item_list_last->lex == ID) { char *translate_id = lex_item_list_last->text; int translate_id_len = strlen(translate_id); if (!strcmp(translate_id, "_")) { translate_id = 0; } else { if (translate_id_len < 3 || translate_id[0] != '_' || translate_id[translate_id_len-1] != '_') goto not_a_translate_call; translate_id = my_strndup(translate_id+1, translate_id_len-2); } lex_translate_ptr = lex_item_list_last; lex_item_list_last->lex = TRANSLATE_PREFIX; free(lex_item_list_last->text); lex_item_list_last->text = 0; flags &= ~LEX_STRING_SPLTAGS; lex_new(TRANSLATE_SEPERATOR, 0); if (translate_id) lex_new(VALUE, translate_id); else lex_new(UNDEF, 0); not_a_translate_call:; } if ( term && !strcmp(term, "\n") ) { term_is_newline = 1; tlen = 0; } old_php_like_tags_active = php_like_tags_active; old_php_like_tags_indenting_delim = php_like_tags_indenting_delim; php_like_tags_active = 0; if ( term && !strcmp(term, "") ) { php_like_tags_active = 1; called_in_php_like_tags_context = 1; indenting_delim = php_like_tags_indenting_delim; term = php_like_tags_term; tlen = term ? 
strlen(term) : 0; lex_new(ID, strdup("#tpldata")); lex_new(SAPPEND, 0); } if (!called_in_php_like_tags_context && (flags & LEX_STRING_SPLTAGS)) { lbstack_push(); lex_new(RVALUE_CONTEXT_BEGIN, 0); } for (len = 0; input[len]; len++) { if ( flags & (LEX_STRING_BACKSLASHES|LEX_STRING_REGEX) ) { if ( input[len] == '\\' && input[len+1] ) { len++; continue; } } if ( term ) { if ( term_is_newline ) { if (input[len] == '\r' || input[len] == '\n') { if (input[len] == '\r') len++; if (input[len] == '\n') len++; break; } } else if ( !strncmp(input+len, term, tlen) ) break; } if ( flags & LEX_STRING_SPLTAGS ) { if ( !strncmp(input+len, ""); input += 2; lex_new(ID, strdup("#tpldata")); lex_new(SAPPEND, 0); len = -1; continue; } } if ( input[len] == '$' && (!(flags & LEX_STRING_REGEX) || isalpha((unsigned char)input[len+1]) || strchr("_({$:[", input[len+1])) ) { spl_lex_new_string(strndup_with_indenting_delim(input, len, indenting_delim, &id_status), flags); input += len + 1; if (lex_translate_ptr) { char *translate_token; my_asprintf(&translate_token, "{%d}", lex_translate_count++); lex_new_translate(CAT, 0); lex_new_translate(VALUE, translate_token); lex_new(TRANSLATE_SEPERATOR, 0); } else lex_new(CAT, 0); switch ( *input ) { case '(': lex_new(EF_BEGIN, 0); input++; spl_lex_prog(")"); lex_new(EF_END, 0); input++; break; case '{': lex_new('(', 0); input++; spl_lex_prog("}"); lex_new(')', 0); input++; break; case '@': case '#': case ']': { char prog[3] = { '$', *input, 0 }; spl_lex_prog_string(strdup(prog)); input++; break; } case '$': case ':': case '?': len = 0; goto skip_2nd_cat_op; case ' ': case '\t': case '\r': case '\n': len = -1; while (*input == ' ' || *input == '\t') input++; if (*input == '\r') input++; if (*input == '\n') input++; goto skip_2nd_cat_op; case '[': len = -1; while (*input && *input != ']') input++; if (*input == ']') input++; goto skip_2nd_cat_op; case '<': { char *regex_result_id; int inner_len = strcspn(++input, ">"); my_asprintf(®ex_result_id, 
"#reres.%.*s", inner_len, input); lex_new(ID, regex_result_id); input += inner_len; if (*input == '>') input++; break; } case '-': case '+': case '0' ... '9': { char *regex_result_id; int inner_len = 1; /* strspn(input, "0123456789"); */ if (*input == '-' || *input == '+') my_asprintf(®ex_result_id, "#reres.%s", *input == '-' ? "=NC" : "=LC"); else my_asprintf(®ex_result_id, "#reres.%.*s", inner_len, input); lex_new(ID, regex_result_id); input += inner_len; break; } case 'a' ... 'z': case 'A' ... 'Z': case '_': { int inner_len = 0; do inner_len++; while ( (input[inner_len] >= 'a' && input[inner_len] <= 'z') || (input[inner_len] >= 'A' && input[inner_len] <= 'Z') || (input[inner_len] >= '0' && input[inner_len] <= '9') || input[inner_len] == '_' || input[inner_len] == '.'); while (input[inner_len-1] == '.') inner_len--; lex_new('(', 0); spl_lex_prog_string(my_strndup(input, inner_len)); lex_new(')', 0); input += inner_len; break; } default: my_asprintf(&lex_errstr, "Unrecognised $ substitution in string."); longjmp(lex_goterr, __LINE__); break; } lex_new_translate(CAT, 0); len = -1; skip_2nd_cat_op:; } } spl_lex_new_string(strndup_with_indenting_delim(input, len, indenting_delim, &id_status), flags); input += len + tlen; if (php_like_tags_active) { lex_new(';', 0); lex_new(INSERT_PROG_END, 0); lex_new(VALUE, strdup("push \"#tpldata\"")); lex_new(';', 0); } if (!called_in_php_like_tags_context && (flags & LEX_STRING_SPLTAGS)) { lex_new(RVALUE_CONTEXT_END, 0); lbstack_pop(); } if (lex_translate_ptr && !old_lex_translate_ptr) { lex_new(TRANSLATE_END, 0); lex_translate_count = old_lex_translate_count; lex_translate_ptr = old_lex_translate_ptr; } php_like_tags_active = old_php_like_tags_active; php_like_tags_indenting_delim = old_php_like_tags_indenting_delim; } static void spl_lex_prog_string(char *text) { const char *oldinput = input; struct lex_srcfile *oldsrc = lex_srcfile_current; lex_srcfile_current = malloc(sizeof(struct lex_srcfile)); lex_srcfile_current->text = 
input = text; my_asprintf((char**)&lex_srcfile_current->name, "%s:byte(%d)", oldsrc->name, (int)(oldinput - oldsrc->text)); lex_srcfile_current->next = lex_srcfile_list; lex_srcfile_list = lex_srcfile_current; spl_lex_prog(0); lex_srcfile_current = oldsrc; input = oldinput; } static void spl_lex_prog(char *term) { int tlen = term ? strlen(term) : 0; int last_was_id = 0; struct lex_item *old_lex_translate_ptr = lex_translate_ptr; int old_lex_translate_count = lex_translate_count; lex_translate_ptr = 0; lex_translate_count = 0; next_symbol: if ( last_was_id > 0 ) last_was_id--; input += strspn(input, " \t\r\n"); switch (*input) { case 0: if (!term) goto lex_prog_return; my_asprintf(&lex_errstr, "Unexpected end-of-file (expected '%s').", term); longjmp(lex_goterr, __LINE__); case '"': case '\'': { char *new_term = my_strndupa(input++, 1); spl_lex_string(new_term, LEX_STRING_BACKSLASHES, 0); goto next_symbol; } case '#': if ((input[1] < 'a' || input[1] > 'z') && input[1] != '!') goto just_a_normal_operator; input++; spl_lex_pragma(); goto next_symbol; case '.': if (input[1] < '0' || input[1] > '9') goto just_a_normal_operator; case '0' ... 
'9': if (input[0] == '0' && input[1] == 'x') { char *buffer; my_asprintf(&buffer, "%ld", strtol(input+2, (char**)&input, 16)); lex_new(VALUE, buffer); } else if (input[0] == '0' && input[1] == 'o') { char *buffer; my_asprintf(&buffer, "%ld", strtol(input+2, (char**)&input, 8)); lex_new(VALUE, buffer); } else if (input[0] == '0' && input[1] == 'b') { char *buffer; my_asprintf(&buffer, "%ld", strtol(input+2, (char**)&input, 2)); lex_new(VALUE, buffer); } else { int len = strspn(input, "0123456789."); while (input[len-1] == '.') len--; lex_new(VALUE, my_strndup(input, len)); input += len; } goto next_symbol; case '$': { char *regex_result_id; int len = 0; switch (input[1]) { case '<': input += 2; len = strcspn(input, ">"); my_asprintf(®ex_result_id, "#reres.%.*s", len++, input); break; case '$': len = 2; regex_result_id = strdup("#reres"); break; case '-': len = 2; regex_result_id = strdup("#reres.=NC"); break; case '+': len = 2; regex_result_id = strdup("#reres.=LC"); break; case '0' ... '9': input += 1; len = 1; /* strspn(input, "0123456789"); */ my_asprintf(®ex_result_id, "#reres.%.*s", len, input); break; case '@': len = 2; regex_result_id = strdup("#array"); break; case '#': len = 2; regex_result_id = strdup("#index"); break; case '[': case ']': len = 2; lex_new('(', 0); lex_new(LNOT, 0); lex_new(DEFINED, 0); lex_new('(', 0); if (input[1] == '[') lex_new(PREV, 0); else lex_new(NEXT, 0); lex_new(ID, strdup("#array")); lex_new(',', 0); lex_new(ID, strdup("#index")); lex_new(')', 0); lex_new(')', 0); goto ignore_regex_result_id; default: goto just_a_normal_operator; } lex_new(ID, regex_result_id); ignore_regex_result_id: input += len; last_was_id = 2; goto next_symbol; } case 'a' ... 'z': case 'A' ... 
'Z': case '_': { int len = 0; while (1) { if (input[len] >= 'a' && input[len] <= 'z') len++; else if (input[len] >= 'A' && input[len] <= 'Z') len++; else if (input[len] >= '0' && input[len] <= '9') len++; else if (input[len] == '_') len++; else break; } char *text = my_strndup(input, len); input += len; if (!lex_item_list_last || lex_item_list_last->lex != '.' ) for (int i=0; lex_keywords[i].text; i++) if ( !strcmp(lex_keywords[i].text, text) ) { lex_new(lex_keywords[i].lex, 0); free(text); goto next_symbol; } if (!in_pragma_arg) for (struct define *def = define_list; def; def=def->next) { if ((!def->active || strcmp(current_define, def->name)) && !strcmp(def->name, text)) { char *expanded_text = strdup(def->text); free(text); if (define_recursion > 1000) { my_asprintf(&lex_errstr, "Endless recursion in macro expansion."); longjmp(lex_goterr, __LINE__); } struct define *args_start = 0; struct define *args_stop = 0; if (def->args) { char *this_args = strdup(def->args); char *this_arg, *this_args_p = this_args; input += strcspn(input, "("); if (*input) input++; this_arg = my_strsep(&this_args_p, ","); while (this_arg) { struct define *adef = calloc(1, sizeof(struct define)); adef->isarg = 1; adef->name = strdup(this_arg); in_pragma_arg=1; const char *begin = input; spl_lex_prog(this_args_p ? 
"," : ")"); adef->text=my_strndup(begin, input-begin); if (*input) input++; in_pragma_arg=0; adef->next = define_list; define_list = args_start = adef; if (!args_stop) args_stop = adef; this_arg = my_strsep(&this_args_p, ","); } free(this_args); int new_expanded_len = 1; for (int i=0; expanded_text[i]; i++) { if (expanded_text[i] == '<') { struct define *idef = args_start; while (idef) { if (!strncmp(expanded_text+i+1, idef->name, strlen(idef->name)) && expanded_text[i+1+strlen(idef->name)] == '>') { new_expanded_len += strlen(idef->text); i += strlen(idef->name) + 1; break; } if (idef == args_stop) break; idef = idef->next; } } new_expanded_len++; } char *new_expanded_text = malloc(new_expanded_len); int new_expanded_text_pos = 0; for (int i=0; expanded_text[i]; i++) { if (expanded_text[i] == '<') { struct define *idef = args_start; while (idef) { if (!strncmp(expanded_text+i+1, idef->name, strlen(idef->name)) && expanded_text[i+1+strlen(idef->name)] == '>') { strcpy(new_expanded_text+new_expanded_text_pos, idef->text); new_expanded_text_pos += strlen(idef->text); i += strlen(idef->name) + 1; goto do_not_copy_char; } if (idef == args_stop) break; idef = idef->next; } } new_expanded_text[new_expanded_text_pos++] = expanded_text[i]; do_not_copy_char:; } free(expanded_text); new_expanded_text[new_expanded_text_pos] = 0; expanded_text = new_expanded_text; } def->active = 1; define_recursion++; char *old_current_define = current_define; current_define = def->name; spl_lex_prog_string(expanded_text); current_define = old_current_define; define_recursion--; def->active = 0; struct define *idef = define_list; struct define **lastp = &define_list; while (idef) { if (idef == args_start) { *lastp = idef->next; free(idef->name); free(idef->text); if (idef->args) free(idef->args); free(idef); if (idef == args_stop) break; idef = args_start = *lastp; } else { lastp = &idef->next; idef = idef->next; } } goto next_symbol; } } lex_new(ID, text); last_was_id = 2; goto next_symbol; 
} case ']': lex_new(']', 0); input++; last_was_id = 2; goto next_symbol; case '}': case ')': if ( term && !strncmp(input, term, tlen) ) goto lex_prog_return; my_asprintf(&lex_errstr, "Unexpected '%c'.", *input); longjmp(lex_goterr, __LINE__); case '{': if (input[1] == '[') { lex_new(BLK_BEGIN_NOCTX, 0); input += 2; } else { lex_new('{', 0); input++; } spl_lex_prog("}"); lex_new('}', 0); input++; goto next_symbol; case '(': for (int i=0; lex_ops[i].text; i++) if ( !strncmp(input, lex_ops[i].text, strlen(lex_ops[i].text)) ) goto just_a_normal_operator; if ( input[1] == '{' ) { lex_new(EF_BEGIN, 0); input+=2; spl_lex_prog("})"); lex_new(EF_END, 0); input+=2; } else { if ( lex_item_list_last && lex_item_list_last->lex == ID ) { if ( lex_item_list_last->prev && lex_item_list_last->prev->lex != '.' && lex_item_list_last->prev->lex != ']' && lex_item_list_last->prev->lex != '&' && lex_item_list_last->prev->lex != '$' && lex_item_list_last->prev->lex != MUL && lex_item_list_last->prev->lex != FUNCTION && lex_item_list_last->prev->lex != METHOD && lex_item_list_last->prev->lex != FOREACH && lex_item_list_last->prev->lex != NEW ) lex_item_list_last->lex = FUNC_ID; if ( lex_item_list_last->prev && lex_item_list_last->prev->prev && lex_item_list_last->prev->lex == MUL && (lex_item_list_last->prev->prev->lex == ID || lex_item_list_last->prev->prev->lex == VALUE || lex_item_list_last->prev->prev->lex == ')' || lex_item_list_last->prev->prev->lex == ']' )) lex_item_list_last->lex = FUNC_ID; if ( !lex_item_list_last->prev ) lex_item_list_last->lex = FUNC_ID; } lex_new('(', 0); input++; spl_lex_prog(")"); lex_new(')', 0); input++; } goto next_symbol; case '=': case '!': { if (input[1] != '~') goto just_a_normal_operator; int not_mode = *input == '!'; int eval_mode = 0; int len = 0; input += 2; input += strspn(input, " \t\r\n"); static char seplist[] = "/:,!%@"; char sepstr[] = "X"; struct lex_item *subst_lex = 0; if ( input[0] == 's' && strchr(seplist, input[1]) ) { sepstr[0] = 
input[1]; subst_lex = lex_new(not_mode ? NREGEX_SUBST : REGEX_SUBST, 0); input += 2; spl_lex_string(sepstr, LEX_STRING_REGEX, 0); lex_new(REGEX_SEP, 0); spl_lex_string(sepstr, LEX_STRING_REGEX, 0); lex_new(REGEX_SEP, 0); } else if ( input[0] == 'e' && strchr(seplist, input[1]) ) { eval_mode = 1; sepstr[0] = input[1]; subst_lex = lex_new(not_mode ? NREGEX_EVAL : REGEX_EVAL, 0); input += 2; spl_lex_string(sepstr, LEX_STRING_REGEX, 0); lex_new(REGEX_SEP, 0); } else if ( strchr(seplist, input[0]) ) { sepstr[0] = input[0]; lex_new(not_mode ? NREGEX : REGEX, 0); input++; spl_lex_string(sepstr, LEX_STRING_REGEX, 0); lex_new(REGEX_SEP, 0); } else { my_asprintf(&lex_errstr, "Error in regex syntax."); longjmp(lex_goterr, __LINE__); } while ( isalpha((unsigned char)input[len]) ) len++; char *mod = my_strndup(input, len); if (subst_lex && strchr(mod, 'R')) { if (subst_lex->lex == REGEX_SUBST) subst_lex->lex = REGEX_SUBST_R; if (subst_lex->lex == NREGEX_SUBST) subst_lex->lex = NREGEX_SUBST_R; if (subst_lex->lex == REGEX_EVAL) subst_lex->lex = REGEX_EVAL_R; if (subst_lex->lex == NREGEX_EVAL) subst_lex->lex = NREGEX_EVAL_R; } if (eval_mode) { char *new_mod; if (strchr(mod, 'N') || strchr(mod, 'P') || strchr(mod, 'A') || strchr(mod, 'I') || strchr(mod, 'E') || strchr(mod, 'L')) { my_asprintf(&lex_errstr, "Error in regex syntax."); longjmp(lex_goterr, __LINE__); } my_asprintf(&new_mod, "%sEANP", mod); free(mod); mod = new_mod; } lex_new(VALUE, mod); input += len; lex_new(REGEX_SEP, 0); goto next_symbol; } case '/': if (input[1] == '/') { while (*input && *input != '\r' && *input != '\n') input++; goto next_symbol; } if (input[1] == '*') { while (input[0] && input[1] && (input[0] != '*' || input[1] != '/')) input++; if (*input) input++; if (*input) input++; goto next_symbol; } goto just_a_normal_operator; case '<': { int i = 1, with_indent = 0; if (input[i] == ':') { with_indent = 1; i++; } while (isalpha((unsigned char)input[i])) i++; if (input[i] == '>') { char endtag[i+32]; 
snprintf(endtag, i+32, "", i-(1+with_indent), input+1+with_indent); input += i+1; if (with_indent) input += strspn(input, " \t\r\n"); spl_lex_string(endtag, LEX_STRING_SPLTAGS, with_indent ? ':' : 0); goto next_symbol; } goto just_a_normal_operator; } case '?': if (php_like_tags_active && input[1] == '>') { if (!strcmp(term, "?>")) goto lex_prog_return; input += 2; spl_lex_string("", LEX_STRING_SPLTAGS, 0); goto next_symbol; } goto just_a_normal_operator; just_a_normal_operator: default: { if ( term && !strncmp(input, term, tlen) ) goto lex_prog_return; for (int i=0; lex_ops[i].text; i++) if ( !strncmp(input, lex_ops[i].text, strlen(lex_ops[i].text)) ) { input += strlen(lex_ops[i].text); switch ( lex_ops[i].lex ) { case STRING_EOL: { if ( *input == ' ' || *input == '\t' ) input++; int len = strcspn(input, "\r\n"); if ( input[len] == '\r' ) len++; if ( input[len] == '\n' ) len++; lex_new(VALUE, my_strndup(input, len)); input += len; goto next_symbol; } case STRING_EOL_S: { if ( *input == ' ' || *input == '\t' ) input++; spl_lex_string("\n", LEX_STRING_BACKSLASHES, 0); goto next_symbol; } case STRING_LABEL: { input += strspn(input, " \t\r\n"); int len = strspn(input, "ABCDEFGHIJKLMNOPQRSTUVWXYZ"); char *label = my_strndupa(input, len); char indenting_delim = 0; input += len; if ( *input == ' ' || *input == '\t' || *input == '\r' || *input == '\n' ) input++; else { indenting_delim = *(input++); if ( *input == ' ' || *input == '\t' || *input == '\r' || *input == '\n' ) input++; } char *end = strstr(input, label); if (!end) { my_asprintf(&lex_errstr, "End label missing."); longjmp(lex_goterr, __LINE__); } int id_status = 0; lex_new(VALUE, strndup_with_indenting_delim(input, end-input, indenting_delim, &id_status)); input = end + len; goto next_symbol; } case STRING_LABEL_S: { input += strspn(input, " \t\r\n"); int len = strspn(input, "ABCDEFGHIJKLMNOPQRSTUVWXYZ"); char *label = my_strndupa(input, len); char indenting_delim = 0; input += len; if ( *input == ' ' || *input 
== '\t' || *input == '\r' || *input == '\n' ) input++; else { indenting_delim = *(input++); if ( *input == ' ' || *input == '\t' || *input == '\r' || *input == '\n' ) input++; } spl_lex_string(label, LEX_STRING_BACKSLASHES, indenting_delim); goto next_symbol; } default: if (lex_ops[i].lex == SPECIALREF && lex_item_list_last && (lex_item_list_last->lex == ID || lex_item_list_last->lex == ']' || lex_item_list_last->lex == SPECIALREF)) lex_new('.', 0); if (lex_ops[i].lex == SPECIALREF) { char *lookupcode = ""; if (input[-2] == '*') lookupcode = ""; if (input[-2] == '+') lookupcode = "!CLS"; if (input[-2] == '/') lookupcode = "!ROOT"; if (input[-2] == '.') lookupcode = "!THIS"; lex_new(lex_ops[i].lex, strdup(lookupcode)); } else lex_new(lex_ops[i].lex, 0); goto next_symbol; } } if (*input == '[' && lex_item_list_last && (lex_item_list_last->lex == ID || lex_item_list_last->lex == ')' || lex_item_list_last->lex == ']' || lex_item_list_last->lex == SPECIALREF)) lex_new('.', 0); lex_new(*(input++), 0); goto next_symbol; } } longjmp(lex_goterr, __LINE__); lex_prog_return: lex_translate_ptr = old_lex_translate_ptr; lex_translate_count = old_lex_translate_count; } static char *last_debug_info; static void create_debug_op(int force) { char *newinfo; if (!gen_debug_info) return; int lineno = 1, charno = 1; const char *c = lex_last_item->src->text; while (c < lex_last_item->pos) if (*(c++) == '\n' ) lineno++, charno=1; else charno++; my_asprintf(&newinfo, "%d:%d:%s", lineno, charno, lex_last_item->src->name); if (!force && last_debug_info && !strcmp(last_debug_info, newinfo)) { free(newinfo); return; } if (last_debug_info) free(last_debug_info); last_debug_info = newinfo; spl_asm_add(as, SPL_OP_DBGSYM, newinfo); } static char *get_encoding_pragma(const char *text) { char *token = "#encoding "; int token_size = strlen(token); const char *begin = strstr(text, token); if ( !begin ) return 0; begin += token_size; token_size = strcspn(begin, " \r\n\t"); if ( token_size <= 0 ) return 
0; return my_strndup(begin, token_size); } extern int spl_compiler(struct spl_asm *a, const char *prog, const char *name, spl_malloc_file_function *mff, int gd) { #ifdef ENABLE_PTHREAD_SUPPORT pthread_mutex_lock(&compiler_lck); #endif as = a; malloc_file_func = mff; gen_debug_info = gd; no_checkp_insn = 0; php_like_tags_active = 0; php_like_tags_term = 0; define_list = 0; current_define = ""; define_recursion = 0; in_pragma_arg = 0; rvtolv_counter = 0; import_asm_label_counter = 0; if ( as->labels ) { spl_report(SPL_REPORT_COMPILER, as, "Compiler called with an assembler with non-empty label list!\n"); #ifdef ENABLE_PTHREAD_SUPPORT pthread_mutex_unlock(&compiler_lck); #endif return -1; } encoding = get_encoding_pragma(prog); if (encoding) { prog = spl_utf8_import(prog, encoding); if (!prog) { free(encoding); spl_report(SPL_REPORT_COMPILER, as, "Character set (#encoding) '%s' is unknown!\n", encoding); #ifdef ENABLE_PTHREAD_SUPPORT pthread_mutex_unlock(&compiler_lck); #endif return -1; } } if ( spl_utf8_check(prog) ) { spl_report(SPL_REPORT_COMPILER, as, "Compiler called with non-utf8 encoded program code!\n" "A decoder bug or just a missing '#encoding' pragma?\n"); if (encoding) { free(encoding); free((char*)prog); } #ifdef ENABLE_PTHREAD_SUPPORT pthread_mutex_unlock(&compiler_lck); #endif return -1; } input = prog; lex_srcfile_current = malloc(sizeof(struct lex_srcfile)); lex_srcfile_current->next = 0; lex_srcfile_current->text = input; lex_srcfile_current->name = name; lex_srcfile_list = lex_srcfile_current; lex_item_list_first = 0; lex_item_list_last = 0; lex_last_item = 0; lex_errstr = strdup("Generic lexer error"); label_stack_index = 0; label_stack_counter = 0; label_stack[label_stack_index] = label_stack_counter; breakcont_stack_index = 0; breakcont_stack_counter = 0; breakcont_stack[breakcont_stack_index] = breakcont_stack_counter; packpatch_stack_index = 0; last_debug_info = 0; // the 'volatile' is to make the compiler happy. 
// (a volatile variable can't be clobbered by a 'longjmp') volatile int ret = 1; int errlnr = 0; // this is to make the compiler happy. // (the new prog_free_ptr can't be clobbered by a 'longjmp') void * volatile prog_free_ptr = (void *)prog; if ( !(errlnr = setjmp(lex_goterr)) ) { spl_lex_prog(0); ret = spl_yyparse (); } else { int lineno = 1, charno = 1, position = 0; const char *c = lex_srcfile_current->text; while (c < input) { if (*(c++) == '\n' ) lineno++, charno=1; else charno++; if (position < 30) position += 1; } char *code; my_asprintf(&code, "%.40s", (input-position)); for (int i=0; code[i]; i++) if ( *input == '\r' || code[i] == '\n' || code[i] == '\t' ) code[i] = ' '; spl_report(SPL_REPORT_LEXER, as, "[%d] near line %d, char %d in %s: %s\n>> %s\n %*s^\n", errlnr, lineno, charno, lex_srcfile_current->name, lex_errstr, code, position, ""); free(code); } if ( !ret && packpatch_stack_index ) { spl_yyerror("Parser returned with non-empty packpatch stack!"); ret = 1; } if ( !ret && label_stack_index ) { spl_yyerror("Parser returned with non-empty label stack!"); ret = 1; } while ( define_list ) { struct define *next = define_list->next; free(define_list->name); free(define_list->text); if (define_list->args) free(define_list->args); free(define_list); define_list = next; } while ( lex_srcfile_list ) { struct lex_srcfile *next = lex_srcfile_list->next; if (next) { // we allocated those, so we can cast them // to non-const and free them. 
free((char*)lex_srcfile_list->text); free((char*)lex_srcfile_list->name); } free(lex_srcfile_list); lex_srcfile_list = next; } if (last_debug_info) free(last_debug_info); if (lex_last_item) free(lex_last_item); free(lex_errstr); spl_asm_resolve_labels(as); if (encoding) { free(encoding); free(prog_free_ptr); } #ifdef ENABLE_PTHREAD_SUPPORT pthread_mutex_unlock(&compiler_lck); #endif return ret; } static void spl_yyerror (char const *err) { int lineno = 1, charno = 1, position = 0; const char *c = lex_last_item->src->text; while (c < lex_last_item->pos) { if (*(c++) == '\n' ) lineno++, charno=1; else charno++; if (position < 30) position += 1; } char *code; my_asprintf(&code, "%.40s", (lex_last_item->pos-position)); for (int i=0; code[i]; i++) if ( *input == '\r' || code[i] == '\n' || code[i] == '\t' ) code[i] = ' '; spl_report(SPL_REPORT_COMPILER, as, "near line %d, char %d in %s: %s\n>> %s\n %*s^\n", lineno, charno, lex_last_item->src->name, err, code, position, ""); free(code); }