/* * SPL - The SPL Programming Language * Copyright (C) 2004, 2005 Clifford Wolf * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA * * mod_format_xml.c: Simple module for loading and dumping XML */ /** * A simple XML parser/dumper module * * This module implements simple XML parser and dumper functions. */ #include #include #include "spl.h" #include "compat.h" // backward compatibility for old expat versions #ifndef XMLCALL # define XMLCALL #endif extern void SPL_ABI(spl_mod_format_xml_init)(struct spl_vm *vm, struct spl_module *mod, int restore); extern void SPL_ABI(spl_mod_format_xml_done)(struct spl_vm *vm, struct spl_module *mod); /* copied from mod_encode_xml.c */ static char *xml_encode(const char *source) { int source_i, target_i; for (source_i = target_i = 0; source[source_i]; source_i++) switch (source[source_i]) { case '&': /* & */ target_i += 5; break; case '<': /* < */ target_i += 4; break; case '>': /* > */ target_i += 4; break; case '"': /* " */ target_i += 6; break; case '\'': /* ' */ target_i += 6; break; default: target_i++; } char *target = malloc(target_i+1); for (source_i = target_i = 0; source[source_i]; source_i++) switch (source[source_i]) { case '&': /* & */ target[target_i++] = '&'; target[target_i++] = 'a'; target[target_i++] = 'm'; target[target_i++] = 'p'; target[target_i++] = ';'; break; case '<': /* 
< */ target[target_i++] = '&'; target[target_i++] = 'l'; target[target_i++] = 't'; target[target_i++] = ';'; break; case '>': /* > */ target[target_i++] = '&'; target[target_i++] = 'g'; target[target_i++] = 't'; target[target_i++] = ';'; break; case '"': /* " */ target[target_i++] = '&'; target[target_i++] = 'q'; target[target_i++] = 'u'; target[target_i++] = 'o'; target[target_i++] = 't'; target[target_i++] = ';'; break; case '\'': /* ' */ target[target_i++] = '&'; target[target_i++] = 'a'; target[target_i++] = 'p'; target[target_i++] = 'o'; target[target_i++] = 's'; target[target_i++] = ';'; break; default: target[target_i++] = source[source_i]; } target[target_i] = 0; return target; } /** * This function returns a tree of ordered hashes. The keys in the ordered * hashes are encoded as following: * * A: * An attribute to this node. * * C * Character data. is counting up from zero. * * E: * A child node (element) in the XML tree. is counting up * from zero. So e.g. "E0:realname" is the 1st child element * of the type "realname". * * This is an order hash containing attributes, child nodes * and character data again. * * Because the hash is ordered, it is possible to get the elements in the * correct order by using 'foreach' loops or using the 'next' and 'prev' * instructions, or directly address the elements. E.g. * * var xmldata = * <> * * * * * This is a simple test HTML page. * * * *

 *				<p>Nothing interesting here!</p>

* * * ; * * var xmltree = format_xml_parse(xmldata); * * debug xmltree.["E0:html"].["E0:head"].["E0:title"].["C0"]; */ // builtin format_xml_parse(xmldata) static struct spl_node *handler_format_xml_parse(struct spl_task *task, void UNUSED(*data)) { struct stack_el_t { char *name; int counter; struct stack_el_t *next; }; struct stack_t { struct spl_node *node; struct stack_el_t *el_list; struct stack_t *next; }; char *xmlfile = spl_clib_get_string(task); struct stack_t *stack = 0; int last_is_char = -1; void stack_push() { struct stack_t *s = malloc(sizeof(struct stack_t)); s->node = spl_get(0); s->el_list = 0; s->next = stack; stack = s; } void stack_pop() { struct stack_t *s = stack; struct stack_el_t *e = s->el_list; while (e) { struct stack_el_t *next = e->next; free(e->name); free(e); e = next; } stack = s->next; free(s); } int stack_count(const char *el, int offset) { struct stack_el_t *e = stack->el_list; while (e) { if ( !strcmp(e->name, el) ) return (e->counter += offset); e = e->next; } e = malloc(sizeof(struct stack_el_t)); e->next = stack->el_list; stack->el_list = e; e->name = strdup(el); e->counter = -1; return (e->counter += offset); } void chardata_cleanup() { if ( last_is_char < 0 ) return; char *id, *id_enc, *t0, *t1; my_asprintf(&id, "C%d", last_is_char); id_enc = spl_hash_encode(id); struct spl_node *n = spl_lookup(task, stack->node, id_enc, 0); t0 = spl_get_string(n); t1 = t0 + strlen(t0); while (t1-- > t0) { if (*t1 == '\r') continue; if (*t1 == '\n') continue; if (*t1 == '\t') continue; if (*t1 == ' ') continue; break; } t1[1] = 0; if (!*t0) { spl_delete(task, stack->node, id_enc); stack_count(" chardata", -1); free(id_enc); free(id); return; } for (t1 = t0; *t1; t1++) { if (*t1 == '\r') continue; if (*t1 == '\n') continue; if (*t1 == '\t') continue; if (*t1 == ' ') continue; break; } if (t0 != t1) { t1 = strdup(t1); spl_set_string(n, t1); } free(id_enc); free(id); } void XMLCALL element_start_hdl(void UNUSED(*inner_data), const char *el, 
const char **attr) { char *id, *id_enc; if (last_is_char >= 0) chardata_cleanup(); my_asprintf(&id, "E%d:%s", stack_count(el, +1), el); id_enc = spl_hash_encode(id); stack_push(); spl_create(task, stack->next->node, id_enc, stack->node, SPL_CREATE_LOCAL); free(id_enc); free(id); for (int i = 0; attr[i]; i += 2) { my_asprintf(&id, "A:%s", attr[i]); id_enc = spl_hash_encode(id); spl_create(task, stack->node, id_enc, SPL_NEW_STRING_DUP(attr[i+1]), SPL_CREATE_LOCAL); free(id_enc); free(id); } last_is_char = -1; } void XMLCALL element_end_hdl(void UNUSED(*inner_data), const char UNUSED(*el)) { if (last_is_char >= 0) chardata_cleanup(); stack_pop(); last_is_char = -1; } void XMLCALL chardata_hdl(void UNUSED(*userData), const XML_Char *s, int len) { if ( last_is_char < 0 ) { char *id, *id_enc; last_is_char = stack_count(" chardata", +1); my_asprintf(&id, "C%d", last_is_char); id_enc = spl_hash_encode(id); spl_create(task, stack->node, id_enc, SPL_NEW_STRING(my_strndup(s, len)), SPL_CREATE_LOCAL); free(id_enc); free(id); } else { char *id, *id_enc, *txt; my_asprintf(&id, "C%d", last_is_char); id_enc = spl_hash_encode(id); struct spl_node *n = spl_lookup(task, stack->node, id_enc, 0); my_asprintf(&txt, "%s%.*s", spl_get_string(n), len, s); spl_set_string(n, txt); free(id_enc); free(id); } } stack_push(); spl_set_string(stack->node, strdup("")); XML_Parser p = XML_ParserCreate(0); XML_SetElementHandler(p, element_start_hdl, element_end_hdl); XML_SetCharacterDataHandler(p, chardata_hdl); if ( XML_Parse(p, xmlfile, strlen(xmlfile), 1) == 0 ) { spl_clib_exception(task, "FormatXmlEx", "description", SPL_NEW_PRINTF("XML Parse error at line %d: %s", (int)XML_GetCurrentLineNumber(p), XML_ErrorString(XML_GetErrorCode(p))), NULL); XML_ParserFree(p); while (stack->next) stack_pop(); spl_put(task->vm, stack->node); stack_pop(); return 0; } XML_ParserFree(p); struct spl_node *ret = stack->node; stack_pop(); return ret; } /** * Create an XML text from a data structure such as returned by 
[[format_xml_parse()]]. */ // builtin format_xml_dump(xmltree) static struct spl_node *handler_format_xml_dump(struct spl_task *task, void UNUSED(*data)) { struct txtlist_t { char *text; struct txtlist_t *next; }; struct txtlist_t *list = 0, *current = 0; int textlen = 0, i, j; struct spl_node *tree = spl_clib_get_node(task); if (!tree) return 0; spl_cleanup(task, tree); void newtext() { struct txtlist_t *t = calloc(1, sizeof(struct txtlist_t)); if ( !current ) list = t; else current->next = t; current = t; } void dump_xml(struct spl_node *n, int recurs) { struct spl_node_sub *s; char *t0, *t1, *t2; if ( recurs > 1024 ) { spl_report(SPL_REPORT_RUNTIME, task, "XML Object tree seems to be cyclic!\n"); return; } if (recurs > 0) { for (s=n->subs_begin; s; s=s->next) { if (*s->key != 'A') continue; t0 = spl_hash_decode(s->key); t1 = strchr(t0, ':'); if ( t1 ) { newtext(); t2 = xml_encode(spl_get_string(s->node)); textlen += my_asprintf(¤t->text, " %s=\"%s\"", t1+1, t2); free(t2); } free(t0); } newtext(); textlen += my_asprintf(¤t->text, ">\n"); } for (s=n->subs_begin; s; s=s->next) switch (*s->key) { case 'A': break; case 'C': newtext(); t2 = xml_encode(spl_get_string(s->node)); textlen += my_asprintf(¤t->text, "%*s%s\n", recurs*3, "", t2); free(t2); break; case 'E': t0 = spl_hash_decode(s->key); t1 = strchr(t0, ':'); if ( t1 ) { newtext(); textlen += my_asprintf(¤t->text, "%*s<%s", recurs*3, "", t1+1); dump_xml(s->node, recurs+1); newtext(); textlen += my_asprintf(¤t->text, "%*s\n", recurs*3, "", t1+1); } free(t0); break; default: /* simply ignore the rest */ break; } } dump_xml(tree, 0); char *text = malloc(textlen+1); i=0; while (list) { current = list; list = list->next; for (j=0; current->text[j]; i++, j++) text[i] = current->text[j]; free(current->text); free(current); } assert(i == textlen); text[i] = 0; return SPL_NEW_STRING(text); } /** * An instance of this object is thrown on XML parser errors. 
*/ // object FormatXmlEx /** * A description text describing the error. */ // var description; void SPL_ABI(spl_mod_format_xml_init)(struct spl_vm *vm, struct spl_module *mod, int restore) { if (!restore) spl_eval(vm, 0, strdup(mod->name), "object FormatXmlEx { }"); spl_clib_reg(vm, "format_xml_parse", handler_format_xml_parse, 0); spl_clib_reg(vm, "format_xml_dump", handler_format_xml_dump, 0); } void SPL_ABI(spl_mod_format_xml_done)(struct spl_vm UNUSED(*vm), struct spl_module UNUSED(*mod)) { return; }