XML Parser for Libsvgtiny

这是我的硕士论文笔记系列第三篇,原文写于 2016 年五月。

Libxml2

Libxml2資料結構

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
/**
* xmlNode:
*
* A node in an XML tree.
*
* The fields from _private through doc form the "common part"; the
* same leading field sequence appears in struct _xmlDoc and
* struct _xmlAttribute. The fields after the "End of common part"
* marker are specific to this struct.
*/

typedef struct _xmlNode xmlNode;
typedef xmlNode *xmlNodePtr;
struct _xmlNode {
void *_private; /* application data */
xmlElementType type; /* type number, must be second ! */
const xmlChar *name; /* the name of the node, or the entity */
struct _xmlNode *children; /* parent->children link */
struct _xmlNode *last; /* last child link */
struct _xmlNode *parent; /* child->parent link */
struct _xmlNode *next; /* next sibling link */
struct _xmlNode *prev; /* previous sibling link */
struct _xmlDoc *doc; /* the containing document */

/* End of common part */
xmlNs *ns; /* pointer to the associated namespace */
xmlChar *content; /* the content */
struct _xmlAttr *properties;/* properties list */
xmlNs *nsDef; /* namespace definitions on this node */
void *psvi; /* for type/PSVI information */
unsigned short line; /* line number */
unsigned short extra; /* extra data for XPath/XSLT */
};


/**
* xmlDoc:
*
* An XML document.
*
* Starts with the same common field sequence as struct _xmlNode
* (from _private through doc); document-wide state follows the
* "End of common part" marker.
*/
typedef struct _xmlDoc xmlDoc;
typedef xmlDoc *xmlDocPtr;
struct _xmlDoc {
void *_private; /* application data */
xmlElementType type; /* XML_DOCUMENT_NODE, must be second ! */
char *name; /* name/filename/URI of the document */
struct _xmlNode *children; /* the document tree */
struct _xmlNode *last; /* last child link */
struct _xmlNode *parent; /* child->parent link */
struct _xmlNode *next; /* next sibling link */
struct _xmlNode *prev; /* previous sibling link */
struct _xmlDoc *doc; /* autoreference to itself */

/* End of common part */
int compression; /* level of zlib compression */
int standalone; /* standalone document (no external refs)
                   1 if standalone="yes"
                   0 if standalone="no"
                  -1 if there is no XML declaration
                  -2 if there is an XML declaration, but no
                     standalone attribute was specified */
struct _xmlDtd *intSubset; /* the document internal subset */
struct _xmlDtd *extSubset; /* the document external subset */
struct _xmlNs *oldNs; /* Global namespace, the old way */
const xmlChar *version; /* the XML version string */
const xmlChar *encoding; /* external initial encoding, if any */
void *ids; /* Hash table for ID attributes if any */
void *refs; /* Hash table for IDREFs attributes if any */
const xmlChar *URL; /* The URI for that document */
int charset; /* encoding of the in-memory content, actually an xmlCharEncoding */
struct _xmlDict *dict; /* dict used to allocate names or NULL */
void *psvi; /* for type/PSVI information */
int parseFlags; /* set of xmlParserOption used to parse the document */
int properties; /* set of xmlDocProperties for this document, set at the end of parsing */
};


/**
* xmlAttribute:
*
* An Attribute declaration in a DTD.
*
* Begins with the same common field sequence as struct _xmlNode
* (from _private through doc), except that parent points to the
* owning DTD; the declaration-specific fields follow.
*/

typedef struct _xmlAttribute xmlAttribute;
typedef xmlAttribute *xmlAttributePtr;
struct _xmlAttribute {
void *_private; /* application data */
xmlElementType type; /* XML_ATTRIBUTE_DECL, must be second ! */
const xmlChar *name; /* Attribute name */
struct _xmlNode *children; /* NULL */
struct _xmlNode *last; /* NULL */
struct _xmlDtd *parent; /* -> DTD */
struct _xmlNode *next; /* next sibling link */
struct _xmlNode *prev; /* previous sibling link */
struct _xmlDoc *doc; /* the containing document */

/* End of common part; attribute-declaration fields follow */
struct _xmlAttribute *nexth; /* next in hash table */
xmlAttributeType atype; /* The attribute type */
xmlAttributeDefault def; /* the default */
const xmlChar *defaultValue; /* or the default value */
xmlEnumerationPtr tree; /* or the enumeration tree if any */
const xmlChar *prefix; /* the namespace prefix if any */
const xmlChar *elem; /* Element holding the attribute */
};

Libxml2構建API

1
/* Create a parser. Per the accompanying notes, the implementation calls
 * xmlInitParser() and calloc()s one dom_document and one dom_xml_parser.
 * NOTE(review): the dontCare* parameters are presumably ignored, judging
 * by their names only -- confirm against the implementation. */
dom_xml_parser *dom_xml_parser_create(void *dontCare1, void *dontCare2, MesgFuncPtr mesgFunc, void *dontCare3, dom_document **outDocument);

呼叫xmlInitParser()初始化parser,然後使用calloc分別配置dom_document和dom_xml_parser的指針各一個。

1
/* Parse a chunk of input. Per the accompanying notes, this is implemented
 * by calling xmlReadMemory(). */
dom_xml_error dom_xml_parser_parse_chunk(dom_xml_parser *parser, const uint8_t *data, size_t len);

呼叫xmlReadMemory()函式解析。

1
/* Look up the root <svg> element. Per the accompanying notes, this calls
 * xmlDocGetRootElement() directly, i.e. the document is assumed to be an
 * SVG file. */
dom_exception dom_document_get_document_element(dom_document *document, dom_element **outNode);

用來尋找根<svg>,實作直接呼叫xmlDocGetRootElement()函式得到document→node,相當於默認是SVG檔案。

1
/* Return the node's name through outString. Per the accompanying notes:
 * when the node has a namespace with a prefix, the name is built as
 * "prefix:name" with asprintf() and then passed through the hand-written
 * dom_string_create_interned(); otherwise the plain node name is used. */
dom_exception dom_node_get_node_name(dom_node *node, dom_string **outString);

自幹一個dom_string_create_interned()函式:

  • 如果svg→node→ns和svg→node→ns→prefix存在,呼叫asprintf(&qname, "%s:%s", n->ns->prefix, n->name)先把字串寫進qname,然後再通過自幹的函式把qname的值給outString
  • 若不存在就直接呼叫自幹函式把svg→node→name的值給outString
1
/* Find an element by its id; used by svgtiny_gradient.c. Per the
 * accompanying notes, it first checks for an "id" attribute with
 * xmlHasProp() and then compares against the requested ID string; an
 * internal getElementById() helper does the xmlHasProp()/strcasecmp()
 * work. */
dom_exception dom_document_get_element_by_id(dom_node *node, dom_string *string, dom_element **outNode);

svgtiny_gradient.c使用,先呼叫xmlHasProp()函式檢查document→node是否存在id這個屬性:

  • id屬性存在則檢查裏面是否存在需要的ID字串,把gradient的指標指向這個document;
  • 內部還實作了一個getElementById()函式,供這個API調用,也是類似呼叫xmlHasProp()和做strcasecmp()。
1
/* Fetch the named attribute of a node. Per the accompanying notes, this
 * uses xmlHasProp() to obtain the attribute, e.g. the "d" attribute that
 * carries path data. */
dom_exception dom_element_get_attribute(dom_node *node, dom_string *string, dom_string **outAttribute);

呼叫xmlHasProp()得到指定的屬性,例如讀svg的d屬性,也就是path。

1
/* Collect child elements by tag name; used by svgtiny_gradient.c. Per the
 * accompanying notes, it walks the element's children list and adds every
 * node whose name equals the tag name to the node list. */
dom_exception dom_element_get_elements_by_tag_name(dom_element *element, dom_string *string, dom_nodelist **outNodeList);

svgtiny_gradient.c使用。遍歷element→node→children鏈表,只要節點的名字和tag名相同,就把其加入nodeList裏。

1
/* Report the node's type through outType.
 * NOTE(review): the accompanying notes only say that the set of node type
 * values must be respected; the exact mapping from the parser's node
 * types is not shown here -- confirm against the implementation. */
dom_exception dom_node_get_node_type(dom_node *node, dom_node_type *outType);

需要注意node type的種類要滿足。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
/* Remaining entry points of the DOM/lwc shim. Per the accompanying notes,
 * these are either implemented trivially or not implemented at all; the
 * prototypes are listed for completeness. */

/* Parser lifecycle */
dom_xml_error dom_xml_parser_completed(dom_xml_parser *parser);
dom_xml_error dom_xml_parser_destroy(dom_xml_parser *parser);
/* Node lists and node traversal */
dom_exception dom_nodelist_get_length(dom_nodelist *nodeList, uint32_t *outLen);
dom_exception dom_nodelist_item(dom_nodelist *nodeList, uint32_t index, dom_node **outItemp);
void dom_node_unref(dom_node *node);
void dom_nodelist_unref(dom_nodelist *nodeList);
dom_exception dom_node_get_first_child(dom_element *element, dom_element **outChild);
int dom_string_caseless_isequal(dom_string *a, dom_string *b);
dom_exception dom_node_get_next_sibling(dom_element *element, dom_element **outChild);
dom_exception dom_text_get_whole_text(dom_element *element, dom_string **outString);
/* String helpers (lwc_string and dom_string) */
lwc_error lwc_intern_string(const char *data, size_t len, lwc_string **outString);
int dom_string_caseless_lwc_isequal(dom_string *str, lwc_string *lwcString);
void lwc_string_unref(lwc_string *lwcString);
uint32_t dom_string_byte_length(dom_string *str);
dom_exception dom_string_create_interned(const uint8_t *data, size_t len, dom_string **outString);
char *dom_string_data(dom_string *str);
int dom_string_isequal(dom_string *a, dom_string *b);
dom_string *dom_string_ref(dom_string *str);
void dom_string_unref(dom_string *str);

實作簡單或未實作。

DOM Parser Choices

YXML

  • 提供的接口函式太少
  • 使用stack替代malloc

EZXML

  • 資料結構比較合理,包含基本常用之node
  • 提供的API還算全面

嘗試可成功解析SVG檔:

  • 使用strcasecmp修復attr的大小寫問題,例如“viewbox”與“viewBox”;
  • node的type類型未初始化,除一開始parse到的node是DOM類型外,其餘node一律初始化爲ELEMENT類型。

以下是使用libxml2(左)和ezxml(右)的渲染對比。