| 1 | /* Document-class: FastXml |
|---|
| 2 | * Provides the base namespace for the FastXml classes |
|---|
| 3 | * # FastXml::Doc |
|---|
| 4 | * # FastXml::Node |
|---|
| 5 | * # FastXml::NodeList |
|---|
| 6 | * # FastXml::AttrList |
|---|
| 7 | */ |
|---|
| 8 | // Please see the LICENSE file for copyright, licensing and distribution information |
|---|
| 9 | |
|---|
| 10 | #define fastxml_c 1 |
|---|
| 11 | #include "fastxml.h" |
|---|
| 12 | #include "fastxml_node.h" |
|---|
| 13 | #include "fastxml_doc.h" |
|---|
| 14 | #include "fastxml_nodelist.h" |
|---|
| 15 | #include "fastxml_attrlist.h" |
|---|
| 16 | |
|---|
| 17 | #ifdef _WIN32 |
|---|
| 18 | __declspec(dllexport) |
|---|
| 19 | #endif |
|---|
| 20 | |
|---|
| 21 | |
|---|
| 22 | VALUE rb_mFastXml; |
|---|
| 23 | VALUE rb_cFastXmlDoc; |
|---|
| 24 | VALUE rb_cFastXmlNode; |
|---|
| 25 | VALUE rb_cFastXmlNodeList; |
|---|
| 26 | VALUE rb_cFastXmlAttrList; |
|---|
| 27 | VALUE rb_sValidateDtd; |
|---|
| 28 | VALUE rb_sForgivingParse; |
|---|
| 29 | VALUE rb_sHtmlParse; |
|---|
| 30 | ID s_readlines; |
|---|
| 31 | ID s_to_s; |
|---|
| 32 | |
|---|
| 33 | void Init_fastxml() |
|---|
| 34 | { |
|---|
| 35 | if (xmlHasFeature(XML_WITH_TREE) == 0) |
|---|
| 36 | rb_raise( rb_eRuntimeError, "libxml not built with tree support" ); |
|---|
| 37 | |
|---|
| 38 | if (xmlHasFeature(XML_WITH_XPATH) == 0) |
|---|
| 39 | rb_raise( rb_eRuntimeError, "libxml not built with xpath support" ); |
|---|
| 40 | |
|---|
| 41 | s_readlines = rb_intern("readlines"); |
|---|
| 42 | s_to_s = rb_intern("to_s"); |
|---|
| 43 | |
|---|
| 44 | xmlInitParser(); |
|---|
| 45 | xmlXPathInit(); |
|---|
| 46 | xsltInit(); |
|---|
| 47 | rb_mFastXml = rb_define_module( "FastXml" ); |
|---|
| 48 | rb_define_const( rb_mFastXml, "LIBXML_VERSION", rb_str_new2( LIBXML_DOTTED_VERSION ) ); |
|---|
| 49 | |
|---|
| 50 | /* setting symbols */ |
|---|
| 51 | rb_sValidateDtd = ID2SYM( rb_intern("validate") ); |
|---|
| 52 | rb_sForgivingParse = ID2SYM( rb_intern("forgiving") ); |
|---|
| 53 | rb_sHtmlParse = ID2SYM( rb_intern("html") ); |
|---|
| 54 | |
|---|
| 55 | Init_fastxml_doc(); /* Doc */ |
|---|
| 56 | Init_fastxml_node(); /* Node */ |
|---|
| 57 | Init_fastxml_nodelist(); /* NodeList */ |
|---|
| 58 | Init_fastxml_attrlist(); /* AttrList */ |
|---|
| 59 | |
|---|
| 60 | /* pull in the ruby side of things */ |
|---|
| 61 | rb_require( "fastxml/fastxml_lib" ); // ruby-side methods for the FastXml classes |
|---|
| 62 | rb_require( "fastxml/fastxml_helpers" ); // FastXml and FastHtml methods |
|---|
| 63 | } |
|---|
| 64 | |
|---|
| 65 | |
|---|
| 66 | |
|---|
| 67 | |
|---|
| 68 | void fastxml_data_mark( fxml_data_t *data ) |
|---|
| 69 | { |
|---|
| 70 | /* do nothing */ |
|---|
| 71 | } |
|---|
| 72 | |
|---|
| 73 | void fastxml_data_free( fxml_data_t *data ) |
|---|
| 74 | { |
|---|
| 75 | if (data != NULL) |
|---|
| 76 | { |
|---|
| 77 | if (data->xpath_obj != NULL) |
|---|
| 78 | xmlXPathFreeObject( data->xpath_obj ); |
|---|
| 79 | |
|---|
| 80 | if (data->xslt != NULL) |
|---|
| 81 | xsltFreeStylesheet( data->xslt ); |
|---|
| 82 | |
|---|
| 83 | if (data->doc != NULL && data->node == NULL && data->list == NULL && data->xpath_obj == NULL) |
|---|
| 84 | xmlFreeDoc( data->doc ); |
|---|
| 85 | |
|---|
| 86 | // the doc free will cleanup the nodes |
|---|
| 87 | |
|---|
| 88 | data->xpath_obj = NULL; |
|---|
| 89 | data->list = NULL; |
|---|
| 90 | data->doc = NULL; |
|---|
| 91 | data->xslt = NULL; |
|---|
| 92 | data->node = NULL; |
|---|
| 93 | free( data ); |
|---|
| 94 | } |
|---|
| 95 | data = NULL; |
|---|
| 96 | } |
|---|
| 97 | |
|---|
| 98 | VALUE fastxml_data_alloc( VALUE klass ) |
|---|
| 99 | { |
|---|
| 100 | return Qnil; |
|---|
| 101 | } |
|---|
| 102 | |
|---|
| 103 | VALUE fastxml_raw_node_to_my_obj(xmlNodePtr cur, fxml_data_t *chld) |
|---|
| 104 | { |
|---|
| 105 | VALUE dv_chld, new_tmp; |
|---|
| 106 | chld->node = cur; |
|---|
| 107 | chld->doc = cur->doc; |
|---|
| 108 | |
|---|
| 109 | new_tmp = rb_class_new_instance( 0, 0, rb_cFastXmlNode ); |
|---|
| 110 | dv_chld = Data_Wrap_Struct( rb_cObject, fastxml_data_mark, fastxml_data_free, chld ); |
|---|
| 111 | rb_iv_set( new_tmp, "@lxml_doc", dv_chld ); |
|---|
| 112 | |
|---|
| 113 | return new_tmp; |
|---|
| 114 | } |
|---|
| 115 | |
|---|
| 116 | VALUE fastxml_raw_node_to_obj(xmlNodePtr cur) |
|---|
| 117 | { |
|---|
| 118 | fxml_data_t *chld = ALLOC(fxml_data_t); |
|---|
| 119 | memset( chld, 0, sizeof(fxml_data_t) ); |
|---|
| 120 | return fastxml_raw_node_to_my_obj( cur, chld ); |
|---|
| 121 | } |
|---|
| 122 | |
|---|
| 123 | VALUE fastxml_nodelist_to_obj(xmlNodePtr root, int len) |
|---|
| 124 | { |
|---|
| 125 | VALUE ret, dv_chld; |
|---|
| 126 | xmlNodePtr cur = root; |
|---|
| 127 | fxml_data_t *ndlst = ALLOC(fxml_data_t); |
|---|
| 128 | memset( ndlst, 0, sizeof(fxml_data_t) ); |
|---|
| 129 | |
|---|
| 130 | ndlst->list_len = len; |
|---|
| 131 | ndlst->list = cur; |
|---|
| 132 | ret = rb_class_new_instance( 0, 0, rb_cFastXmlNodeList ); |
|---|
| 133 | dv_chld = Data_Wrap_Struct( rb_cObject, fastxml_data_mark, fastxml_data_free, ndlst ); |
|---|
| 134 | rb_iv_set( ret, "@lxml_doc", dv_chld ); |
|---|
| 135 | |
|---|
| 136 | return ret; |
|---|
| 137 | } |
|---|
| 138 | |
|---|
| 139 | VALUE fastxml_nodeset_to_obj(xmlXPathObjectPtr raw_xpath_obj, fxml_data_t *data) |
|---|
| 140 | { |
|---|
| 141 | VALUE ret, dv_chld; |
|---|
| 142 | fxml_data_t *ndlst = ALLOC(fxml_data_t); |
|---|
| 143 | memset( ndlst, 0, sizeof(fxml_data_t) ); |
|---|
| 144 | |
|---|
| 145 | ndlst->xpath_obj = raw_xpath_obj; |
|---|
| 146 | ndlst->list_len = raw_xpath_obj->nodesetval->nodeNr; |
|---|
| 147 | ret = rb_class_new_instance( 0, 0, rb_cFastXmlNodeList ); |
|---|
| 148 | dv_chld = Data_Wrap_Struct( rb_cObject, fastxml_data_mark, fastxml_data_free, ndlst ); |
|---|
| 149 | rb_iv_set( ret, "@lxml_doc", dv_chld ); |
|---|
| 150 | |
|---|
| 151 | return ret; |
|---|
| 152 | } |
|---|
| 153 | |
|---|
| 154 | VALUE munge_xpath_namespace( VALUE orig_expr, xmlChar *root_ns ) |
|---|
| 155 | { |
|---|
| 156 | VALUE path_bits = rb_str_split( orig_expr, "/" ); |
|---|
| 157 | VALUE ns_prefix = rb_str_new2( (const char*)root_ns ); |
|---|
| 158 | VALUE ns_indic = rb_str_new2( ":" ); |
|---|
| 159 | VALUE slash = rb_str_new2( "/" ); |
|---|
| 160 | VALUE path_bit, str_idx; |
|---|
| 161 | VALUE ret_ary = rb_ary_new(); |
|---|
| 162 | long i; |
|---|
| 163 | |
|---|
| 164 | rb_str_append( ns_prefix, ns_indic ); |
|---|
| 165 | for (i=0; i<RARRAY(path_bits)->len; i++) { |
|---|
| 166 | path_bit = RARRAY(path_bits)->ptr[i]; |
|---|
| 167 | |
|---|
| 168 | if (RSTRING_LEN(path_bit) > 0) { |
|---|
| 169 | str_idx = rb_funcall( path_bit, rb_intern( "index" ), 1, ns_indic ); |
|---|
| 170 | if (str_idx == Qnil || str_idx == Qfalse) // didn't find the :, so it looks like we don't have a namespace |
|---|
| 171 | path_bit = rb_str_plus( ns_prefix, path_bit ); |
|---|
| 172 | } |
|---|
| 173 | |
|---|
| 174 | rb_ary_push( ret_ary, path_bit ); |
|---|
| 175 | } |
|---|
| 176 | |
|---|
| 177 | return rb_ary_join( ret_ary, slash ); |
|---|
| 178 | } |
|---|
| 179 | |
|---|
| 180 | |
|---|
| 181 | /** |
|---|
| 182 | * fastxml_xpath_search provides a common xpath search function for |
|---|
| 183 | * the libraries bits (node, doc). it handles mangling non-namespaced |
|---|
| 184 | * xpath queries into something libxml will play nice with |
|---|
| 185 | */ |
|---|
| 186 | VALUE fastxml_xpath_search(VALUE self, VALUE raw_xpath, VALUE blk) |
|---|
| 187 | { |
|---|
| 188 | VALUE ret, dv, xpath_s; |
|---|
| 189 | xmlXPathCompExprPtr xpath_xpr; |
|---|
| 190 | xmlXPathContextPtr xpath_ctx; |
|---|
| 191 | xmlXPathObjectPtr xpath_obj; |
|---|
| 192 | fxml_data_t *data; |
|---|
| 193 | xmlChar *xpath_expr; |
|---|
| 194 | xmlNodePtr root = NULL; |
|---|
| 195 | xmlNsPtr *ns_list = NULL; |
|---|
| 196 | xmlNsPtr *cur_ns = NULL; |
|---|
| 197 | xmlChar *root_ns = NULL; |
|---|
| 198 | int ns_cnt = 0; |
|---|
| 199 | |
|---|
| 200 | if (NIL_P(raw_xpath)) |
|---|
| 201 | rb_raise(rb_eArgError, "nil passed as xpath"); |
|---|
| 202 | |
|---|
| 203 | dv = rb_iv_get( self, "@lxml_doc" ); |
|---|
| 204 | Data_Get_Struct( dv, fxml_data_t, data ); |
|---|
| 205 | |
|---|
| 206 | xpath_ctx = xmlXPathNewContext( data->doc ); |
|---|
| 207 | if (xpath_ctx == NULL) |
|---|
| 208 | rb_raise( rb_eRuntimeError, "unable to create xpath context" ); |
|---|
| 209 | |
|---|
| 210 | root = data->node; |
|---|
| 211 | if (root == NULL) |
|---|
| 212 | root = xmlDocGetRootElement( data->doc ); |
|---|
| 213 | |
|---|
| 214 | xpath_ctx->node = root; |
|---|
| 215 | cur_ns = ns_list = xmlGetNsList( data->doc, root ); |
|---|
| 216 | while (cur_ns != NULL && (*cur_ns) != NULL) { |
|---|
| 217 | xmlXPathRegisterNs( xpath_ctx, (*cur_ns)->prefix, (*cur_ns)->href ); |
|---|
| 218 | cur_ns++; |
|---|
| 219 | } |
|---|
| 220 | |
|---|
| 221 | if (ns_list != NULL) { |
|---|
| 222 | xpath_ctx->namespaces = ns_list; |
|---|
| 223 | xpath_ctx->nsNr = ns_cnt; |
|---|
| 224 | } |
|---|
| 225 | |
|---|
| 226 | xpath_s = rb_obj_as_string( raw_xpath ); |
|---|
| 227 | if (root->ns != NULL) { // we have a base namespace, this is going to get "interesting" |
|---|
| 228 | root_ns = (xmlChar*)root->ns->prefix; |
|---|
| 229 | if (root_ns == NULL) |
|---|
| 230 | root_ns = (xmlChar*)"__myFunkyLittleRootNsNotToBeUseByAnyoneElseIHope__"; |
|---|
| 231 | // alternatives? how do other xpath processors handle root/default namespaces? |
|---|
| 232 | |
|---|
| 233 | xmlXPathRegisterNs( xpath_ctx, root_ns, root->ns->href ); |
|---|
| 234 | // need to update the xpath expression |
|---|
| 235 | xpath_s = munge_xpath_namespace( xpath_s, root_ns ); |
|---|
| 236 | xpath_ctx->nsNr++; |
|---|
| 237 | } |
|---|
| 238 | |
|---|
| 239 | xpath_expr = (xmlChar*)RSTRING_PTR(xpath_s); |
|---|
| 240 | xpath_xpr = xmlXPathCompile( xpath_expr ); |
|---|
| 241 | if (xpath_xpr == NULL) { |
|---|
| 242 | xmlXPathFreeContext( xpath_ctx ); |
|---|
| 243 | xmlFree( ns_list ); |
|---|
| 244 | rb_raise( rb_eRuntimeError, "unable to evaluate xpath expression" ); |
|---|
| 245 | } |
|---|
| 246 | |
|---|
| 247 | xpath_obj = xmlXPathCompiledEval( xpath_xpr, xpath_ctx ); |
|---|
| 248 | if (xpath_obj == NULL) { |
|---|
| 249 | rb_raise( rb_eRuntimeError, "unable to evaluate xpath expression" ); |
|---|
| 250 | xmlXPathFreeCompExpr( xpath_xpr ); |
|---|
| 251 | xmlXPathFreeContext( xpath_ctx ); |
|---|
| 252 | xmlFree( ns_list ); |
|---|
| 253 | return Qnil; |
|---|
| 254 | } |
|---|
| 255 | |
|---|
| 256 | ret = fastxml_nodeset_to_obj( xpath_obj, data ); |
|---|
| 257 | |
|---|
| 258 | xmlFree( ns_list ); |
|---|
| 259 | xmlXPathFreeCompExpr( xpath_xpr ); |
|---|
| 260 | xmlXPathFreeContext( xpath_ctx ); |
|---|
| 261 | |
|---|
| 262 | return ret; |
|---|
| 263 | } |
|---|
| 264 | |
|---|