Changeset 1a1f24b0886a09173c55cf0e244099c90f41f889
- Timestamp:
- 07/26/07 01:17:19 (18 months ago)
- Author:
- Mark Guzman <segfault@…>
- Parents:
- b9a586225c39677d89f1197d41d2eb7bf357565e
- Children:
- d32b7350ffb781a149cb02ca0ef3966011ead316
- git-committer:
- Mark Guzman <segfault@hasno.info> / 2007-07-26T05:17:19Z+0000
- Message:
-
xpath now works in the default namespace
added the speedtest benchmark
git-svn-id: svn://hasno.info/fastxml/trunk@16 b3082176-f867-4bde-be85-e3c57d66f029
- Files:
-
Legend:
- Unmodified
- Added
- Removed
-
|
rb9a586
|
r1a1f24
|
|
| 37 | 37 | static VALUE fastxml_doc_stylesheet(VALUE self); |
| 38 | 38 | static VALUE fastxml_doc_stylesheet_set(VALUE self, VALUE style); |
| | 39 | |
| | 40 | VALUE fastxml_xpath_search(VALUE self, VALUE raw_xpath); |
| 39 | 41 | |
| 40 | 42 | static VALUE fastxml_node_initialize(VALUE self); |
| … |
… |
|
| 169 | 171 | xmlFree( ents ); |
| 170 | 172 | |
| 171 | | |
| 172 | 173 | return new_val; |
| 173 | 174 | } |
| … |
… |
|
| 177 | 178 | VALUE ret, dv; |
| 178 | 179 | fxml_data_t *data; |
| | 180 | xmlChar *cont; |
| 179 | 181 | |
| 180 | 182 | dv = rb_iv_get( self, "@lxml_node" ); |
| 181 | 183 | Data_Get_Struct( dv, fxml_data_t, data ); |
| 182 | 184 | |
| 183 | | if (data->node->content == NULL) |
| | 185 | cont = xmlNodeGetContent( data->node ); |
| | 186 | |
| | 187 | if (cont == NULL) |
| 184 | 188 | return Qnil; |
| 185 | 189 | |
| 186 | | ret = rb_str_new2( (const char*)data->node->content ); |
| | 190 | ret = rb_str_new2( (const char*)cont ); |
| 187 | 191 | |
| 188 | 192 | return ret; |
| … |
… |
|
| 210 | 214 | static VALUE fastxml_node_search(VALUE self, VALUE raw_xpath) |
| 211 | 215 | { |
| 212 | | return Qnil; |
| | 216 | return fastxml_xpath_search( self, raw_xpath ); |
| 213 | 217 | } |
| 214 | 218 | |
| … |
… |
|
| 268 | 272 | } |
| 269 | 273 | |
| 270 | | static VALUE fastxml_doc_search(VALUE self, VALUE raw_xpath) |
| | 274 | static VALUE munge_xpath_namespace( VALUE orig_expr, xmlChar *root_ns ) |
| | 275 | { |
| | 276 | VALUE path_bits = rb_str_split( orig_expr, "/" ); |
| | 277 | VALUE ns_prefix = rb_str_new2( (const char*)root_ns ); |
| | 278 | VALUE ns_indic = rb_str_new2( ":" ); |
| | 279 | VALUE slash = rb_str_new2( "/" ); |
| | 280 | VALUE path_bit, str_idx; |
| | 281 | VALUE ret_ary = rb_ary_new(); |
| | 282 | long i; |
| | 283 | |
| | 284 | rb_str_append( ns_prefix, ns_indic ); |
| | 285 | for (i=0; i<RARRAY(path_bits)->len; i++) { |
| | 286 | path_bit = RARRAY(path_bits)->ptr[i]; |
| | 287 | |
| | 288 | if (RSTRING(path_bit)->len > 0) { |
| | 289 | str_idx = rb_funcall( path_bit, rb_intern( "index" ), 1, ns_indic ); |
| | 290 | if (str_idx == Qnil || str_idx == Qfalse) // didn't find the :, so it looks like we don't have a namespace |
| | 291 | path_bit = rb_str_plus( ns_prefix, path_bit ); |
| | 292 | } |
| | 293 | |
| | 294 | rb_ary_push( ret_ary, path_bit ); |
| | 295 | } |
| | 296 | |
| | 297 | return rb_ary_join( ret_ary, slash ); |
| | 298 | } |
| | 299 | |
| | 300 | VALUE fastxml_xpath_search(VALUE self, VALUE raw_xpath) |
| 271 | 301 | { |
| 272 | 302 | VALUE ret, dv, xpath_s; |
| … |
… |
|
| 279 | 309 | xmlNsPtr *ns_list = NULL; |
| 280 | 310 | xmlNsPtr *cur_ns = NULL; |
| | 311 | xmlChar *root_ns = NULL; |
| 281 | 312 | int ns_cnt = 0; |
| 282 | 313 | |
| 283 | 314 | if (NIL_P(raw_xpath)) { |
| 284 | | //printf("got nil\n"); |
| 285 | 315 | rb_raise(rb_eArgError, "nil passed as xpath"); |
| 286 | 316 | return Qnil; |
| 287 | 317 | } |
| 288 | | |
| 289 | | xpath_s = rb_obj_as_string( raw_xpath ); |
| 290 | | xpath_expr = (xmlChar*)StringValuePtr( xpath_s ); |
| 291 | | printf("got xpath: %s\n", xpath_expr); |
| 292 | 318 | |
| 293 | 319 | dv = rb_iv_get( self, "@lxml_doc" ); |
| … |
… |
|
| 300 | 326 | } |
| 301 | 327 | |
| 302 | | root = xmlDocGetRootElement( data->doc ); |
| | 328 | root = data->node; |
| | 329 | if (root == NULL) |
| | 330 | root = xmlDocGetRootElement( data->doc ); |
| | 331 | |
| | 332 | xpath_ctx->node = root; |
| 303 | 333 | ns_list = xmlGetNsList( data->doc, root ); |
| 304 | 334 | while (cur_ns != NULL) { |
| 305 | | printf( "%s -> %s\n", (*cur_ns)->prefix, (*cur_ns)->href ); |
| 306 | 335 | xmlXPathRegisterNs( xpath_ctx, (*cur_ns)->prefix, (*cur_ns)->href ); |
| 307 | 336 | cur_ns++; |
| … |
… |
|
| 310 | 339 | xpath_ctx->namespaces = ns_list; |
| 311 | 340 | xpath_ctx->nsNr = ns_cnt; |
| 312 | | |
| 313 | | if (root->ns != NULL) { |
| 314 | | printf("here %s -> %s\n\n", root->ns->prefix, root->ns->href ); |
| 315 | | xmlXPathRegisterNs( xpath_ctx, root->ns->prefix, root->ns->href ); |
| | 341 | |
| | 342 | xpath_s = rb_obj_as_string( raw_xpath ); |
| | 343 | |
| | 344 | |
| | 345 | if (root->ns != NULL) { // we have a base namespace, this is going to get "interesting" |
| | 346 | root_ns = (xmlChar*)root->ns->prefix; |
| | 347 | if (root_ns == NULL) |
| | 348 | root_ns = (xmlChar*)"myFunkyLittleRootNsNotToBeUseByAnyoneElseIHope"; // alternatives? how do other xpath processors handle root/default namespaces? |
| | 349 | |
| | 350 | xmlXPathRegisterNs( xpath_ctx, root_ns, root->ns->href ); |
| | 351 | // need to update the xpath expression |
| | 352 | xpath_s = munge_xpath_namespace( xpath_s, root_ns ); |
| 316 | 353 | xpath_ctx->nsNr++; |
| 317 | 354 | } |
| 318 | | printf("nsNr: %d\n", xpath_ctx->nsNr); |
| 319 | | |
| | 355 | |
| | 356 | xpath_expr = (xmlChar*)StringValuePtr( xpath_s ); |
| 320 | 357 | xpath_xpr = xmlXPathCompile( xpath_expr ); |
| 321 | 358 | if (xpath_xpr == NULL) { |
| … |
… |
|
| 326 | 363 | } |
| 327 | 364 | |
| 328 | | //xpath_obj = xmlXPathEval( xpath_expr, xpath_ctx ); |
| 329 | | //xpath_obj = xmlXPathEvalExpression( xpath_expr, xpath_ctx ); |
| | 365 | |
| 330 | 366 | xpath_obj = xmlXPathCompiledEval( xpath_xpr, xpath_ctx ); |
| 331 | 367 | if(xpath_obj == NULL) { |
| … |
… |
|
| 344 | 380 | xmlXPathFreeContext( xpath_ctx ); |
| 345 | 381 | |
| 346 | | return ret; |
| | 382 | return ret; |
| | 383 | } |
| | 384 | |
| | 385 | static VALUE fastxml_doc_search(VALUE self, VALUE raw_xpath) |
| | 386 | { |
| | 387 | return fastxml_xpath_search( self, raw_xpath ); |
| 347 | 388 | } |
| 348 | 389 | |
| … |
… |
|
| 357 | 398 | ret = rb_ary_new(); |
| 358 | 399 | size = (nodes) ? nodes->nodeNr : 0; |
| 359 | | printf("size: %d\n", size); |
| 360 | 400 | |
| 361 | 401 | for (i = 0; i < size; i++) { |
| 362 | 402 | cur = nodes->nodeTab[i]; |
| 363 | | //printf( "checking node: %s type: %d\n", cur->name, cur->type ); |
| 364 | 403 | |
| 365 | 404 | chld = ALLOC(fxml_data_t); |