root/ext/fastxml.c

Revision 2168c40635a237e84a69b74bb34c24f230052ae9, 7.2 KB (checked in by Mark Guzman <segfault@…>, 4 months ago)

finally loading properly as a gem

  • Property mode set to 100644
Line 
1/* Document-class: FastXml
2 *  Provides the base namespace for the FastXml classes
3 *   # FastXml::Doc
4 *   # FastXml::Node
5 *   # FastXml::NodeList
6 *   # FastXml::AttrList
7 */
8// Please see the LICENSE file for copyright, licensing and distribution information
9
10#define fastxml_c 1
11#include "fastxml.h"
12#include "fastxml_node.h"
13#include "fastxml_doc.h"
14#include "fastxml_nodelist.h"
15#include "fastxml_attrlist.h"
16
17#ifdef _WIN32 /* Windows builds only: emit a DLL export attribute */
18__declspec(dllexport)  /* NOTE(review): as placed, this attribute binds to the next declaration (rb_mFastXml), not to Init_fastxml - confirm that is intended */
19#endif
20
21
22VALUE rb_mFastXml; /* the FastXml module; assigned in Init_fastxml */
23VALUE rb_cFastXmlDoc; /* FastXml::Doc class; not assigned in this file - presumably set by Init_fastxml_doc, TODO confirm */
24VALUE rb_cFastXmlNode; /* FastXml::Node class; instantiated by fastxml_raw_node_to_my_obj - presumably set by Init_fastxml_node, TODO confirm */
25VALUE rb_cFastXmlNodeList; /* FastXml::NodeList class; instantiated by the nodelist/nodeset wrappers - presumably set by Init_fastxml_nodelist, TODO confirm */
26VALUE rb_cFastXmlAttrList; /* FastXml::AttrList class; not assigned in this file - presumably set by Init_fastxml_attrlist, TODO confirm */
27VALUE rb_sValidateDtd; /* the symbol :validate; assigned in Init_fastxml */
28VALUE rb_sForgivingParse; /* the symbol :forgiving; assigned in Init_fastxml */
29VALUE rb_sHtmlParse; /* the symbol :html; assigned in Init_fastxml */
30ID s_readlines; /* interned id of "readlines"; assigned in Init_fastxml */
31ID s_to_s; /* interned id of "to_s"; assigned in Init_fastxml */
32
33void Init_fastxml()
34{       
35    if (xmlHasFeature(XML_WITH_TREE) == 0)
36        rb_raise( rb_eRuntimeError, "libxml not built with tree support" );
37
38    if (xmlHasFeature(XML_WITH_XPATH) == 0)
39        rb_raise( rb_eRuntimeError, "libxml not built with xpath support" );
40
41        s_readlines = rb_intern("readlines");
42        s_to_s = rb_intern("to_s");
43
44    xmlInitParser();
45    xmlXPathInit();
46        xsltInit();
47    rb_mFastXml = rb_define_module( "FastXml" );
48    rb_define_const( rb_mFastXml, "LIBXML_VERSION", rb_str_new2( LIBXML_DOTTED_VERSION ) );
49
50    /* setting symbols */
51    rb_sValidateDtd = ID2SYM( rb_intern("validate") );
52    rb_sForgivingParse = ID2SYM( rb_intern("forgiving") );
53    rb_sHtmlParse = ID2SYM( rb_intern("html") );
54   
55        Init_fastxml_doc();       /* Doc */   
56        Init_fastxml_node();      /* Node */
57        Init_fastxml_nodelist();  /* NodeList */
58        Init_fastxml_attrlist();  /* AttrList */
59       
60        /* pull in the ruby side of things */
61        rb_require( "fastxml/fastxml_lib" );      // ruby-side methods for the FastXml classes
62        rb_require( "fastxml/fastxml_helpers" );  // FastXml and FastHtml methods
63}
64
65
66
67
68void fastxml_data_mark( fxml_data_t *data )
69{
70    /* do nothing */
71}
72
73void fastxml_data_free( fxml_data_t *data )
74{
75    if (data != NULL)
76    {
77                if (data->xpath_obj != NULL) 
78                        xmlXPathFreeObject( data->xpath_obj );                 
79       
80                if (data->xslt != NULL)
81                        xsltFreeStylesheet( data->xslt );
82       
83        if (data->doc != NULL && data->node == NULL && data->list == NULL && data->xpath_obj == NULL) 
84            xmlFreeDoc( data->doc );
85
86        // the doc free will cleanup the nodes
87
88                data->xpath_obj = NULL;
89                data->list = NULL;
90        data->doc = NULL;
91                data->xslt = NULL;
92                data->node = NULL;
93        free( data );
94    }
95    data = NULL;
96}
97
98VALUE fastxml_data_alloc( VALUE klass )
99{
100    return Qnil;
101}
102
103VALUE fastxml_raw_node_to_my_obj(xmlNodePtr cur, fxml_data_t *chld)
104{
105    VALUE dv_chld, new_tmp;
106    chld->node = cur;
107    chld->doc = cur->doc; 
108
109    new_tmp = rb_class_new_instance( 0, 0, rb_cFastXmlNode ); 
110    dv_chld = Data_Wrap_Struct( rb_cObject, fastxml_data_mark, fastxml_data_free, chld );
111    rb_iv_set( new_tmp, "@lxml_doc", dv_chld ); 
112       
113        return new_tmp; 
114}
115
116VALUE fastxml_raw_node_to_obj(xmlNodePtr cur)
117{
118    fxml_data_t *chld = ALLOC(fxml_data_t);
119    memset( chld, 0, sizeof(fxml_data_t) );
120        return fastxml_raw_node_to_my_obj( cur, chld );
121}
122
123VALUE fastxml_nodelist_to_obj(xmlNodePtr root, int len)
124{
125    VALUE ret, dv_chld;
126    xmlNodePtr cur = root;
127        fxml_data_t *ndlst = ALLOC(fxml_data_t);
128        memset( ndlst, 0, sizeof(fxml_data_t) );
129       
130        ndlst->list_len = len;
131        ndlst->list = cur;
132        ret = rb_class_new_instance( 0, 0, rb_cFastXmlNodeList ); 
133    dv_chld = Data_Wrap_Struct( rb_cObject, fastxml_data_mark, fastxml_data_free, ndlst );
134    rb_iv_set( ret, "@lxml_doc", dv_chld );
135   
136        return ret;
137}
138
139VALUE fastxml_nodeset_to_obj(xmlXPathObjectPtr raw_xpath_obj, fxml_data_t *data)
140{
141    VALUE ret, dv_chld;
142        fxml_data_t *ndlst = ALLOC(fxml_data_t);
143        memset( ndlst, 0, sizeof(fxml_data_t) );
144       
145        ndlst->xpath_obj = raw_xpath_obj;
146        ndlst->list_len = raw_xpath_obj->nodesetval->nodeNr;
147        ret = rb_class_new_instance( 0, 0, rb_cFastXmlNodeList ); 
148    dv_chld = Data_Wrap_Struct( rb_cObject, fastxml_data_mark, fastxml_data_free, ndlst );
149    rb_iv_set( ret, "@lxml_doc", dv_chld );
150   
151        return ret;
152}
153
154VALUE munge_xpath_namespace( VALUE orig_expr, xmlChar *root_ns )
155{
156        VALUE path_bits = rb_str_split( orig_expr, "/" );
157        VALUE ns_prefix = rb_str_new2( (const char*)root_ns );
158        VALUE ns_indic = rb_str_new2( ":" );
159        VALUE slash = rb_str_new2( "/" );
160        VALUE path_bit, str_idx;
161        VALUE ret_ary = rb_ary_new();
162        long i;
163       
164        rb_str_append( ns_prefix, ns_indic );
165    for (i=0; i<RARRAY(path_bits)->len; i++) {
166        path_bit = RARRAY(path_bits)->ptr[i];
167               
168                if (RSTRING_LEN(path_bit) > 0) {
169                        str_idx = rb_funcall( path_bit, rb_intern( "index" ), 1, ns_indic );
170                        if (str_idx == Qnil || str_idx == Qfalse) // didn't find the :, so it looks like we don't have a namespace
171                                path_bit = rb_str_plus( ns_prefix, path_bit );
172                }       
173               
174                rb_ary_push( ret_ary, path_bit );
175    }
176       
177        return rb_ary_join( ret_ary, slash );
178}
179
180
181/**
182 *  fastxml_xpath_search provides a common xpath search function for
183 * the libraries bits (node, doc). it handles mangling non-namespaced
184 * xpath queries into something libxml will play nice with
185 */
186VALUE fastxml_xpath_search(VALUE self, VALUE raw_xpath, VALUE blk)
187{
188    VALUE ret, dv, xpath_s;
189        xmlXPathCompExprPtr xpath_xpr;
190    xmlXPathContextPtr xpath_ctx; 
191    xmlXPathObjectPtr xpath_obj;     
192    fxml_data_t *data;
193    xmlChar *xpath_expr;
194        xmlNodePtr root = NULL;
195        xmlNsPtr *ns_list = NULL;
196        xmlNsPtr *cur_ns = NULL;
197        xmlChar *root_ns = NULL;
198        int ns_cnt = 0;
199
200    if (NIL_P(raw_xpath)) 
201        rb_raise(rb_eArgError, "nil passed as xpath");
202
203    dv = rb_iv_get( self, "@lxml_doc" );   
204    Data_Get_Struct( dv, fxml_data_t, data ); 
205
206    xpath_ctx = xmlXPathNewContext( data->doc );
207    if (xpath_ctx == NULL) 
208        rb_raise( rb_eRuntimeError, "unable to create xpath context" );
209
210        root = data->node;
211        if (root == NULL)
212                root = xmlDocGetRootElement( data->doc );
213               
214        xpath_ctx->node = root;
215        cur_ns = ns_list = xmlGetNsList( data->doc, root );
216        while (cur_ns != NULL && (*cur_ns) != NULL) { 
217                xmlXPathRegisterNs( xpath_ctx, (*cur_ns)->prefix, (*cur_ns)->href );
218                cur_ns++;
219        }
220
221    if (ns_list != NULL) {
222            xpath_ctx->namespaces = ns_list;
223            xpath_ctx->nsNr = ns_cnt;
224    }
225       
226        xpath_s = rb_obj_as_string( raw_xpath );
227        if (root->ns != NULL) { // we have a base namespace, this is going to get "interesting"
228                root_ns = (xmlChar*)root->ns->prefix;
229                if (root_ns == NULL) 
230                        root_ns = (xmlChar*)"__myFunkyLittleRootNsNotToBeUseByAnyoneElseIHope__"; 
231            // alternatives? how do other xpath processors handle root/default namespaces?
232
233                xmlXPathRegisterNs( xpath_ctx, root_ns, root->ns->href );
234                // need to update the xpath expression
235                xpath_s = munge_xpath_namespace( xpath_s, root_ns );
236                xpath_ctx->nsNr++;
237        }
238       
239        xpath_expr = (xmlChar*)RSTRING_PTR(xpath_s);
240        xpath_xpr = xmlXPathCompile( xpath_expr );
241        if (xpath_xpr == NULL) {
242                xmlXPathFreeContext( xpath_ctx );
243                xmlFree( ns_list );             
244                rb_raise( rb_eRuntimeError, "unable to evaluate xpath expression" );
245        }       
246
247        xpath_obj = xmlXPathCompiledEval( xpath_xpr, xpath_ctx );
248    if (xpath_obj == NULL) {
249        rb_raise( rb_eRuntimeError, "unable to evaluate xpath expression" );
250                xmlXPathFreeCompExpr( xpath_xpr );
251        xmlXPathFreeContext( xpath_ctx ); 
252                xmlFree( ns_list );
253        return Qnil;
254    }   
255
256    ret = fastxml_nodeset_to_obj( xpath_obj, data );
257
258        xmlFree( ns_list );
259        xmlXPathFreeCompExpr( xpath_xpr );
260    xmlXPathFreeContext( xpath_ctx ); 
261
262    return ret; 
263}
264
Note: See TracBrowser for help on using the browser.