libxml/tree/
document.rs

1//! Document feature set
2//!
3use libc::{c_char, c_int};
4use std::cell::RefCell;
5use std::collections::HashMap;
6use std::ffi::{CStr, CString};
7use std::fmt;
8use std::ptr;
9use std::rc::{Rc, Weak};
10use std::str;
11
12use crate::bindings::*;
13use crate::readonly::RoNode;
14use crate::tree::node::Node;
15
/// Shared, interior-mutable handle to the `_Document` record; this is what a `Document` wraps.
pub(crate) type DocumentRef = Rc<RefCell<_Document>>;
/// Non-owning (`Weak`) counterpart of `DocumentRef`.
pub(crate) type DocumentWeak = Weak<RefCell<_Document>>;
18
/// Serialization switches accepted by `Document::to_string_with_options`.
///
/// All switches are `false` by default (`SaveOptions::default()`); every switch that is
/// turned on adds the corresponding libxml2 `xmlSaveOption` flag to the save call.
#[derive(Clone, Copy, Debug, Default)]
pub struct SaveOptions {
  /// Format the saved output (`XML_SAVE_FORMAT`).
  pub format: bool,
  /// Drop the XML declaration (`XML_SAVE_NO_DECL`).
  pub no_declaration: bool,
  /// Do not emit empty tags (`XML_SAVE_NO_EMPTY`).
  pub no_empty_tags: bool,
  /// Disable the XHTML1-specific rules (`XML_SAVE_NO_XHTML`).
  pub no_xhtml: bool,
  /// Force the XHTML1-specific rules (`XML_SAVE_XHTML`).
  pub xhtml: bool,
  /// Force XML serialization on an HTML document (`XML_SAVE_AS_XML`).
  pub as_xml: bool,
  /// Force HTML serialization on an XML document (`XML_SAVE_AS_HTML`).
  pub as_html: bool,
  /// Format with non-significant whitespace (`XML_SAVE_WSNONSIG`).
  pub non_significant_whitespace: bool,
}
39
/// Internal record shared by every `Document` handle: the raw libxml2 document pointer plus
/// the table of `Node` wrappers that were registered for it.
#[derive(Debug)]
pub(crate) struct _Document {
  /// pointer to a libxml document; null for the placeholder built by `Document::null_ref`
  pub(crate) doc_ptr: xmlDocPtr,
  /// hashed pointer-to-Node bookkeeping table, keyed by the raw `xmlNodePtr`
  nodes: HashMap<xmlNodePtr, Node>,
}
47
48impl _Document {
49  /// Internal bookkeeping function, so far only used by `Node::wrap`
50  pub(crate) fn insert_node(&mut self, node_ptr: xmlNodePtr, node: Node) {
51    self.nodes.insert(node_ptr, node);
52  }
53  /// Internal bookkeeping function, so far only used by `Node::wrap`
54  pub(crate) fn get_node(&self, node_ptr: xmlNodePtr) -> Option<&Node> {
55    self.nodes.get(&node_ptr)
56  }
57  /// Internal bookkeeping function
58  pub(crate) fn forget_node(&mut self, node_ptr: xmlNodePtr) {
59    self.nodes.remove(&node_ptr);
60  }
61}
62
/// A libxml2 Document
///
/// This is a reference-counted handle (`Rc<RefCell<_Document>>`): cloning it yields another
/// handle to the same underlying record, and the libxml2 document is freed by the record's
/// `Drop` implementation once that record is dropped.
#[derive(Clone)]
pub struct Document(pub(crate) DocumentRef);
66
67impl Drop for _Document {
68  ///Free document when it goes out of scope
69  fn drop(&mut self) {
70    unsafe {
71      if !self.doc_ptr.is_null() {
72        xmlFreeDoc(self.doc_ptr);
73      }
74    }
75  }
76}
77
78impl fmt::Display for Document {
79  fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
80    write!(f, "{}", self.to_string_with_options(SaveOptions::default()))
81  }
82}
83
84impl Document {
85  /// Creates a new empty libxml2 document
86  pub fn new() -> Result<Self, ()> {
87    unsafe {
88      let c_version = CString::new("1.0").unwrap();
89      let c_version_bytes = c_version.as_bytes();
90      let doc_ptr = xmlNewDoc(c_version_bytes.as_ptr());
91      if doc_ptr.is_null() {
92        Err(())
93      } else {
94        let doc = _Document {
95          doc_ptr,
96          nodes: HashMap::new(),
97        };
98        Ok(Document(Rc::new(RefCell::new(doc))))
99      }
100    }
101  }
102
103  /// Obtain the underlying libxml2 `xmlDocPtr` for this Document
104  pub fn doc_ptr(&self) -> xmlDocPtr {
105    self.0.borrow().doc_ptr
106  }
107
108  /// Creates a new `Document` from an existing libxml2 pointer
109  pub fn new_ptr(doc_ptr: xmlDocPtr) -> Self {
110    let doc = _Document {
111      doc_ptr,
112      nodes: HashMap::new(),
113    };
114    Document(Rc::new(RefCell::new(doc)))
115  }
116
117  pub(crate) fn null_ref() -> DocumentRef {
118    Rc::new(RefCell::new(_Document {
119      doc_ptr: ptr::null_mut(),
120      nodes: HashMap::new(),
121    }))
122  }
123
124  /// Write document to `filename`
125  pub fn save_file(&self, filename: &str) -> Result<c_int, ()> {
126    let c_filename = CString::new(filename).unwrap();
127    unsafe {
128      let retval = xmlSaveFile(c_filename.as_ptr(), self.doc_ptr());
129      if retval < 0 {
130        return Err(());
131      }
132      Ok(retval)
133    }
134  }
135
136  pub(crate) fn register_node(&self, node_ptr: xmlNodePtr) -> Node {
137    Node::wrap(node_ptr, &self.0)
138  }
139
140  /// Get the root element of the document
141  pub fn get_root_element(&self) -> Option<Node> {
142    unsafe {
143      let node_ptr = xmlDocGetRootElement(self.doc_ptr());
144      if node_ptr.is_null() {
145        None
146      } else {
147        Some(self.register_node(node_ptr))
148      }
149    }
150  }
151
152  /// Get the root element of the document (read-only)
153  pub fn get_root_readonly(&self) -> Option<RoNode> {
154    unsafe {
155      let node_ptr = xmlDocGetRootElement(self.doc_ptr());
156      if node_ptr.is_null() {
157        None
158      } else {
159        Some(RoNode(node_ptr))
160      }
161    }
162  }
163
164  /// Sets the root element of the document
165  pub fn set_root_element(&mut self, root: &Node) {
166    unsafe {
167      xmlDocSetRootElement(self.doc_ptr(), root.node_ptr());
168    }
169    root.set_linked();
170  }
171
172  fn ptr_as_result(&mut self, node_ptr: xmlNodePtr) -> Result<Node, ()> {
173    if node_ptr.is_null() {
174      Err(())
175    } else {
176      let node = self.register_node(node_ptr);
177      Ok(node)
178    }
179  }
180
181  /// Import a `Node` from another `Document`
182  pub fn import_node(&mut self, node: &mut Node) -> Result<Node, ()> {
183    if !node.is_unlinked() {
184      return Err(());
185    }
186    // Also remove this node from the prior document hash
187    node
188      .get_docref()
189      .upgrade()
190      .unwrap()
191      .borrow_mut()
192      .forget_node(node.node_ptr());
193
194    let node_ptr = unsafe { xmlDocCopyNode(node.node_ptr(), self.doc_ptr(), 1) };
195    node.set_linked();
196    self.ptr_as_result(node_ptr)
197  }
198
199  /// Serializes the `Document` with options
200  pub fn to_string_with_options(&self, options: SaveOptions) -> String {
201    unsafe {
202      // allocate a buffer to dump into
203      let buf = xmlBufferCreate();
204      let c_utf8 = CString::new("UTF-8").unwrap();
205      let mut xml_options = 0;
206
207      if options.format {
208        xml_options += xmlSaveOption_XML_SAVE_FORMAT;
209      }
210      if options.no_declaration {
211        xml_options += xmlSaveOption_XML_SAVE_NO_DECL;
212      }
213      if options.no_empty_tags {
214        xml_options += xmlSaveOption_XML_SAVE_NO_EMPTY;
215      }
216      if options.no_xhtml {
217        xml_options += xmlSaveOption_XML_SAVE_NO_XHTML;
218      }
219      if options.xhtml {
220        xml_options += xmlSaveOption_XML_SAVE_XHTML;
221      }
222      if options.as_xml {
223        xml_options += xmlSaveOption_XML_SAVE_AS_XML;
224      }
225      if options.as_html {
226        xml_options += xmlSaveOption_XML_SAVE_AS_HTML;
227      }
228      if options.non_significant_whitespace {
229        xml_options += xmlSaveOption_XML_SAVE_WSNONSIG;
230      }
231
232      let save_ctx = xmlSaveToBuffer(buf, c_utf8.as_ptr(), xml_options as i32);
233      let _size = xmlSaveDoc(save_ctx, self.doc_ptr());
234      let _size = xmlSaveClose(save_ctx);
235
236      let result = xmlBufferContent(buf);
237      let c_string = CStr::from_ptr(result as *const c_char);
238      let node_string = c_string.to_string_lossy().into_owned();
239      xmlBufferFree(buf);
240
241      node_string
242    }
243  }
244
245  /// Serializes a `Node` owned by this `Document`
246  pub fn node_to_string(&self, node: &Node) -> String {
247    unsafe {
248      // allocate a buffer to dump into
249      let buf = xmlBufferCreate();
250
251      // dump the node
252      xmlNodeDump(
253        buf,
254        self.doc_ptr(),
255        node.node_ptr(),
256        1, // level of indentation
257        0, /* disable formatting */
258      );
259      let result = xmlBufferContent(buf);
260      let c_string = CStr::from_ptr(result as *const c_char);
261      let node_string = c_string.to_string_lossy().into_owned();
262      xmlBufferFree(buf);
263
264      node_string
265    }
266  }
267  /// Serializes a `RoNode` owned by this `Document`
268  pub fn ronode_to_string(&self, node: &RoNode) -> String {
269    unsafe {
270      // allocate a buffer to dump into
271      let buf = xmlBufferCreate();
272
273      // dump the node
274      xmlNodeDump(
275        buf,
276        self.doc_ptr(),
277        node.node_ptr(),
278        1, // level of indentation
279        0, /* disable formatting */
280      );
281      let result = xmlBufferContent(buf);
282      let c_string = CStr::from_ptr(result as *const c_char);
283      let node_string = c_string.to_string_lossy().into_owned();
284      xmlBufferFree(buf);
285
286      node_string
287    }
288  }
289
290  /// Creates a node for an XML processing instruction
291  pub fn create_processing_instruction(&mut self, name: &str, content: &str) -> Result<Node, ()> {
292    unsafe {
293      let c_name = CString::new(name).unwrap();
294      let c_name_bytes = c_name.as_bytes();
295      let c_content = CString::new(content).unwrap();
296      let c_content_bytes = c_content.as_bytes();
297
298      let node_ptr: xmlNodePtr = xmlNewDocPI(
299        self.doc_ptr(),
300        c_name_bytes.as_ptr(),
301        c_content_bytes.as_ptr(),
302      );
303      if node_ptr.is_null() {
304        Err(())
305      } else {
306        Ok(self.register_node(node_ptr))
307      }
308    }
309  }
310
311  /// Cast the document as a libxml Node
312  pub fn as_node(&self) -> Node {
313    // Note: this method is important to keep, as it enables certain low-level libxml2 idioms
314    // In particular, method dispatch based on NodeType is only possible when the document can be cast as a Node
315    //
316    // Memory management is not an issue, as a document node can not be unbound/removed, and does not require
317    // any additional deallocation than the Drop of a Document object.
318    self.register_node(self.doc_ptr() as xmlNodePtr)
319  }
320
321  /// Duplicates the libxml2 Document into a new instance
322  pub fn dup(&self) -> Result<Self, ()> {
323    let doc_ptr = unsafe { xmlCopyDoc(self.doc_ptr(), 1) };
324    if doc_ptr.is_null() {
325      Err(())
326    } else {
327      let doc = _Document {
328        doc_ptr,
329        nodes: HashMap::new(),
330      };
331      Ok(Document(Rc::new(RefCell::new(doc))))
332    }
333  }
334
335  /// Duplicates a source libxml2 Document into the empty Document self
336  pub fn dup_from(&mut self, source: &Self) -> Result<(), ()> {
337    if !self.doc_ptr().is_null() {
338      return Err(());
339    }
340
341    let doc_ptr = unsafe { xmlCopyDoc(source.doc_ptr(), 1) };
342    if doc_ptr.is_null() {
343      return Err(());
344    }
345    self.0.borrow_mut().doc_ptr = doc_ptr;
346    Ok(())
347  }
348}