/*
 * SPDX-FileCopyrightText: Copyright (c) 2012-2025 Yegor Bugayenko
 * SPDX-License-Identifier: MIT
 */
package com.jcabi.xml;

import java.util.Collection;
import java.util.List;
import javax.xml.namespace.NamespaceContext;
import org.w3c.dom.Node;
import org.w3c.dom.ls.LSResourceResolver;
import org.xml.sax.SAXParseException;

/**
 * XML document.
 *
 * <p>Set of convenient XML manipulations:
 *
 * <pre> XML xml = new XMLDocument(content);
 * for (XML employee : xml.nodes("//Employee")) {
 *   String name = employee.xpath("name/text()").get(0);
 *   // ...
 * }</pre>
 *
 * <p>You can always get the DOM node out of this abstraction using the
 * {@link #inner()} or {@link #deepCopy()} methods.
 *
 * <p>{@code toString()} must produce a full XML.
 *
 * <p>Implementations of this interface must be immutable and thread-safe.
 *
 * <p>In most cases, you can use the {@link XMLDocument} implementation. It
 * implements all required features and will be sufficient for most practical
 * tasks. The only problem with that implementation is that it uses javax.xml
 * classes under the hood, and the default Java implementation only supports
 * XPath 1.0. If you require XPath 2.0 support and beyond, you can use the
 * Saxon implementation of {@link XML} - {@link SaxonDocument}. It is based on
 * the Saxon library and supports XPath 2.0 and higher.
 * You can read more about Java XPath versioning problems in the following
 * threads:
 * <ul>
 *   <li><a href="https://stackoverflow.com/questions/6624149/xpath-2-0-for-java-possible">xpath 2.0 for java possible</a></li>
 *   <li><a href="https://stackoverflow.com/questions/5802895/does-jdk-6-support-all-features-of-xpath-2-0/5803028#5803028">does JDK 6 support all features of XPath 2.0?</a></li>
 * </ul>
 *
 * @see XMLDocument
 * @since 0.1
 * @checkstyle AbbreviationAsWordInNameCheck (5 lines)
 */
public interface XML {

    /**
     * Find and return text elements or attributes matched by XPath address.
     *
     * <p>The XPath query should point to text elements or attributes in the
     * XML document. If any nodes of different types (elements, comments, etc.)
     * are found in the result node list, a {@link RuntimeException} will be
     * thrown.
     *
     * <p>Alternatively, the XPath query can be a function or expression that
     * returns a single value instead of pointing to a set of nodes. In this
     * case, the result will be a List containing a single String, the content
     * of which is the result of the evaluation. If the expression result is not
     * a String, it will be converted to a String representation and returned as
     * such. For example, for a document containing three &lt;a&gt; elements,
     * the input query "count(//a)" will return a singleton List with a single
     * string value "3".
     *
     * <p>This is a convenience method, which is used (according to our
     * experience) in 95% of all cases. Usually you don't need to get anything
     * else but a text value of some node or an attribute. And in most cases
     * you are interested in getting just the first value
     * (use {@code xpath(..).get(0)}). But when/if you need to get more than
     * just a plain text - use {@link #nodes(String)}.
     *
     * <p>The {@link List} returned will throw {@link IndexOutOfBoundsException}
     * if you try to access a node which wasn't found by this XPath query.
     *
     * @param query The XPath query
     * @return The list of string values (texts) or single function result
     * @throws IllegalArgumentException If the query is not a valid XPath
     *  expression
     */
    List<String> xpath(String query);

    /**
     * Retrieve the nodes matched by the XPath query, each one represented
     * as an {@link XML}.
     *
     * <p>The {@link List} returned will throw {@link IndexOutOfBoundsException}
     * if you try to access a node which wasn't found by this XPath query.
     *
     * @param query The XPath query
     * @return List of the matched nodes, each wrapped as {@link XML}
     * @throws IllegalArgumentException If the query is not a valid XPath
     *  expression
     */
    List<XML> nodes(String query);

    /**
     * Register additional namespace prefix for XPath.
     *
     * <p>For example:
     *
     * <pre>
     * String name = new XMLDocument("...")
     *   .registerNs("ns1", "http://example.com")
     *   .registerNs("foo", "http://example.com/foo")
     *   .xpath("/ns1:root/foo:name/text()")
     *   .get(0);
     * </pre>
     *
     * <p>A number of standard namespaces are registered by default in
     * instances of XML. Their full list is in
     * {@link XMLDocument#XMLDocument(String)}.
     *
     * @param prefix The XPath prefix to register
     * @param uri Namespace URI
     * @return A new XML document, with this additional namespace registered
     * @throws IllegalArgumentException If the namespace prefix is already
     *  registered
     */
    XML registerNs(String prefix, Object uri);

    /**
     * Append this namespace context to the existing one.
     *
     * <p>The existing context (inside this object) and the new one provided
     * will be merged together. The existing context will have higher
     * priority.
     *
     * @param context The context to append
     * @return A new XML document, with a merged context on board
     */
    XML merge(NamespaceContext context);

    /**
     * Retrieve DOM node, represented by this wrapper.
     * This method works exactly the same as {@link #deepCopy()}.
     * @return Deep copy of the inner DOM node.
     * @deprecated Use {@link #inner()} or {@link #deepCopy()} instead.
     */
    @Deprecated
    Node node();

    /**
     * Retrieve DOM node, represented by this wrapper.
     * Pay attention that this method returns the inner node, not a deep copy.
     * It means that any changes to the returned node will affect the
     * original XML.
     * @return Inner node.
     */
    Node inner();

    /**
     * Retrieve a deep copy of the DOM node, represented by this wrapper.
     * Might be expensive in terms of performance.
     * @return Deep copy of the node.
     */
    Node deepCopy();

    /**
     * Validate this XML against the XSD schema inside it.
     *
     * <p>If you don't have your own resolver, try using
     * {@link ClasspathResolver}.
     *
     * @param resolver XSD schema resolver
     * @return Collection of errors found
     * @since 0.31.0
     */
    Collection<SAXParseException> validate(LSResourceResolver resolver);

    /**
     * Validate this XML against the provided XSD schema.
     * @param xsd The Schema
     * @return Collection of errors found
     * @since 0.31.0
     */
    Collection<SAXParseException> validate(XML xsd);
}