View Javadoc
1   /*
2    * SPDX-FileCopyrightText: Copyright (c) 2012-2025 Yegor Bugayenko
3    * SPDX-License-Identifier: MIT
4    */
5   package com.jcabi.xml;
6   
7   import com.jcabi.log.Logger;
8   import java.io.ByteArrayInputStream;
9   import java.io.File;
10  import java.io.IOException;
11  import java.nio.charset.StandardCharsets;
12  import javax.xml.parsers.DocumentBuilder;
13  import javax.xml.parsers.DocumentBuilderFactory;
14  import javax.xml.parsers.ParserConfigurationException;
15  import lombok.EqualsAndHashCode;
16  import lombok.ToString;
17  import org.w3c.dom.Document;
18  import org.xml.sax.SAXException;
19  
20  /**
21   * Convenient parser of XML to DOM.
22   *
23   * <p>Objects of this class are immutable and thread-safe.
24   *
25   * @since 0.1
26   */
27  @ToString
28  @EqualsAndHashCode
29  final class DomParser {
30  
31      /**
32       * Document builder factory to use for parsing.
33       */
34      private final transient DocumentBuilderFactory factory;
35  
36      /**
37       * Source of XML.
38       */
39      private final DocSource source;
40  
41      /**
42       * Public ctor.
43       *
44       * <p>An {@link IllegalArgumentException} may be thrown if the parameter
45       * passed is not in XML format. It doesn't perform a strict validation
46       * and is not guaranteed that an exception will be thrown whenever
47       * the parameter is not XML.
48       *
49       * <p>It is assumed that the text is in UTF-8.
50       *
51       * @param fct Document builder factory to use
52       * @param txt The XML in text (in UTF-8)
53       */
54      DomParser(final DocumentBuilderFactory fct, final String txt) {
55          this(fct, new BytesSource(txt));
56      }
57  
58      /**
59       * Public ctor.
60       *
61       * <p>An {@link IllegalArgumentException} may be thrown if the parameter
62       * passed is not in XML format. It doesn't perform a strict validation
63       * and is not guaranteed that an exception will be thrown whenever
64       * the parameter is not XML.
65       *
66       * @param fct Document builder factory to use
67       * @param bytes The XML in bytes
68       */
69      @SuppressWarnings("PMD.ArrayIsStoredDirectly")
70      DomParser(final DocumentBuilderFactory fct, final byte[] bytes) {
71          this(fct, new BytesSource(bytes));
72      }
73  
74      /**
75       * Public ctor.
76       *
77       * <p>An {@link IllegalArgumentException} may be thrown if the parameter
78       * passed is not in XML format. It doesn't perform a strict validation
79       * and is not guaranteed that an exception will be thrown whenever
80       * the parameter is not XML.
81       *
82       * @param fct Document builder factory to use
83       * @param file The XML as a file
84       */
85      DomParser(final DocumentBuilderFactory fct, final File file) {
86          this(fct, new FileSource(file));
87      }
88  
89      /**
90       * Private ctor.
91       * @param factory Document builder factory to use
92       * @param source Source of XML
93       */
94      private DomParser(final DocumentBuilderFactory factory, final DocSource source) {
95          this.factory = factory;
96          this.source = source;
97      }
98  
99      /**
100      * Get the document body.
101      * @return The document
102      */
103     @SuppressWarnings("PMD.PrematureDeclaration")
104     public Document document() {
105         final DocumentBuilder builder;
106         try {
107             builder = this.factory.newDocumentBuilder();
108         } catch (final ParserConfigurationException ex) {
109             throw new IllegalArgumentException(
110                 String.format(
111                     "Failed to create document builder by %s",
112                     this.factory.getClass().getName()
113                 ),
114                 ex
115             );
116         }
117         final long start = System.nanoTime();
118         final Document doc;
119         try {
120             doc = this.source.apply(builder);
121         } catch (final IOException | SAXException ex) {
122             throw new IllegalArgumentException(
123                 String.format(
124                     "Can't parse by %s, most probably the XML is invalid",
125                     builder.getClass().getName()
126                 ),
127                 ex
128             );
129         }
130         if (Logger.isTraceEnabled(this)) {
131             Logger.trace(
132                 this,
133                 "%s parsed %d bytes of XML in %[nano]s",
134                 builder.getClass().getName(),
135                 this.source.length(),
136                 System.nanoTime() - start
137             );
138         }
139         return doc;
140     }
141 
142     /**
143      * Source of XML.
144      * @since 0.32
145      */
146     private interface DocSource {
147 
148         /**
149          * Parse XML by the builder.
150          * @param builder The builder to use during parsing.
151          * @return The document.
152          * @throws IOException If fails.
153          * @throws SAXException If fails.
154          */
155         Document apply(DocumentBuilder builder) throws IOException, SAXException;
156 
157         /**
158          * The length of the source.
159          * @return The length.
160          */
161         long length();
162     }
163 
164     /**
165      * File source of XML from a file.
166      * @since 0.32
167      */
168     private static class FileSource implements DocSource {
169 
170         /**
171          * The file.
172          */
173         private final File file;
174 
175         /**
176          * Public ctor.
177          * @param file The file.
178          */
179         FileSource(final File file) {
180             this.file = file;
181         }
182 
183         @Override
184         public Document apply(final DocumentBuilder builder) throws IOException, SAXException {
185             return builder.parse(this.file);
186         }
187 
188         @Override
189         public long length() {
190             return this.file.length();
191         }
192     }
193 
194     /**
195      * Bytes source of XML.
196      * @since 0.32
197      */
198     private static class BytesSource implements DocSource {
199 
200         /**
201          * Bytes of the XML.
202          */
203         private final byte[] xml;
204 
205         /**
206          * Public ctor.
207          * @param xml Bytes of the XML.
208          */
209         BytesSource(final String xml) {
210             this(xml.getBytes(StandardCharsets.UTF_8));
211         }
212 
213         /**
214          * Public ctor.
215          * @param xml Bytes of the XML.
216          */
217         @SuppressWarnings("PMD.ArrayIsStoredDirectly")
218         BytesSource(final byte[] xml) {
219             this.xml = xml;
220         }
221 
222         @Override
223         public Document apply(final DocumentBuilder builder) throws IOException, SAXException {
224             return builder.parse(new ByteArrayInputStream(this.xml));
225         }
226 
227         @Override
228         public long length() {
229             return this.xml.length;
230         }
231     }
232 }