View Javadoc

1   /*
2    * J.A.D.E. Java(TM) Addition to Default Environment.
3    * Latest release available at http://jade.dautelle.com/
4    * This class is public domain (not copyrighted).
5    */
6   package ch.twiddlefinger.inet.rewinder.model.parser.conversion;
7   
8   import java.io.CharConversionException;
9   import java.io.IOException;
10  import java.io.OutputStream;
11  import java.io.Writer;
12  
13  
14  /***
15   * <p> This class represents an UTF-8 stream writer.</p>
16   *
17   * <p> This writer supports surrogate <code>char</code> pairs (representing
18   *     characters in the range [U+10000 .. U+10FFFF]). It can also be used
19   *     to write characters from their unicodes (31 bits) directly
20   *     (ref. {@link #write(int)}).</p>
21   *
22   * <p> Instances of this class can be reused for different output streams
23   *     and can be part of a higher level component (e.g. serializer) in order
24   *     to avoid dynamic buffer allocation when the destination output changes.
25   *     Also wrapping using a <code>java.io.BufferedWriter</code> is unnecessary
26   *     as instances of this class embed their own data buffers.</p>
27  
28   * <p> Note: This writer is unsynchronized and always produces well-formed
29   *           UTF-8 sequences.</p>
30   *
31   *  <p><i> This class is <b>public domain</b> (not copyrighted).</i></p>
32   *
33   * @author  <a href="mailto:jean-marie@dautelle.com">Jean-Marie Dautelle</a>
34   * @version 4.6, July 14, 2003
35   * @see     Utf8StreamReader
36   */
public final class Utf8StreamWriter extends Writer {
    /**
     * Holds the current output stream or <code>null</code> if closed
     * (or not yet set).
     */
    private OutputStream _outStream;

    /**
     * Holds the bytes' buffer.
     */
    private final byte[] _bytes;

    /**
     * Holds the bytes buffer index (number of pending bytes in the buffer).
     */
    private int _index;

    /**
     * Holds the pending high surrogate waiting for its low surrogate,
     * or <code>0</code> when there is none.
     */
    private char _highSurrogate;

    /**
     * Default constructor (uses a buffer of 2048 bytes).
     */
    public Utf8StreamWriter() {
        this(2048);
    }

    /**
     * Creates a {@link Utf8StreamWriter} of specified buffer size.
     *
     * @param  bufferSize the buffer size in bytes (at least 1).
     * @throws IllegalArgumentException if <code>bufferSize</code> is
     *         less than 1 (such a buffer could never hold a byte).
     */
    public Utf8StreamWriter(int bufferSize) {
        if (bufferSize < 1) {
            throw new IllegalArgumentException(
                "Buffer size must be positive: " + bufferSize);
        }

        _bytes = new byte[bufferSize];
    }

    /**
     * Sets the output stream to use for writing until this writer is closed.
     * For example:<pre>
     *     Writer writer = new Utf8StreamWriter().setOutputStream(outStream);
     * </pre> is the counterpart of <pre>
     *     Writer writer = new java.io.OutputStreamWriter(outStream, "UTF-8");
     * </pre>
     *
     * @param  outStream the output stream.
     * @return this UTF-8 writer.
     * @see    #close
     */
    public Utf8StreamWriter setOutputStream(OutputStream outStream) {
        _outStream = outStream;

        return this;
    }

    /**
     * Writes a single character. This method supports 16-bits
     * character surrogates: a high surrogate is only remembered, and the
     * supplementary character is encoded once the matching low surrogate
     * is written.
     *
     * @param  c <code>char</code> the character to be written (possibly
     *        a surrogate).
     * @throws CharConversionException if <code>c</code> is a low surrogate
     *         and no high surrogate is pending.
     * @throws IOException if an I/O error occurs.
     */
    public void write(char c) throws IOException {
        if ((c < 0xd800) || (c > 0xdfff)) {
            write((int) c);
        } else if (c < 0xdc00) { // High surrogate.
            _highSurrogate = c;
        } else { // Low surrogate.

            final char high = _highSurrogate;

            // Consume the pending high surrogate so that it cannot be
            // silently paired again with a later, unrelated low surrogate.
            _highSurrogate = 0;

            if (high == 0) {
                throw new CharConversionException(
                    "Unpaired low surrogate U+" + Integer.toHexString(c));
            }

            write(((high - 0xd800) << 10) + (c - 0xdc00) + 0x10000);
        }
    }

    /**
     * Writes a character given its 31-bits Unicode.
     *
     * @param  code the 31 bits Unicode of the character to be written.
     * @throws CharConversionException if <code>code</code> is negative
     *         (does not fit in 31 bits).
     * @throws IOException if an I/O error occurs.
     */
    public void write(int code) throws IOException {
        if ((code & 0xffffff80) == 0) { // 7 bits: single byte.
            put(code);
        } else { // Writes more than one byte.
            write2(code);
        }
    }

    /**
     * Encodes a code that needs more than one byte. Each branch tests that
     * all bits above the payload capacity of the sequence are clear
     * (11, 16, 21, 26 and 31 bits for 2 to 6 bytes respectively).
     *
     * @param  c the code to encode.
     * @throws CharConversionException if <code>c</code> is negative.
     * @throws IOException if an I/O error occurs.
     */
    private void write2(int c) throws IOException {
        if ((c & 0xfffff800) == 0) { // 2 bytes (11 bits).
            put(0xc0 | (c >> 6));
            put(0x80 | (c & 0x3f));
        } else if ((c & 0xffff0000) == 0) { // 3 bytes (16 bits).
            put(0xe0 | (c >> 12));
            put(0x80 | ((c >> 6) & 0x3f));
            put(0x80 | (c & 0x3f));
        } else if ((c & 0xffe00000) == 0) { // 4 bytes (21 bits).
            put(0xf0 | (c >> 18));
            put(0x80 | ((c >> 12) & 0x3f));
            put(0x80 | ((c >> 6) & 0x3f));
            put(0x80 | (c & 0x3f));
        } else if ((c & 0xfc000000) == 0) { // 5 bytes (26 bits).
            put(0xf8 | (c >> 24));
            put(0x80 | ((c >> 18) & 0x3f));
            put(0x80 | ((c >> 12) & 0x3f));
            put(0x80 | ((c >> 6) & 0x3f));
            put(0x80 | (c & 0x3f));
        } else if ((c & 0x80000000) == 0) { // 6 bytes (31 bits).
            put(0xfc | (c >> 30));
            put(0x80 | ((c >> 24) & 0x3f));
            put(0x80 | ((c >> 18) & 0x3f));
            put(0x80 | ((c >> 12) & 0x3f));
            put(0x80 | ((c >> 6) & 0x3f));
            put(0x80 | (c & 0x3f));
        } else {
            throw new CharConversionException("Illegal character U+" +
                Integer.toHexString(c));
        }
    }

    /**
     * Appends one byte to the buffer, flushing the buffer as soon as it
     * becomes full.
     *
     * @param  b the byte to append (only the low 8 bits are used).
     * @throws IOException if an I/O error occurs.
     */
    private void put(int b) throws IOException {
        // The buffer can only be full here if a previous flush failed;
        // retry it so the failure surfaces as an IOException rather than
        // an ArrayIndexOutOfBoundsException.
        if (_index >= _bytes.length) {
            flushBuffer();
        }

        _bytes[_index] = (byte) b;

        if (++_index >= _bytes.length) {
            flushBuffer();
        }
    }

    /**
     * Writes a portion of an array of characters.
     *
     * @param  cbuf the array of characters.
     * @param  off the offset from which to start writing characters.
     * @param  len the number of characters to write.
     * @throws IOException if an I/O error occurs.
     */
    public void write(char[] cbuf, int off, int len) throws IOException {
        final int end = off + len;

        for (int i = off; i < end; i++) {
            write(cbuf[i]);
        }
    }

    /**
     * Writes a portion of a string.
     *
     * @param  str a String.
     * @param  off the offset from which to start writing characters.
     * @param  len the number of characters to write.
     * @throws IOException if an I/O error occurs
     */
    public void write(String str, int off, int len) throws IOException {
        final int end = off + len;

        for (int i = off; i < end; i++) {
            write(str.charAt(i));
        }
    }

    /**
     * Flushes the stream: writes the buffered bytes to the output stream,
     * then flushes the output stream itself.
     *
     * @throws IOException if an I/O error occurs or if no output stream
     *         is set (writer closed).
     */
    public void flush() throws IOException {
        flushBuffer();
        _outStream.flush();
    }

    /**
     * Closes the stream, flushing it first. Closing a previously-closed
     * stream has no effect. The underlying stream is closed and this writer
     * is reset (buffer emptied, pending surrogate dropped) even when the
     * final flush fails, so the instance can be reused with another stream.
     *
     * @exception  IOException  If an I/O error occurs
     */
    public void close() throws IOException {
        if (_outStream != null) {
            try {
                flushBuffer();
            } finally {
                try {
                    _outStream.close();
                } finally {
                    _outStream = null;
                    _index = 0;
                    _highSurrogate = 0;
                }
            }
        }
    }

    /**
     * Flushes the internal bytes buffer to the output stream.
     *
     * @throws IOException if an I/O error occurs or if no output stream
     *         is set (writer closed).
     */
    private void flushBuffer() throws IOException {
        if (_outStream != null) {
            _outStream.write(_bytes, 0, _index);
            _index = 0;
        } else {
            throw new IOException("Stream closed");
        }
    }
}
328 }