View Javadoc
1   /*
2    * Copyright 2013 The Netty Project
3    *
4    * The Netty Project licenses this file to you under the Apache License,
5    * version 2.0 (the "License"); you may not use this file except in compliance
6    * with the License. You may obtain a copy of the License at:
7    *
8    *   https://www.apache.org/licenses/LICENSE-2.0
9    *
10   * Unless required by applicable law or agreed to in writing, software
11   * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
12   * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
13   * License for the specific language governing permissions and limitations
14   * under the License.
15   */
16  package io.netty.handler.codec.xml;
17  
18  import static io.netty.util.internal.ObjectUtil.checkPositive;
19  
20  import io.netty.buffer.ByteBuf;
21  import io.netty.channel.ChannelHandlerContext;
22  import io.netty.handler.codec.ByteToMessageDecoder;
23  import io.netty.handler.codec.CorruptedFrameException;
24  import io.netty.handler.codec.TooLongFrameException;
25  
26  import java.util.List;
27  
28  /**
29   * A frame decoder for single separate XML based message streams.
30   * <p/>
31   * A couple examples will better help illustrate
32   * what this decoder actually does.
33   * <p/>
34   * Given an input array of bytes split over 3 frames like this:
35   * <pre>
36   * +-----+-----+-----------+
37   * | &lt;an | Xml | Element/&gt; |
38   * +-----+-----+-----------+
39   * </pre>
40   * <p/>
41   * this decoder would output a single frame:
42   * <p/>
43   * <pre>
44   * +-----------------+
45   * | &lt;anXmlElement/&gt; |
46   * +-----------------+
47   * </pre>
48   *
49   * Given an input array of bytes split over 5 frames like this:
50   * <pre>
51   * +-----+-----+-----------+-----+----------------------------------+
52   * | &lt;an | Xml | Element/&gt; | &lt;ro | ot&gt;&lt;child&gt;content&lt;/child&gt;&lt;/root&gt; |
53   * +-----+-----+-----------+-----+----------------------------------+
54   * </pre>
55   * <p/>
56   * this decoder would output two frames:
57   * <p/>
58   * <pre>
59   * +-----------------+-------------------------------------+
60   * | &lt;anXmlElement/&gt; | &lt;root&gt;&lt;child&gt;content&lt;/child&gt;&lt;/root&gt; |
61   * +-----------------+-------------------------------------+
62   * </pre>
63   *
64   * <p/>
65   * The byte stream is expected to be in UTF-8 character encoding or ASCII. The current implementation
66   * uses direct {@code byte} to {@code char} cast and then compares that {@code char} to a few low range
67   * ASCII characters like {@code '<'}, {@code '>'} or {@code '/'}. UTF-8 never uses low range [0..0x7F]
68   * byte values in multibyte codepoint representations, so it is fully supported by this implementation.
69   * <p/>
70   * Please note that this decoder is not suitable for
71   * xml streaming protocols such as
72   * <a href="https://xmpp.org/rfcs/rfc6120.html">XMPP</a>,
73   * where an initial xml element opens the stream and only
74   * gets closed at the end of the session, although this class
75   * could probably allow for such type of message flow with
76   * minor modifications.
77   */
78  public class XmlFrameDecoder extends ByteToMessageDecoder {
79  
80      private final int maxFrameLength;
81  
82      public XmlFrameDecoder(int maxFrameLength) {
83          this.maxFrameLength = checkPositive(maxFrameLength, "maxFrameLength");
84      }
85  
86      @Override
87      protected void decode(ChannelHandlerContext ctx, ByteBuf in, List<Object> out) throws Exception {
88          boolean openingBracketFound = false;
89          boolean atLeastOneXmlElementFound = false;
90          boolean inCDATASection = false;
91          long openBracketsCount = 0;
92          int length = 0;
93          int leadingWhiteSpaceCount = 0;
94          final int bufferLength = in.writerIndex();
95  
96          if (bufferLength > maxFrameLength) {
97              // bufferLength exceeded maxFrameLength; dropping frame
98              in.skipBytes(in.readableBytes());
99              fail(bufferLength);
100             return;
101         }
102 
103         for (int i = in.readerIndex(); i < bufferLength; i++) {
104             final byte readByte = in.getByte(i);
105             if (!openingBracketFound && Character.isWhitespace(readByte)) {
106                 // xml has not started and whitespace char found
107                 leadingWhiteSpaceCount++;
108             } else if (!openingBracketFound && readByte != '<') {
109                 // garbage found before xml start
110                 fail(ctx);
111                 in.skipBytes(in.readableBytes());
112                 return;
113             } else if (!inCDATASection && readByte == '<') {
114                 openingBracketFound = true;
115 
116                 if (i < bufferLength - 1) {
117                     final byte peekAheadByte = in.getByte(i + 1);
118                     if (peekAheadByte == '/') {
119                         // found </, we must check if it is enclosed
120                         int peekFurtherAheadIndex = i + 2;
121                         while (peekFurtherAheadIndex <= bufferLength - 1) {
122                             //if we have </ and enclosing > we can decrement openBracketsCount
123                             if (in.getByte(peekFurtherAheadIndex) == '>') {
124                                 openBracketsCount--;
125                                 break;
126                             }
127                             peekFurtherAheadIndex++;
128                         }
129                     } else if (isValidStartCharForXmlElement(peekAheadByte)) {
130                         atLeastOneXmlElementFound = true;
131                         // char after < is a valid xml element start char,
132                         // incrementing openBracketsCount
133                         openBracketsCount++;
134                     } else if (peekAheadByte == '!') {
135                         if (isCommentBlockStart(in, i)) {
136                             // <!-- comment --> start found
137                             openBracketsCount++;
138                         } else if (isCDATABlockStart(in, i)) {
139                             // <![CDATA[ start found
140                             openBracketsCount++;
141                             inCDATASection = true;
142                         }
143                     } else if (peekAheadByte == '?') {
144                         // <?xml ?> start found
145                         openBracketsCount++;
146                     }
147                 }
148             } else if (!inCDATASection && readByte == '/') {
149                 if (i < bufferLength - 1 && in.getByte(i + 1) == '>') {
150                     // found />, decrementing openBracketsCount
151                     openBracketsCount--;
152                 }
153             } else if (readByte == '>') {
154                 length = i + 1;
155 
156                 if (i - 1 > -1) {
157                     final byte peekBehindByte = in.getByte(i - 1);
158 
159                     if (!inCDATASection) {
160                         if (peekBehindByte == '?') {
161                             // an <?xml ?> tag was closed
162                             openBracketsCount--;
163                         } else if (peekBehindByte == '-' && i - 2 > -1 && in.getByte(i - 2) == '-') {
164                             // a <!-- comment --> was closed
165                             openBracketsCount--;
166                         }
167                     } else if (peekBehindByte == ']' && i - 2 > -1 && in.getByte(i - 2) == ']') {
168                         // a <![CDATA[...]]> block was closed
169                         openBracketsCount--;
170                         inCDATASection = false;
171                     }
172                 }
173 
174                 if (atLeastOneXmlElementFound && openBracketsCount == 0) {
175                     // xml is balanced, bailing out
176                     break;
177                 }
178             }
179         }
180 
181         final int readerIndex = in.readerIndex();
182         int xmlElementLength = length - readerIndex;
183 
184         if (openBracketsCount == 0 && xmlElementLength > 0) {
185             if (readerIndex + xmlElementLength >= bufferLength) {
186                 xmlElementLength = in.readableBytes();
187             }
188             final ByteBuf frame =
189                     extractFrame(in, readerIndex + leadingWhiteSpaceCount, xmlElementLength - leadingWhiteSpaceCount);
190             in.skipBytes(xmlElementLength);
191             out.add(frame);
192         }
193     }
194 
195     private void fail(long frameLength) {
196         if (frameLength > 0) {
197             throw new TooLongFrameException(
198                             "frame length exceeds " + maxFrameLength + ": " + frameLength + " - discarded");
199         } else {
200             throw new TooLongFrameException(
201                             "frame length exceeds " + maxFrameLength + " - discarding");
202         }
203     }
204 
205     private static void fail(ChannelHandlerContext ctx) {
206         ctx.fireExceptionCaught(new CorruptedFrameException("frame contains content before the xml starts"));
207     }
208 
209     private static ByteBuf extractFrame(ByteBuf buffer, int index, int length) {
210         return buffer.copy(index, length);
211     }
212 
213     /**
214      * Asks whether the given byte is a valid
215      * start char for an xml element name.
216      * <p/>
217      * Please refer to the
218      * <a href="https://www.w3.org/TR/2004/REC-xml11-20040204/#NT-NameStartChar">NameStartChar</a>
219      * formal definition in the W3C XML spec for further info.
220      *
221      * @param b the input char
222      * @return true if the char is a valid start char
223      */
224     private static boolean isValidStartCharForXmlElement(final byte b) {
225         return b >= 'a' && b <= 'z' || b >= 'A' && b <= 'Z' || b == ':' || b == '_';
226     }
227 
228     private static boolean isCommentBlockStart(final ByteBuf in, final int i) {
229         return i < in.writerIndex() - 3
230                 && in.getByte(i + 2) == '-'
231                 && in.getByte(i + 3) == '-';
232     }
233 
234     private static boolean isCDATABlockStart(final ByteBuf in, final int i) {
235         return i < in.writerIndex() - 8
236                 && in.getByte(i + 2) == '['
237                 && in.getByte(i + 3) == 'C'
238                 && in.getByte(i + 4) == 'D'
239                 && in.getByte(i + 5) == 'A'
240                 && in.getByte(i + 6) == 'T'
241                 && in.getByte(i + 7) == 'A'
242                 && in.getByte(i + 8) == '[';
243     }
244 
245 }