/*
 * Copyright 2013 The Netty Project
 *
 * The Netty Project licenses this file to you under the Apache License,
 * version 2.0 (the "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at:
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
 * License for the specific language governing permissions and limitations
 * under the License.
 */
16  package io.netty.handler.codec.xml;
17  
18  import io.netty.buffer.ByteBuf;
19  import io.netty.channel.ChannelHandlerContext;
20  import io.netty.handler.codec.ByteToMessageDecoder;
21  import io.netty.handler.codec.CorruptedFrameException;
22  import io.netty.handler.codec.TooLongFrameException;
23  
24  import java.util.List;
25  
26  /**
27   * A frame decoder for single separate XML based message streams.
28   * <p/>
29   * A couple examples will better help illustrate
30   * what this decoder actually does.
31   * <p/>
32   * Given an input array of bytes split over 3 frames like this:
33   * <pre>
34   * +-----+-----+-----------+
35   * | &lt;an | Xml | Element/&gt; |
36   * +-----+-----+-----------+
37   * </pre>
38   * <p/>
39   * this decoder would output a single frame:
40   * <p/>
41   * <pre>
42   * +-----------------+
43   * | &lt;anXmlElement/&gt; |
44   * +-----------------+
45   * </pre>
46   *
47   * Given an input array of bytes split over 5 frames like this:
48   * <pre>
49   * +-----+-----+-----------+-----+----------------------------------+
50   * | &lt;an | Xml | Element/&gt; | &lt;ro | ot&gt;&lt;child&gt;content&lt;/child&gt;&lt;/root&gt; |
51   * +-----+-----+-----------+-----+----------------------------------+
52   * </pre>
53   * <p/>
54   * this decoder would output two frames:
55   * <p/>
56   * <pre>
57   * +-----------------+-------------------------------------+
58   * | &lt;anXmlElement/&gt; | &lt;root&gt;&lt;child&gt;content&lt;/child&gt;&lt;/root&gt; |
59   * +-----------------+-------------------------------------+
60   * </pre>
61   *
62   * Please note that this decoder is not suitable for
63   * xml streaming protocols such as
64   * <a href="http://xmpp.org/rfcs/rfc6120.html">XMPP</a>,
65   * where an initial xml element opens the stream and only
66   * gets closed at the end of the session, although this class
67   * could probably allow for such type of message flow with
68   * minor modifications.
69   */
70  public class XmlFrameDecoder extends ByteToMessageDecoder {
71  
72      private final int maxFrameLength;
73  
74      public XmlFrameDecoder(int maxFrameLength) {
75          if (maxFrameLength < 1) {
76              throw new IllegalArgumentException("maxFrameLength must be a positive int");
77          }
78          this.maxFrameLength = maxFrameLength;
79      }
80  
81      @Override
82      protected void decode(ChannelHandlerContext ctx, ByteBuf in, List<Object> out) throws Exception {
83          boolean openingBracketFound = false;
84          boolean atLeastOneXmlElementFound = false;
85          boolean inCDATASection = false;
86          long openBracketsCount = 0;
87          int length = 0;
88          int leadingWhiteSpaceCount = 0;
89          final int bufferLength = in.writerIndex();
90  
91          if (bufferLength > maxFrameLength) {
92              // bufferLength exceeded maxFrameLength; dropping frame
93              in.skipBytes(in.readableBytes());
94              fail(bufferLength);
95              return;
96          }
97  
98          for (int i = in.readerIndex(); i < bufferLength; i++) {
99              final byte readByte = in.getByte(i);
100             if (!openingBracketFound && Character.isWhitespace(readByte)) {
101                 // xml has not started and whitespace char found
102                 leadingWhiteSpaceCount++;
103             } else if (!openingBracketFound && readByte != '<') {
104                 // garbage found before xml start
105                 fail(ctx);
106                 in.skipBytes(in.readableBytes());
107                 return;
108             } else if (!inCDATASection && readByte == '<') {
109                 openingBracketFound = true;
110 
111                 if (i < bufferLength - 1) {
112                     final byte peekAheadByte = in.getByte(i + 1);
113                     if (peekAheadByte == '/') {
114                         // found </, we must check if it is enclosed
115                         int peekFurtherAheadIndex = i + 2;
116                         while (peekFurtherAheadIndex <= bufferLength - 1) {
117                             //if we have </ and enclosing > we can decrement openBracketsCount
118                             if (in.getByte(peekFurtherAheadIndex) == '>') {
119                                 openBracketsCount--;
120                                 break;
121                             }
122                             peekFurtherAheadIndex++;
123                         }
124                     } else if (isValidStartCharForXmlElement(peekAheadByte)) {
125                         atLeastOneXmlElementFound = true;
126                         // char after < is a valid xml element start char,
127                         // incrementing openBracketsCount
128                         openBracketsCount++;
129                     } else if (peekAheadByte == '!') {
130                         if (isCommentBlockStart(in, i)) {
131                             // <!-- comment --> start found
132                             openBracketsCount++;
133                         } else if (isCDATABlockStart(in, i)) {
134                             // <![CDATA[ start found
135                             openBracketsCount++;
136                             inCDATASection = true;
137                         }
138                     } else if (peekAheadByte == '?') {
139                         // <?xml ?> start found
140                         openBracketsCount++;
141                     }
142                 }
143             } else if (!inCDATASection && readByte == '/') {
144                 if (i < bufferLength - 1 && in.getByte(i + 1) == '>') {
145                     // found />, decrementing openBracketsCount
146                     openBracketsCount--;
147                 }
148             } else if (readByte == '>') {
149                 length = i + 1;
150 
151                 if (i - 1 > -1) {
152                     final byte peekBehindByte = in.getByte(i - 1);
153 
154                     if (!inCDATASection) {
155                         if (peekBehindByte == '?') {
156                             // an <?xml ?> tag was closed
157                             openBracketsCount--;
158                         } else if (peekBehindByte == '-' && i - 2 > -1 && in.getByte(i - 2) == '-') {
159                             // a <!-- comment --> was closed
160                             openBracketsCount--;
161                         }
162                     } else if (peekBehindByte == ']' && i - 2 > -1 && in.getByte(i - 2) == ']') {
163                         // a <![CDATA[...]]> block was closed
164                         openBracketsCount--;
165                         inCDATASection = false;
166                     }
167                 }
168 
169                 if (atLeastOneXmlElementFound && openBracketsCount == 0) {
170                     // xml is balanced, bailing out
171                     break;
172                 }
173             }
174         }
175 
176         final int readerIndex = in.readerIndex();
177         int xmlElementLength = length - readerIndex;
178 
179         if (openBracketsCount == 0 && xmlElementLength > 0) {
180             if (readerIndex + xmlElementLength >= bufferLength) {
181                 xmlElementLength = in.readableBytes();
182             }
183             final ByteBuf frame =
184                     extractFrame(in, readerIndex + leadingWhiteSpaceCount, xmlElementLength - leadingWhiteSpaceCount);
185             in.skipBytes(xmlElementLength);
186             out.add(frame);
187         }
188     }
189 
190     private void fail(long frameLength) {
191         if (frameLength > 0) {
192             throw new TooLongFrameException(
193                             "frame length exceeds " + maxFrameLength + ": " + frameLength + " - discarded");
194         } else {
195             throw new TooLongFrameException(
196                             "frame length exceeds " + maxFrameLength + " - discarding");
197         }
198     }
199 
200     private static void fail(ChannelHandlerContext ctx) {
201         ctx.fireExceptionCaught(new CorruptedFrameException("frame contains content before the xml starts"));
202     }
203 
204     private static ByteBuf extractFrame(ByteBuf buffer, int index, int length) {
205         return buffer.copy(index, length);
206     }
207 
208     /**
209      * Asks whether the given byte is a valid
210      * start char for an xml element name.
211      * <p/>
212      * Please refer to the
213      * <a href="http://www.w3.org/TR/2004/REC-xml11-20040204/#NT-NameStartChar">NameStartChar</a>
214      * formal definition in the W3C XML spec for further info.
215      *
216      * @param b the input char
217      * @return true if the char is a valid start char
218      */
219     private static boolean isValidStartCharForXmlElement(final byte b) {
220         return b >= 'a' && b <= 'z' || b >= 'A' && b <= 'Z' || b == ':' || b == '_';
221     }
222 
223     private static boolean isCommentBlockStart(final ByteBuf in, final int i) {
224         return i < in.writerIndex() - 3
225                 && in.getByte(i + 2) == '-'
226                 && in.getByte(i + 3) == '-';
227     }
228 
229     private static boolean isCDATABlockStart(final ByteBuf in, final int i) {
230         return i < in.writerIndex() - 8
231                 && in.getByte(i + 2) == '['
232                 && in.getByte(i + 3) == 'C'
233                 && in.getByte(i + 4) == 'D'
234                 && in.getByte(i + 5) == 'A'
235                 && in.getByte(i + 6) == 'T'
236                 && in.getByte(i + 7) == 'A'
237                 && in.getByte(i + 8) == '[';
238     }
239 
240 }