diff --git a/guacamole-common-js/src/main/webapp/modules/Parser.js b/guacamole-common-js/src/main/webapp/modules/Parser.js index 7a1a6587c..a30e7851b 100644 --- a/guacamole-common-js/src/main/webapp/modules/Parser.js +++ b/guacamole-common-js/src/main/webapp/modules/Parser.js @@ -22,14 +22,16 @@ var Guacamole = Guacamole || {}; /** * Simple Guacamole protocol parser that invokes an oninstruction event when * full instructions are available from data received via receive(). - * + * * @constructor */ -Guacamole.Parser = function() { +Guacamole.Parser = function Parser() { /** * Reference to this parser. + * * @private + * @type {!Guacamole.Parser} */ var parser = this; @@ -37,24 +39,51 @@ Guacamole.Parser = function() { * Current buffer of received data. This buffer grows until a full * element is available. After a full element is available, that element * is flushed into the element buffer. - * + * * @private + * @type {!string} */ - var buffer = ""; + var buffer = ''; /** * Buffer of all received, complete elements. After an entire instruction * is read, this buffer is flushed, and a new instruction begins. - * + * * @private + * @type {!string[]} */ - var element_buffer = []; + var elementBuffer = []; - // The location of the last element's terminator - var element_end = -1; + /** + * The character offset within the buffer of the current or most recently + * parsed element's terminator. If sufficient characters have not yet been + * read via calls to receive(), this may point to an offset well beyond the + * end of the buffer. If no characters for an element have yet been read, + * this will be -1. + * + * @private + * @type {!number} + */ + var elementEnd = -1; - // Where to start the next length search or the next element - var start_index = 0; + /** + * The character offset within the buffer of the location that the parser + * should start looking for the next element length search or next element + * value. 
+ * + * @private + * @type {!number} + */ + var startIndex = 0; + + /** + * The declared length of the current element being parsed, in Unicode + * codepoints. + * + * @private + * @type {!number} + */ + var elementCodepoints = 0; /** * Appends the given instruction data packet to the internal buffer of @@ -64,79 +93,119 @@ Guacamole.Parser = function() { * @param {!string} packet * The instruction data to receive. */ - this.receive = function(packet) { + this.receive = function receive(packet) { // Truncate buffer as necessary - if (start_index > 4096 && element_end >= start_index) { + if (startIndex > 4096 && elementEnd >= startIndex) { - buffer = buffer.substring(start_index); + buffer = buffer.substring(startIndex); // Reset parse relative to truncation - element_end -= start_index; - start_index = 0; + elementEnd -= startIndex; + startIndex = 0; } - // Append data to buffer - buffer += packet; + // Append data to buffer ONLY if there is outstanding data present. It + // is otherwise much faster to simply parse the received buffer as-is, + // and tunnel implementations can take advantage of this by preferring + // to send only complete instructions. Both the HTTP and WebSocket + // tunnel implementations included with Guacamole already do this. + if (buffer.length) + buffer += packet; + else + buffer = packet; // While search is within currently received data - while (element_end < buffer.length) { + while (elementEnd < buffer.length) { // If we are waiting for element data - if (element_end >= start_index) { + if (elementEnd >= startIndex) { + + // If we have enough data in the buffer to fill the element + // value, but the number of codepoints in the expected substring + // containing the element value is less than its declared + // length, that can only be because the element contains + // characters split between high and low surrogates, and the + // actual end of the element value is further out. 
The minimum + // number of additional characters that must be read to satisfy + // the declared length is simply the difference between the + // number of codepoints actually present vs. the expected + // length. + var codepoints = Guacamole.Parser.codePointCount(buffer, startIndex, elementEnd); + if (codepoints < elementCodepoints) { + elementEnd += elementCodepoints - codepoints; + continue; + } + + // If the current element ends with a character involving both + // a high and low surrogate, elementEnd points to the low + // surrogate and NOT the element terminator. We must shift the + // end and reevaluate. + else if (elementCodepoints && buffer.codePointAt(elementEnd - 1) >= 0x10000) { + elementEnd++; + continue; + } // We now have enough data for the element. Parse. - var element = buffer.substring(start_index, element_end); - var terminator = buffer.substring(element_end, element_end+1); + var element = buffer.substring(startIndex, elementEnd); + var terminator = buffer.substring(elementEnd, elementEnd+1); // Add element to array - element_buffer.push(element); + elementBuffer.push(element); // If last element, handle instruction - if (terminator == ";") { + if (terminator === ';') { // Get opcode - var opcode = element_buffer.shift(); + var opcode = elementBuffer.shift(); // Call instruction handler. 
- if (parser.oninstruction != null) - parser.oninstruction(opcode, element_buffer); + if (parser.oninstruction !== null) + parser.oninstruction(opcode, elementBuffer); // Clear elements - element_buffer.length = 0; + elementBuffer = []; + + // Immediately truncate buffer if its contents have been + // completely parsed, so that the next call to receive() + // need not append to the buffer unnecessarily + if (elementEnd + 1 === buffer.length) { + elementEnd = -1; + buffer = ''; + } } - else if (terminator != ',') - throw new Error("Illegal terminator."); + else if (terminator !== ',') + throw new Error('Element terminator of instruction was not ";" nor ",".'); // Start searching for length at character after // element terminator - start_index = element_end + 1; + startIndex = elementEnd + 1; } // Search for end of length - var length_end = buffer.indexOf(".", start_index); - if (length_end != -1) { + var lengthEnd = buffer.indexOf('.', startIndex); + if (lengthEnd !== -1) { // Parse length - var length = parseInt(buffer.substring(element_end+1, length_end)); - if (isNaN(length)) - throw new Error("Non-numeric character in element length."); + elementCodepoints = parseInt(buffer.substring(elementEnd+1, lengthEnd)); + if (isNaN(elementCodepoints)) + throw new Error('Non-numeric character in element length.'); // Calculate start of element - start_index = length_end + 1; + startIndex = lengthEnd + 1; // Calculate location of element terminator - element_end = start_index + length; + elementEnd = startIndex + elementCodepoints; } - + // If no period yet, continue search when more data // is received else { - start_index = buffer.length; + startIndex = buffer.length; break; } @@ -146,7 +215,7 @@ Guacamole.Parser = function() { /** * Fired once for every complete Guacamole instruction received, in order. - * + * * @event * @param {!string} opcode * The Guacamole instruction opcode. 
@@ -157,3 +226,34 @@ Guacamole.Parser = function() { this.oninstruction = null; }; + +/** + * Returns the number of Unicode codepoints (not code units) within the given + * string. If character offsets are provided, only codepoints between those + * offsets are counted. Unlike the length property of a string, this function + * counts proper surrogate pairs as a single codepoint. High and low surrogate + * characters that are not part of a proper surrogate pair are counted + * separately as individual codepoints. + * + * @param {!string} str + * The string whose contents should be inspected. + * + * @param {number} [start=0] + * The index of the location in the given string where codepoint counting + * should start. If omitted, counting will begin at the start of the + * string. + * + * @param {number} [end] + * The index of the first location in the given string after where counting + * should stop (the character after the last character being counted). If + * omitted, all characters after the start location will be counted. + * + * @returns {!number} + * The number of Unicode codepoints within the requested portion of the + * given string. + */ +Guacamole.Parser.codePointCount = function codePointCount(str, start, end) { + str = str.substring(start || 0, end); + var surrogatePairs = str.match(/[\uD800-\uDBFF][\uDC00-\uDFFF]/g); + return str.length - (surrogatePairs ? 
surrogatePairs.length : 0); +}; diff --git a/guacamole-common/src/main/java/org/apache/guacamole/protocol/GuacamoleInstruction.java b/guacamole-common/src/main/java/org/apache/guacamole/protocol/GuacamoleInstruction.java index c3abd4610..10078d4a0 100644 --- a/guacamole-common/src/main/java/org/apache/guacamole/protocol/GuacamoleInstruction.java +++ b/guacamole-common/src/main/java/org/apache/guacamole/protocol/GuacamoleInstruction.java @@ -93,6 +93,22 @@ public class GuacamoleInstruction { return args; } + /** + * Appends the given value to the provided StringBuilder as a Guacamole + * instruction element, including length prefix. + * + * @param buff + * The StringBuilder to append the element to. + * + * @param element + * The string value of the element to append. + */ + private static void appendElement(StringBuilder buff, String element) { + buff.append(element.codePointCount(0, element.length())); + buff.append('.'); + buff.append(element); + } + /** * Returns this GuacamoleInstruction in the form it would be sent over the * Guacamole protocol. 
@@ -111,16 +127,12 @@ public class GuacamoleInstruction { StringBuilder buff = new StringBuilder(); // Write opcode - buff.append(opcode.length()); - buff.append('.'); - buff.append(opcode); + appendElement(buff, opcode); // Write argument values for (String value : args) { buff.append(','); - buff.append(value.length()); - buff.append('.'); - buff.append(value); + appendElement(buff, value); } // Write terminator diff --git a/guacamole-common/src/main/java/org/apache/guacamole/protocol/GuacamoleParser.java b/guacamole-common/src/main/java/org/apache/guacamole/protocol/GuacamoleParser.java index 98eb548fb..c51f164c4 100644 --- a/guacamole-common/src/main/java/org/apache/guacamole/protocol/GuacamoleParser.java +++ b/guacamole-common/src/main/java/org/apache/guacamole/protocol/GuacamoleParser.java @@ -21,7 +21,6 @@ package org.apache.guacamole.protocol; import java.util.Arrays; import java.util.Iterator; -import java.util.List; import org.apache.guacamole.GuacamoleException; import org.apache.guacamole.GuacamoleServerException; @@ -87,10 +86,18 @@ public class GuacamoleParser implements Iterator { private State state = State.PARSING_LENGTH; /** - * The length of the current element, if known. + * The length of the current element, if known, in Java characters. This + * value may be adjusted as an element is parsed to take surrogates into + * account. */ private int elementLength = 0; + /** + * The length of the current element, if known, in Unicode codepoints. This + * value will NOT change as an element is parsed. + */ + private int elementCodepoints; + /** * The number of elements currently parsed. */ @@ -104,13 +111,22 @@ public class GuacamoleParser implements Iterator { /** * Appends data from the given buffer to the current instruction. * - * @param chunk The buffer containing the data to append. - * @param offset The offset within the buffer where the data begins. - * @param length The length of the data to append. 
- * @return The number of characters appended, or 0 if complete instructions - * have already been parsed and must be read via next() before - * more data can be appended. - * @throws GuacamoleException If an error occurs while parsing the new data. + * @param chunk + * The buffer containing the data to append. + * + * @param offset + * The offset within the buffer where the data begins. + * + * @param length + * The length of the data to append. + * + * @return + * The number of characters appended, or 0 if complete instructions + * have already been parsed and must be read via next() before more + * data can be appended. + * + * @throws GuacamoleException + * If an error occurs while parsing the new data. */ public int append(char chunk[], int offset, int length) throws GuacamoleException { @@ -156,39 +172,63 @@ public class GuacamoleParser implements Iterator { } // Save length - elementLength = parsedLength; + elementCodepoints = elementLength = parsedLength; } // end parse length // Parse element content, if available - if (state == State.PARSING_CONTENT && charsParsed + elementLength + 1 <= length) { + while (state == State.PARSING_CONTENT && charsParsed + elementLength + 1 <= length) { - // Read element + // Read element (which may not match element length if surrogate + // characters are present) String element = new String(chunk, offset + charsParsed, elementLength); + + // Verify element contains the number of whole Unicode characters + // expected, scheduling a future read if we don't yet have enough + // characters + int codepoints = element.codePointCount(0, element.length()); + if (codepoints < elementCodepoints) { + elementLength += elementCodepoints - codepoints; + continue; + } + + // If the current element ends with a character involving both + // a high and low surrogate, elementLength points to the low + // surrogate and NOT the element terminator. We must correct the + // length and reevaluate. 
+ else if (Character.isSurrogatePair(chunk[offset + charsParsed + elementLength - 1], + chunk[offset + charsParsed + elementLength])) { + elementLength++; + continue; + } + charsParsed += elementLength; elementLength = 0; - // Read terminator char following element - char terminator = chunk[offset + charsParsed++]; - // Add element to currently parsed elements elements[elementCount++] = element; - - // If semicolon, store end-of-instruction - if (terminator == ';') { - state = State.COMPLETE; - parsedInstruction = new GuacamoleInstruction(elements[0], - Arrays.asList(elements).subList(1, elementCount)); - } - // If comma, move on to next element - else if (terminator == ',') - state = State.PARSING_LENGTH; + // Read terminator char following element + char terminator = chunk[offset + charsParsed++]; + switch (terminator) { + + // If semicolon, store end-of-instruction + case ';': + state = State.COMPLETE; + parsedInstruction = new GuacamoleInstruction(elements[0], + Arrays.asList(elements).subList(1, elementCount)); + break; + + // If comma, move on to next element + case ',': + state = State.PARSING_LENGTH; + break; + + // Otherwise, parse error + default: + state = State.ERROR; + throw new GuacamoleServerException("Element terminator of instruction was not ';' nor ','"); - // Otherwise, parse error - else { - state = State.ERROR; - throw new GuacamoleServerException("Element terminator of instruction was not ';' nor ','"); } } // end parse content