GUACAMOLE-615: Correct parser calculation of element lengths.

2025-10-27 07:03:07 +00:00 · 2023-04-21 16:51:27 -07:00
parent 82033adad0
commit 81f0e8c280
3 changed files with 227 additions and 75 deletions
--- a/guacamole-common-js/src/main/webapp/modules/Parser.js
+++ b/guacamole-common-js/src/main/webapp/modules/Parser.js
@@ -25,11 +25,13 @@ var Guacamole = Guacamole || {};
 *
 * @constructor
 */
-Guacamole.Parser = function() {
+Guacamole.Parser = function Parser() {
    /**
     * Reference to this parser.
     *
     * @private
     * @type {!Guacamole.Parser}
     */
    var parser = this;
@@ -39,22 +41,49 @@ Guacamole.Parser = function() {
     * is flushed into the element buffer.
     *
     * @private
     * @type {!string}
     */
-    var buffer = "";
+    var buffer = '';
    /**
     * Buffer of all received, complete elements. After an entire instruction
     * is read, this buffer is flushed, and a new instruction begins.
     *
     * @private
     * @type {!string[]}
     */
-    var element_buffer = [];
+    var elementBuffer = [];
-    // The location of the last element's terminator
+    /**
-    var element_end = -1;
+     * The character offset within the buffer of the current or most recently
     * parsed element's terminator. If sufficient characters have not yet been
     * read via calls to receive(), this may point to an offset well beyond the
     * end of the buffer. If no characters for an element have yet been read,
     * this will be -1.
     *
     * @private
     * @type {!number}
     */
    var elementEnd = -1;
-    // Where to start the next length search or the next element
+    /**
-    var start_index = 0;
+     * The character offset within the buffer of the location that the parser
     * should start searching for the next element length or the next element
     * value.
     *
     * @private
     * @type {!number}
     */
    var startIndex = 0;
    /**
     * The declared length of the current element being parsed, in Unicode
     * codepoints.
     *
     * @private
     * @type {!number}
     */
    var elementCodepoints = 0;
    /**
     * Appends the given instruction data packet to the internal buffer of
@@ -64,79 +93,119 @@ Guacamole.Parser = function() {
     * @param {!string} packet
     *     The instruction data to receive.
     */
-    this.receive = function(packet) {
+    this.receive = function receive(packet) {
        // Truncate buffer as necessary
-        if (start_index > 4096 && element_end >= start_index) {
+        if (startIndex > 4096 && elementEnd >= startIndex) {
-            buffer = buffer.substring(start_index);
+            buffer = buffer.substring(startIndex);
            // Reset parse relative to truncation
-            element_end -= start_index;
+            elementEnd -= startIndex;
-            start_index = 0;
+            startIndex = 0;
        }
-        // Append data to buffer
+        // Append data to buffer ONLY if there is outstanding data present. It
-        buffer += packet;
+        // is otherwise much faster to simply parse the received buffer as-is,
        // and tunnel implementations can take advantage of this by preferring
        // to send only complete instructions. Both the HTTP and WebSocket
        // tunnel implementations included with Guacamole already do this.
        if (buffer.length)
            buffer += packet;
        else
            buffer = packet;
        // While search is within currently received data
-        while (element_end < buffer.length) {
+        while (elementEnd < buffer.length) {
            // If we are waiting for element data
-            if (element_end >= start_index) {
+            if (elementEnd >= startIndex) {
                // If we have enough data in the buffer to fill the element
                // value, but the number of codepoints in the expected substring
                // containing the element value is less than its declared
                // length, that can only be because the element contains
                // characters split between high and low surrogates, and the
                // actual end of the element value is further out. The minimum
                // number of additional characters that must be read to satisfy
                // the declared length is simply the difference between the
                // number of codepoints actually present vs. the expected
                // length.
                var codepoints = Guacamole.Parser.codePointCount(buffer, startIndex, elementEnd);
                if (codepoints < elementCodepoints) {
                    elementEnd += elementCodepoints - codepoints;
                    continue;
                }
                // If the current element ends with a character involving both
                // a high and low surrogate, elementEnd points to the low
                // surrogate and NOT the element terminator. We must shift the
                // end and reevaluate.
                else if (elementCodepoints && buffer.codePointAt(elementEnd - 1) >= 0x10000) {
                    elementEnd++;
                    continue;
                }
                // We now have enough data for the element. Parse.
-                var element = buffer.substring(start_index, element_end);
+                var element = buffer.substring(startIndex, elementEnd);
-                var terminator = buffer.substring(element_end, element_end+1);
+                var terminator = buffer.substring(elementEnd, elementEnd+1);
                // Add element to array
-                element_buffer.push(element);
+                elementBuffer.push(element);
                // If last element, handle instruction
-                if (terminator == ";") {
+                if (terminator === ';') {
                    // Get opcode
-                    var opcode = element_buffer.shift();
+                    var opcode = elementBuffer.shift();
                    // Call instruction handler.
-                    if (parser.oninstruction != null)
+                    if (parser.oninstruction !== null)
-                        parser.oninstruction(opcode, element_buffer);
+                        parser.oninstruction(opcode, elementBuffer);
                    // Clear elements
-                    element_buffer.length = 0;
+                    elementBuffer = [];
                    // Immediately truncate buffer if its contents have been
                    // completely parsed, so that the next call to receive()
                    // need not append to the buffer unnecessarily
                    if (elementEnd + 1 === buffer.length) {
                        elementEnd = -1;
                        buffer = '';
                    }
                }
-                else if (terminator != ',')
+                else if (terminator !== ',')
-                    throw new Error("Illegal terminator.");
+                    throw new Error('Element terminator of instruction was not ";" nor ",".');
                // Start searching for length at character after
                // element terminator
-                start_index = element_end + 1;
+                startIndex = elementEnd + 1;
            }
            // Search for end of length
-            var length_end = buffer.indexOf(".", start_index);
+            var lengthEnd = buffer.indexOf('.', startIndex);
-            if (length_end != -1) {
+            if (lengthEnd !== -1) {
                // Parse length
-                var length = parseInt(buffer.substring(element_end+1, length_end));
+                elementCodepoints = parseInt(buffer.substring(elementEnd+1, lengthEnd));
-                if (isNaN(length))
+                if (isNaN(elementCodepoints))
-                    throw new Error("Non-numeric character in element length.");
+                    throw new Error('Non-numeric character in element length.');
                // Calculate start of element
-                start_index = length_end + 1;
+                startIndex = lengthEnd + 1;
                // Calculate location of element terminator
-                element_end = start_index + length;
+                elementEnd = startIndex + elementCodepoints;
            }
            // If no period yet, continue search when more data
            // is received
            else {
-                start_index = buffer.length;
+                startIndex = buffer.length;
                break;
            }
@@ -157,3 +226,34 @@ Guacamole.Parser = function() {
    this.oninstruction = null;
 };
 /**
 * Counts the Unicode codepoints (as opposed to UTF-16 code units) contained
 * in the given string, or in the portion of that string lying between two
 * character offsets. A high surrogate immediately followed by a low
 * surrogate is counted as one codepoint, whereas the length property of a
 * string would count it as two. Any surrogate character that is not part of
 * such a pair is counted as a codepoint on its own.
 *
 * @param {!string} str
 *     The string to inspect.
 *
 * @param {number} [start=0]
 *     The character offset within the string at which counting begins. If
 *     omitted, counting begins at the first character of the string.
 *
 * @param {number} [end]
 *     The character offset within the string at which counting stops
 *     (exclusive: the character at this offset is not counted). If omitted,
 *     counting continues through the end of the string.
 *
 * @returns {!number}
 *     The number of Unicode codepoints found within the requested portion of
 *     the string.
 */
 Guacamole.Parser.codePointCount = function codePointCount(str, start, end) {

    // Restrict counting to the requested range of characters
    var segment = str.substring(start || 0, end);

    // Each proper surrogate pair occupies two characters but represents only
    // one codepoint
    var pairs = segment.match(/[\uD800-\uDBFF][\uDC00-\uDFFF]/g);
    if (pairs === null)
        return segment.length;

    return segment.length - pairs.length;

 };
--- a/guacamole-common/src/main/java/org/apache/guacamole/protocol/GuacamoleInstruction.java
+++ b/guacamole-common/src/main/java/org/apache/guacamole/protocol/GuacamoleInstruction.java
@@ -93,6 +93,22 @@ public class GuacamoleInstruction {
        return args;
    }
    /**
     * Writes a single Guacamole instruction element to the given
     * StringBuilder. The element is written as its length in Unicode
     * codepoints, followed by a period, followed by the element value itself.
     *
     * @param buff
     *     The StringBuilder that should receive the encoded element.
     *
     * @param element
     *     The string value of the element to encode and append.
     */
    private static void appendElement(StringBuilder buff, String element) {

        // The length prefix is measured in codepoints, not Java chars, so a
        // surrogate pair contributes one to the length rather than two
        int codepoints = element.codePointCount(0, element.length());

        buff.append(codepoints).append('.').append(element);

    }
    /**
     * Returns this GuacamoleInstruction in the form it would be sent over the
     * Guacamole protocol.
@@ -111,16 +127,12 @@ public class GuacamoleInstruction {
            StringBuilder buff = new StringBuilder();
            // Write opcode
-            buff.append(opcode.length());
+            appendElement(buff, opcode);
            buff.append('.');
            buff.append(opcode);
            // Write argument values
            for (String value : args) {
                buff.append(',');
-                buff.append(value.length());
+                appendElement(buff, value);
                buff.append('.');
                buff.append(value);
            }
            // Write terminator
--- a/guacamole-common/src/main/java/org/apache/guacamole/protocol/GuacamoleParser.java
+++ b/guacamole-common/src/main/java/org/apache/guacamole/protocol/GuacamoleParser.java
@@ -21,7 +21,6 @@ package org.apache.guacamole.protocol;
 import java.util.Arrays;
 import java.util.Iterator;
 import java.util.List;
 import org.apache.guacamole.GuacamoleException;
 import org.apache.guacamole.GuacamoleServerException;
@@ -87,10 +86,18 @@ public class GuacamoleParser implements Iterator<GuacamoleInstruction> {
    private State state = State.PARSING_LENGTH;
    /**
-     * The length of the current element, if known.
+     * The length of the current element, if known, in Java characters. This
     * value may be adjusted as an element is parsed to take surrogates into
     * account.
     */
    private int elementLength = 0;
    /**
     * The length of the current element, if known, in Unicode codepoints. This
     * value will NOT change as an element is parsed.
     */
    private int elementCodepoints;
    /**
     * The number of elements currently parsed.
     */
@@ -104,13 +111,22 @@ public class GuacamoleParser implements Iterator<GuacamoleInstruction> {
    /**
     * Appends data from the given buffer to the current instruction.
     * 
-     * @param chunk The buffer containing the data to append.
+     * @param chunk
-     * @param offset The offset within the buffer where the data begins.
+     *     The buffer containing the data to append.
-     * @param length The length of the data to append.
+     *
-     * @return The number of characters appended, or 0 if complete instructions
+     * @param offset
-     *         have already been parsed and must be read via next() before
+     *     The offset within the buffer where the data begins.
-     *         more data can be appended.
+     *
-     * @throws GuacamoleException If an error occurs while parsing the new data.
+     * @param length
     *     The length of the data to append.
     *
     * @return
     *     The number of characters appended, or 0 if complete instructions
     *     have already been parsed and must be read via next() before more
     *     data can be appended.
     *
     * @throws GuacamoleException
     *     If an error occurs while parsing the new data.
     */
    public int append(char chunk[], int offset, int length) throws GuacamoleException {
@@ -156,39 +172,63 @@ public class GuacamoleParser implements Iterator<GuacamoleInstruction> {
            }
            // Save length
-            elementLength = parsedLength;
+            elementCodepoints = elementLength = parsedLength;
        } // end parse length
        // Parse element content, if available
-        if (state == State.PARSING_CONTENT && charsParsed + elementLength + 1 <= length) {
+        while (state == State.PARSING_CONTENT && charsParsed + elementLength + 1 <= length) {
-            // Read element
+            // Read element (which may not match element length if surrogate
            // characters are present)
            String element = new String(chunk, offset + charsParsed, elementLength);
            // Verify element contains the number of whole Unicode characters
            // expected, scheduling a future read if we don't yet have enough
            // characters
            int codepoints = element.codePointCount(0, element.length());
            if (codepoints < elementCodepoints) {
                elementLength += elementCodepoints - codepoints;
                continue;
            }
            // If the current element ends with a character involving both
            // a high and low surrogate, elementLength points to the low
            // surrogate and NOT the element terminator. We must correct the
            // length and reevaluate.
            else if (Character.isSurrogatePair(chunk[offset + charsParsed + elementLength - 1],
                    chunk[offset + charsParsed + elementLength])) {
                elementLength++;
                continue;
            }
            charsParsed += elementLength;
            elementLength = 0;
            // Read terminator char following element
            char terminator = chunk[offset + charsParsed++];
            // Add element to currently parsed elements
            elements[elementCount++] = element;
-            // If semicolon, store end-of-instruction
+            // Read terminator char following element
-            if (terminator == ';') {
+            char terminator = chunk[offset + charsParsed++];
-                state = State.COMPLETE;
+            switch (terminator) {
                parsedInstruction = new GuacamoleInstruction(elements[0],
                        Arrays.asList(elements).subList(1, elementCount));
            }
-            // If comma, move on to next element
+                // If semicolon, store end-of-instruction
-            else if (terminator == ',')
+                case ';':
-                state = State.PARSING_LENGTH;
+                    state = State.COMPLETE;
                    parsedInstruction = new GuacamoleInstruction(elements[0],
                            Arrays.asList(elements).subList(1, elementCount));
                    break;
                // If comma, move on to next element
                case ',':
                    state = State.PARSING_LENGTH;
                    break;
                // Otherwise, parse error
                default:
                    state = State.ERROR;
                    throw new GuacamoleServerException("Element terminator of instruction was not ';' nor ','");
            // Otherwise, parse error
            else {
                state = State.ERROR;
                throw new GuacamoleServerException("Element terminator of instruction was not ';' nor ','");
            }
        } // end parse content