GUACAMOLE-615: Correct parser calculation of element lengths.

2025-12-15 15:00:09 +00:00 · 2023-04-21 16:51:27 -07:00
parent 82033adad0
commit 81f0e8c280
3 changed files with 227 additions and 75 deletions
--- a/guacamole-common-js/src/main/webapp/modules/Parser.js
+++ b/guacamole-common-js/src/main/webapp/modules/Parser.js
@@ -22,14 +22,16 @@ var Guacamole = Guacamole || {};
 /**
 * Simple Guacamole protocol parser that invokes an oninstruction event when
 * full instructions are available from data received via receive().
- * 
+ *
 * @constructor
 */
-Guacamole.Parser = function() {
+Guacamole.Parser = function Parser() {

    /**
     * Reference to this parser.
+     *
     * @private
+     * @type {!Guacamole.Parser}
     */
    var parser = this;

@@ -37,24 +39,51 @@ Guacamole.Parser = function() {
     * Current buffer of received data. This buffer grows until a full
     * element is available. After a full element is available, that element
     * is flushed into the element buffer.
-     * 
+     *
     * @private
+     * @type {!string}
     */
-    var buffer = "";
+    var buffer = '';

    /**
     * Buffer of all received, complete elements. After an entire instruction
     * is read, this buffer is flushed, and a new instruction begins.
-     * 
+     *
     * @private
+     * @type {!string[]}
     */
-    var element_buffer = [];
+    var elementBuffer = [];

-    // The location of the last element's terminator
-    var element_end = -1;
+    /**
+     * The character offset within the buffer of the current or most recently
+     * parsed element's terminator. If sufficient characters have not yet been
+     * read via calls to receive(), this may point to an offset well beyond the
+     * end of the buffer. If no characters for an element have yet been read,
+     * this will be -1.
+     *
+     * @private
+     * @type {!number}
+     */
+    var elementEnd = -1;

-    // Where to start the next length search or the next element
-    var start_index = 0;
+    /**
+     * The character offset within the buffer at which the parser should begin
+     * searching for the next element length or reading the next element
+     * value.
+     *
+     * @private
+     * @type {!number}
+     */
+    var startIndex = 0;
+
+    /**
+     * The declared length of the current element being parsed, in Unicode
+     * codepoints.
+     *
+     * @private
+     * @type {!number}
+     */
+    var elementCodepoints = 0;

    /**
     * Appends the given instruction data packet to the internal buffer of
@@ -64,79 +93,119 @@ Guacamole.Parser = function() {
     * @param {!string} packet
     *     The instruction data to receive.
     */
-    this.receive = function(packet) {
+    this.receive = function receive(packet) {

        // Truncate buffer as necessary
-        if (start_index > 4096 && element_end >= start_index) {
+        if (startIndex > 4096 && elementEnd >= startIndex) {

-            buffer = buffer.substring(start_index);
+            buffer = buffer.substring(startIndex);

            // Reset parse relative to truncation
-            element_end -= start_index;
-            start_index = 0;
+            elementEnd -= startIndex;
+            startIndex = 0;

        }

-        // Append data to buffer
-        buffer += packet;
+        // Append data to buffer ONLY if there is outstanding data present. It
+        // is otherwise much faster to simply parse the received buffer as-is,
+        // and tunnel implementations can take advantage of this by preferring
+        // to send only complete instructions. Both the HTTP and WebSocket
+        // tunnel implementations included with Guacamole already do this.
+        if (buffer.length)
+            buffer += packet;
+        else
+            buffer = packet;

        // While search is within currently received data
-        while (element_end < buffer.length) {
+        while (elementEnd < buffer.length) {

            // If we are waiting for element data
-            if (element_end >= start_index) {
+            if (elementEnd >= startIndex) {
+
+                // If we have enough data in the buffer to fill the element
+                // value, but the number of codepoints in the expected substring
+                // containing the element value is less than its declared
+                // length, that can only be because the element contains
+                // characters split between high and low surrogates, and the
+                // actual end of the element value is further out. The minimum
+                // number of additional characters that must be read to satisfy
+                // the declared length is simply the difference between the
+                // number of codepoints actually present vs. the expected
+                // length.
+                var codepoints = Guacamole.Parser.codePointCount(buffer, startIndex, elementEnd);
+                if (codepoints < elementCodepoints) {
+                    elementEnd += elementCodepoints - codepoints;
+                    continue;
+                }
+
+                // If the current element ends with a character involving both
+                // a high and low surrogate, elementEnd points to the low
+                // surrogate and NOT the element terminator. We must shift the
+                // end and reevaluate.
+                else if (elementCodepoints && buffer.codePointAt(elementEnd - 1) >= 0x10000) {
+                    elementEnd++;
+                    continue;
+                }

                // We now have enough data for the element. Parse.
-                var element = buffer.substring(start_index, element_end);
-                var terminator = buffer.substring(element_end, element_end+1);
+                var element = buffer.substring(startIndex, elementEnd);
+                var terminator = buffer.substring(elementEnd, elementEnd+1);

                // Add element to array
-                element_buffer.push(element);
+                elementBuffer.push(element);

                // If last element, handle instruction
-                if (terminator == ";") {
+                if (terminator === ';') {

                    // Get opcode
-                    var opcode = element_buffer.shift();
+                    var opcode = elementBuffer.shift();

                    // Call instruction handler.
-                    if (parser.oninstruction != null)
-                        parser.oninstruction(opcode, element_buffer);
+                    if (parser.oninstruction !== null)
+                        parser.oninstruction(opcode, elementBuffer);

                    // Clear elements
-                    element_buffer.length = 0;
+                    elementBuffer = [];
+
+                    // Immediately truncate buffer if its contents have been
+                    // completely parsed, so that the next call to receive()
+                    // need not append to the buffer unnecessarily
+                    if (elementEnd + 1 === buffer.length) {
+                        elementEnd = -1;
+                        buffer = '';
+                    }

                }
-                else if (terminator != ',')
-                    throw new Error("Illegal terminator.");
+                else if (terminator !== ',')
+                    throw new Error('Element terminator of instruction was not ";" nor ",".');

                // Start searching for length at character after
                // element terminator
-                start_index = element_end + 1;
+                startIndex = elementEnd + 1;

            }

            // Search for end of length
-            var length_end = buffer.indexOf(".", start_index);
-            if (length_end != -1) {
+            var lengthEnd = buffer.indexOf('.', startIndex);
+            if (lengthEnd !== -1) {

                // Parse length
-                var length = parseInt(buffer.substring(element_end+1, length_end));
-                if (isNaN(length))
-                    throw new Error("Non-numeric character in element length.");
+                elementCodepoints = parseInt(buffer.substring(elementEnd+1, lengthEnd));
+                if (isNaN(elementCodepoints))
+                    throw new Error('Non-numeric character in element length.');

                // Calculate start of element
-                start_index = length_end + 1;
+                startIndex = lengthEnd + 1;

                // Calculate location of element terminator
-                element_end = start_index + length;
+                elementEnd = startIndex + elementCodepoints;

            }
-            
+
            // If no period yet, continue search when more data
            // is received
            else {
-                start_index = buffer.length;
+                startIndex = buffer.length;
                break;
            }

@@ -146,7 +215,7 @@ Guacamole.Parser = function() {

    /**
     * Fired once for every complete Guacamole instruction received, in order.
-     * 
+     *
     * @event
     * @param {!string} opcode
     *     The Guacamole instruction opcode.
@@ -157,3 +226,34 @@ Guacamole.Parser = function() {
    this.oninstruction = null;

 };
+
+/**
+ * Counts the Unicode codepoints (as opposed to UTF-16 code units) contained
+ * in the given string, or in the portion of it delimited by the given
+ * character offsets. A high surrogate immediately followed by a low
+ * surrogate is counted once; any surrogate that is not part of such a pair
+ * is counted as its own codepoint. This differs from the length property of
+ * a string, which counts every code unit.
+ *
+ * @param {!string} str
+ *     The string to inspect.
+ *
+ * @param {number} [start=0]
+ *     The offset within the string at which counting begins. Defaults to the
+ *     beginning of the string.
+ *
+ * @param {number} [end]
+ *     The offset just past the last character to count. Defaults to the end
+ *     of the string.
+ *
+ * @returns {!number}
+ *     The number of codepoints found within the requested range.
+ */
+Guacamole.Parser.codePointCount = function codePointCount(str, start, end) {
+    var segment = str.substring(start || 0, end);
+    var pairPattern = /[\uD800-\uDBFF][\uDC00-\uDFFF]/g;
+    var total = segment.length;
+    while (pairPattern.exec(segment) !== null)
+        total--;
+    return total;
+};