GUACAMOLE-615: Correct parser calculation of element lengths.

This commit is contained in:
Mike Jumper
2023-04-21 16:51:27 -07:00
parent 82033adad0
commit 81f0e8c280
3 changed files with 227 additions and 75 deletions

View File

@@ -25,11 +25,13 @@ var Guacamole = Guacamole || {};
* *
* @constructor * @constructor
*/ */
Guacamole.Parser = function() { Guacamole.Parser = function Parser() {
/** /**
* Reference to this parser. * Reference to this parser.
*
* @private * @private
* @type {!Guacamole.Parser}
*/ */
var parser = this; var parser = this;
@@ -39,22 +41,49 @@ Guacamole.Parser = function() {
* is flushed into the element buffer. * is flushed into the element buffer.
* *
* @private * @private
* @type {!string}
*/ */
var buffer = ""; var buffer = '';
/** /**
* Buffer of all received, complete elements. After an entire instruction * Buffer of all received, complete elements. After an entire instruction
* is read, this buffer is flushed, and a new instruction begins. * is read, this buffer is flushed, and a new instruction begins.
* *
* @private * @private
* @type {!string[]}
*/ */
var element_buffer = []; var elementBuffer = [];
// The location of the last element's terminator /**
var element_end = -1; * The character offset within the buffer of the current or most recently
* parsed element's terminator. If sufficient characters have not yet been
* read via calls to receive(), this may point to an offset well beyond the
* end of the buffer. If no characters for an element have yet been read,
* this will be -1.
*
* @private
* @type {!number}
*/
var elementEnd = -1;
// Where to start the next length search or the next element /**
var start_index = 0; * The character offset within the buffer of the location that the parser
* should start looking for the next element length search or next element
* value.
*
* @private
* @type {!number}
*/
var startIndex = 0;
/**
* The declared length of the current element being parsed, in Unicode
* codepoints.
*
* @private
* @type {!number}
*/
var elementCodepoints = 0;
/** /**
* Appends the given instruction data packet to the internal buffer of * Appends the given instruction data packet to the internal buffer of
@@ -64,79 +93,119 @@ Guacamole.Parser = function() {
* @param {!string} packet * @param {!string} packet
* The instruction data to receive. * The instruction data to receive.
*/ */
this.receive = function(packet) { this.receive = function receive(packet) {
// Truncate buffer as necessary // Truncate buffer as necessary
if (start_index > 4096 && element_end >= start_index) { if (startIndex > 4096 && elementEnd >= startIndex) {
buffer = buffer.substring(start_index); buffer = buffer.substring(startIndex);
// Reset parse relative to truncation // Reset parse relative to truncation
element_end -= start_index; elementEnd -= startIndex;
start_index = 0; startIndex = 0;
} }
// Append data to buffer // Append data to buffer ONLY if there is outstanding data present. It
buffer += packet; // is otherwise much faster to simply parse the received buffer as-is,
// and tunnel implementations can take advantage of this by preferring
// to send only complete instructions. Both the HTTP and WebSocket
// tunnel implementations included with Guacamole already do this.
if (buffer.length)
buffer += packet;
else
buffer = packet;
// While search is within currently received data // While search is within currently received data
while (element_end < buffer.length) { while (elementEnd < buffer.length) {
// If we are waiting for element data // If we are waiting for element data
if (element_end >= start_index) { if (elementEnd >= startIndex) {
// If we have enough data in the buffer to fill the element
// value, but the number of codepoints in the expected substring
// containing the element value is less than its declared
// length, that can only be because the element contains
// characters split between high and low surrogates, and the
// actual end of the element value is further out. The minimum
// number of additional characters that must be read to satisfy
// the declared length is simply the difference between the
// number of codepoints actually present vs. the expected
// length.
var codepoints = Guacamole.Parser.codePointCount(buffer, startIndex, elementEnd);
if (codepoints < elementCodepoints) {
elementEnd += elementCodepoints - codepoints;
continue;
}
// If the current element ends with a character involving both
// a high and low surrogate, elementEnd points to the low
// surrogate and NOT the element terminator. We must shift the
// end and reevaluate.
else if (elementCodepoints && buffer.codePointAt(elementEnd - 1) >= 0x10000) {
elementEnd++;
continue;
}
// We now have enough data for the element. Parse. // We now have enough data for the element. Parse.
var element = buffer.substring(start_index, element_end); var element = buffer.substring(startIndex, elementEnd);
var terminator = buffer.substring(element_end, element_end+1); var terminator = buffer.substring(elementEnd, elementEnd+1);
// Add element to array // Add element to array
element_buffer.push(element); elementBuffer.push(element);
// If last element, handle instruction // If last element, handle instruction
if (terminator == ";") { if (terminator === ';') {
// Get opcode // Get opcode
var opcode = element_buffer.shift(); var opcode = elementBuffer.shift();
// Call instruction handler. // Call instruction handler.
if (parser.oninstruction != null) if (parser.oninstruction !== null)
parser.oninstruction(opcode, element_buffer); parser.oninstruction(opcode, elementBuffer);
// Clear elements // Clear elements
element_buffer.length = 0; elementBuffer = [];
// Immediately truncate buffer if its contents have been
// completely parsed, so that the next call to receive()
// need not append to the buffer unnecessarily
if (elementEnd + 1 === buffer.length) {
elementEnd = -1;
buffer = '';
}
} }
else if (terminator != ',') else if (terminator !== ',')
throw new Error("Illegal terminator."); throw new Error('Element terminator of instruction was not ";" nor ",".');
// Start searching for length at character after // Start searching for length at character after
// element terminator // element terminator
start_index = element_end + 1; startIndex = elementEnd + 1;
} }
// Search for end of length // Search for end of length
var length_end = buffer.indexOf(".", start_index); var lengthEnd = buffer.indexOf('.', startIndex);
if (length_end != -1) { if (lengthEnd !== -1) {
// Parse length // Parse length
var length = parseInt(buffer.substring(element_end+1, length_end)); elementCodepoints = parseInt(buffer.substring(elementEnd+1, lengthEnd));
if (isNaN(length)) if (isNaN(elementCodepoints))
throw new Error("Non-numeric character in element length."); throw new Error('Non-numeric character in element length.');
// Calculate start of element // Calculate start of element
start_index = length_end + 1; startIndex = lengthEnd + 1;
// Calculate location of element terminator // Calculate location of element terminator
element_end = start_index + length; elementEnd = startIndex + elementCodepoints;
} }
// If no period yet, continue search when more data // If no period yet, continue search when more data
// is received // is received
else { else {
start_index = buffer.length; startIndex = buffer.length;
break; break;
} }
@@ -157,3 +226,34 @@ Guacamole.Parser = function() {
this.oninstruction = null; this.oninstruction = null;
}; };
/**
 * Counts the Unicode codepoints (as opposed to UTF-16 code units) found in
 * the given string, optionally restricted to a range of character offsets.
 * A high surrogate immediately followed by a low surrogate is counted as one
 * codepoint. Any surrogate that is not part of such a pair is counted on its
 * own as a single codepoint.
 *
 * @param {!string} str
 *     The string to inspect.
 *
 * @param {number} [start=0]
 *     The character offset at which counting begins. Counting begins at the
 *     start of the string if omitted.
 *
 * @param {number} [end]
 *     The character offset just past the last character to count. Counting
 *     continues through the end of the string if omitted.
 *
 * @returns {!number}
 *     The number of Unicode codepoints within the requested portion of the
 *     given string.
 */
Guacamole.Parser.codePointCount = function codePointCount(str, start, end) {

    // Restrict counting to the requested portion of the string
    var segment = str.substring(start || 0, end);

    var count = 0;
    for (var i = 0; i < segment.length; i++) {

        // A high surrogate immediately followed by a low surrogate forms a
        // single codepoint, so step over the low surrogate as well
        var unit = segment.charCodeAt(i);
        if (unit >= 0xD800 && unit <= 0xDBFF) {
            var next = segment.charCodeAt(i + 1);
            if (next >= 0xDC00 && next <= 0xDFFF)
                i++;
        }

        count++;

    }

    return count;

};

View File

@@ -93,6 +93,22 @@ public class GuacamoleInstruction {
return args; return args;
} }
/**
 * Writes a single Guacamole instruction element to the provided
 * StringBuilder: the length of the value in Unicode codepoints, a '.'
 * separator, and then the value itself.
 *
 * @param buff
 *     The StringBuilder that receives the encoded element.
 *
 * @param element
 *     The string value of the element to encode.
 */
private static void appendElement(StringBuilder buff, String element) {

    // The length prefix counts codepoints, not Java chars, so a surrogate
    // pair contributes 1 rather than 2
    int codepoints = element.codePointCount(0, element.length());

    buff.append(codepoints).append('.').append(element);

}
/** /**
* Returns this GuacamoleInstruction in the form it would be sent over the * Returns this GuacamoleInstruction in the form it would be sent over the
* Guacamole protocol. * Guacamole protocol.
@@ -111,16 +127,12 @@ public class GuacamoleInstruction {
StringBuilder buff = new StringBuilder(); StringBuilder buff = new StringBuilder();
// Write opcode // Write opcode
buff.append(opcode.length()); appendElement(buff, opcode);
buff.append('.');
buff.append(opcode);
// Write argument values // Write argument values
for (String value : args) { for (String value : args) {
buff.append(','); buff.append(',');
buff.append(value.length()); appendElement(buff, value);
buff.append('.');
buff.append(value);
} }
// Write terminator // Write terminator

View File

@@ -21,7 +21,6 @@ package org.apache.guacamole.protocol;
import java.util.Arrays; import java.util.Arrays;
import java.util.Iterator; import java.util.Iterator;
import java.util.List;
import org.apache.guacamole.GuacamoleException; import org.apache.guacamole.GuacamoleException;
import org.apache.guacamole.GuacamoleServerException; import org.apache.guacamole.GuacamoleServerException;
@@ -87,10 +86,18 @@ public class GuacamoleParser implements Iterator<GuacamoleInstruction> {
private State state = State.PARSING_LENGTH; private State state = State.PARSING_LENGTH;
/** /**
* The length of the current element, if known. * The length of the current element, if known, in Java characters. This
* value may be adjusted as an element is parsed to take surrogates into
* account.
*/ */
private int elementLength = 0; private int elementLength = 0;
/**
* The length of the current element, if known, in Unicode codepoints. This
* value will NOT change as an element is parsed.
*/
private int elementCodepoints;
/** /**
* The number of elements currently parsed. * The number of elements currently parsed.
*/ */
@@ -104,13 +111,22 @@ public class GuacamoleParser implements Iterator<GuacamoleInstruction> {
/** /**
* Appends data from the given buffer to the current instruction. * Appends data from the given buffer to the current instruction.
* *
* @param chunk The buffer containing the data to append. * @param chunk
* @param offset The offset within the buffer where the data begins. * The buffer containing the data to append.
* @param length The length of the data to append. *
* @return The number of characters appended, or 0 if complete instructions * @param offset
* have already been parsed and must be read via next() before * The offset within the buffer where the data begins.
* more data can be appended. *
* @throws GuacamoleException If an error occurs while parsing the new data. * @param length
* The length of the data to append.
*
* @return
* The number of characters appended, or 0 if complete instructions
* have already been parsed and must be read via next() before more
* data can be appended.
*
* @throws GuacamoleException
* If an error occurs while parsing the new data.
*/ */
public int append(char chunk[], int offset, int length) throws GuacamoleException { public int append(char chunk[], int offset, int length) throws GuacamoleException {
@@ -156,39 +172,63 @@ public class GuacamoleParser implements Iterator<GuacamoleInstruction> {
} }
// Save length // Save length
elementLength = parsedLength; elementCodepoints = elementLength = parsedLength;
} // end parse length } // end parse length
// Parse element content, if available // Parse element content, if available
if (state == State.PARSING_CONTENT && charsParsed + elementLength + 1 <= length) { while (state == State.PARSING_CONTENT && charsParsed + elementLength + 1 <= length) {
// Read element // Read element (which may not match element length if surrogate
// characters are present)
String element = new String(chunk, offset + charsParsed, elementLength); String element = new String(chunk, offset + charsParsed, elementLength);
// Verify element contains the number of whole Unicode characters
// expected, scheduling a future read if we don't yet have enough
// characters
int codepoints = element.codePointCount(0, element.length());
if (codepoints < elementCodepoints) {
elementLength += elementCodepoints - codepoints;
continue;
}
// If the current element ends with a character involving both
// a high and low surrogate, elementLength points to the low
// surrogate and NOT the element terminator. We must correct the
// length and reevaluate.
else if (Character.isSurrogatePair(chunk[offset + charsParsed + elementLength - 1],
chunk[offset + charsParsed + elementLength])) {
elementLength++;
continue;
}
charsParsed += elementLength; charsParsed += elementLength;
elementLength = 0; elementLength = 0;
// Read terminator char following element
char terminator = chunk[offset + charsParsed++];
// Add element to currently parsed elements // Add element to currently parsed elements
elements[elementCount++] = element; elements[elementCount++] = element;
// If semicolon, store end-of-instruction // Read terminator char following element
if (terminator == ';') { char terminator = chunk[offset + charsParsed++];
state = State.COMPLETE; switch (terminator) {
parsedInstruction = new GuacamoleInstruction(elements[0],
Arrays.asList(elements).subList(1, elementCount));
}
// If comma, move on to next element // If semicolon, store end-of-instruction
else if (terminator == ',') case ';':
state = State.PARSING_LENGTH; state = State.COMPLETE;
parsedInstruction = new GuacamoleInstruction(elements[0],
Arrays.asList(elements).subList(1, elementCount));
break;
// If comma, move on to next element
case ',':
state = State.PARSING_LENGTH;
break;
// Otherwise, parse error
default:
state = State.ERROR;
throw new GuacamoleServerException("Element terminator of instruction was not ';' nor ','");
// Otherwise, parse error
else {
state = State.ERROR;
throw new GuacamoleServerException("Element terminator of instruction was not ';' nor ','");
} }
} // end parse content } // end parse content