GUACAMOLE-896: Add JavaScript UTF-8 parser implementation.

This commit is contained in:
Michael Jumper
2022-02-11 16:33:02 -08:00
parent a97404b6ad
commit f559f5ca70
2 changed files with 134 additions and 93 deletions

View File

@@ -38,6 +38,13 @@ Guacamole.StringReader = function(stream) {
*/
var guac_reader = this;
/**
* Parser for received UTF-8 data. Each buffer delivered through the
* wrapped array reader's ondata handler is passed to this parser's
* decode() function to obtain the corresponding text.
*
* @type {!Guacamole.UTF8Parser}
*/
var utf8Parser = new Guacamole.UTF8Parser();
/**
* Wrapped Guacamole.ArrayBufferReader.
*
@@ -46,103 +53,11 @@ Guacamole.StringReader = function(stream) {
*/
var array_reader = new Guacamole.ArrayBufferReader(stream);
/**
* The number of bytes remaining for the current codepoint. This is zero
* whenever no multibyte sequence is in progress, which is the condition
* __decode_utf8() uses to decide whether the next byte starts a new
* character. The value is kept outside __decode_utf8() so that a
* character split across two buffers can be completed by the next call.
*
* @private
* @type {!number}
*/
var bytes_remaining = 0;
/**
* The current codepoint value, as calculated from bytes read so far. The
* payload bits of the lead byte are stored first, then six further bits
* are shifted in for each continuation byte. Like bytes_remaining, this
* is kept outside __decode_utf8() so that it survives between calls.
*
* @private
* @type {!number}
*/
var codepoint = 0;
/**
 * Decodes the given UTF-8 data into a Unicode string. The data may end in
 * the middle of a multibyte character; the partially-read character is then
 * carried over in bytes_remaining / codepoint and completed by the next
 * call.
 *
 * Codepoints above U+FFFF are emitted as a UTF-16 surrogate pair, as
 * JavaScript strings require. A byte that cannot start a sequence, a byte
 * that interrupts an unfinished sequence, and a finished sequence whose
 * value exceeds U+10FFFF are each replaced with U+FFFD.
 *
 * @private
 * @param {!ArrayBuffer} buffer
 *     Arbitrary UTF-8 data.
 *
 * @return {!string}
 *     A decoded Unicode string.
 */
function __decode_utf8(buffer) {

    var text = "";
    var bytes = new Uint8Array(buffer);

    for (var i = 0; i < bytes.length; i++) {

        // Get current byte
        var value = bytes[i];

        // Start new codepoint if nothing yet read
        if (bytes_remaining === 0) {

            // 1 byte (0xxxxxxx)
            if ((value | 0x7F) === 0x7F)
                text += String.fromCharCode(value);

            // 2 byte (110xxxxx)
            else if ((value | 0x1F) === 0xDF) {
                codepoint = value & 0x1F;
                bytes_remaining = 1;
            }

            // 3 byte (1110xxxx)
            else if ((value | 0x0F) === 0xEF) {
                codepoint = value & 0x0F;
                bytes_remaining = 2;
            }

            // 4 byte (11110xxx)
            else if ((value | 0x07) === 0xF7) {
                codepoint = value & 0x07;
                bytes_remaining = 3;
            }

            // Invalid byte (stray continuation byte or 0xF8 - 0xFF)
            else
                text += "\uFFFD";

        }

        // Continue existing codepoint (10xxxxxx)
        else if ((value | 0x3F) === 0xBF) {

            codepoint = (codepoint << 6) | (value & 0x3F);
            bytes_remaining--;

            // Write codepoint if finished
            if (bytes_remaining === 0) {

                // A 4-byte sequence can encode values past the end of
                // Unicode (up to 0x1FFFFF); these have no representation
                if (codepoint > 0x10FFFF)
                    text += "\uFFFD";

                // String.fromCharCode() keeps only the low 16 bits of each
                // argument, so supplementary-plane codepoints must be
                // written as a high/low surrogate pair
                else if (codepoint > 0xFFFF) {
                    var offset = codepoint - 0x10000;
                    text += String.fromCharCode(
                        0xD800 | (offset >> 10),
                        0xDC00 | (offset & 0x3FF)
                    );
                }

                else
                    text += String.fromCharCode(codepoint);

            }

        }

        // Invalid byte: abandon the unfinished sequence. Note that the
        // interrupting byte is consumed along with it.
        else {
            bytes_remaining = 0;
            text += "\uFFFD";
        }

    }

    return text;

}
// Receive blobs as strings
array_reader.ondata = function(buffer) {
// Decode UTF-8
var text = __decode_utf8(buffer);
var text = utf8Parser.decode(buffer);
// Call handler, if present
if (guac_reader.ontext)