UTF-8

UTF-8 est un encodage à longueur variable : les caractères < 128 (ASCII) sont codés sur 1 octet, et seuls les caractères ≥ 128 nécessitent plusieurs octets (de 2 à 4 selon le point de code).

/**
 * Encodes a JavaScript (UTF-16) string as UTF-8.
 *
 * The result is a "binary string": each character of the returned string
 * has a code in the range 0-255 and represents one UTF-8 byte.
 *
 * Surrogate pairs are combined into a single code point and encoded on
 * 4 bytes. A lone (unpaired) surrogate cannot be represented in valid UTF-8
 * and is replaced by U+FFFD (REPLACEMENT CHARACTER), as TextEncoder does.
 *
 * @param {string} string - The text to encode.
 * @returns {string} The UTF-8 bytes, one byte per character.
 */
function encodeUTF8(string) {
	var utftext = "";

	for (var n = 0; n < string.length; n++) {

		var c = string.charCodeAt(n);

		if (c >= 0xD800 && c <= 0xDBFF) {
			// High surrogate: it must be followed by a low surrogate to form
			// a code point above U+FFFF.
			var low = (n + 1 < string.length) ? string.charCodeAt(n + 1) : 0;
			if (low >= 0xDC00 && low <= 0xDFFF) {
				c = 0x10000 + ((c - 0xD800) << 10) + (low - 0xDC00);
				n++; // the low surrogate has been consumed
			}
			else {
				c = 0xFFFD; // unpaired high surrogate
			}
		}
		else if (c >= 0xDC00 && c <= 0xDFFF) {
			c = 0xFFFD; // low surrogate without a preceding high surrogate
		}

		if (c < 128) {
			// 0xxxxxxx
			utftext += String.fromCharCode(c);
		}
		else if (c < 2048) {
			// 110xxxxx 10xxxxxx
			utftext += String.fromCharCode((c >> 6) | 192);
			utftext += String.fromCharCode((c & 63) | 128);
		}
		else if (c < 65536) {
			// 1110xxxx 10xxxxxx 10xxxxxx
			utftext += String.fromCharCode((c >> 12) | 224);
			utftext += String.fromCharCode(((c >> 6) & 63) | 128);
			utftext += String.fromCharCode((c & 63) | 128);
		}
		else {
			// 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
			utftext += String.fromCharCode((c >> 18) | 240);
			utftext += String.fromCharCode(((c >> 12) & 63) | 128);
			utftext += String.fromCharCode(((c >> 6) & 63) | 128);
			utftext += String.fromCharCode((c & 63) | 128);
		}

	}

	return utftext;
}

Continuation bytes

Continuation bytes can be used to count code points (Unicode characters): skip every byte of the form 10xxxxxx and count all the others.

// Count UTF-8 chars:
while (*c) count += ((*(c++) & 0xC0) == 0x80) ? 0 : 1;

Continuation bytes are the non-first bytes of a multi-byte character.

Last updated