shift73k/assets_old/node_modules/unidecode/unidecode.js

671 lines
16 KiB
JavaScript

/**
* Unidecode takes UTF-8 data and tries to represent it in US-ASCII characters (i.e., the universally displayable characters between 0x00 and 0x7F).
* The representation is almost always an attempt at transliteration -- i.e., conveying, in Roman letters, the pronunciation expressed by the text in
* some other writing system.
*
* The tables used (in data) are converted from the tables provided in the Perl library Text::Unidecode (http://search.cpan.org/dist/Text-Unidecode/lib/Text/Unidecode.pm)
* and are distributed under the Perl license.
*
* @author Francois-Guillaume Ribreau
*
* Based on the port of unidecode for php
*/
'use strict';
var tr = {};
var utf8_rx = /(?![\x00-\x7F]|[\xC0-\xDF][\x80-\xBF]|[\xE0-\xEF][\x80-\xBF]{2}|[\xF0-\xF7][\x80-\xBF]{3})./g;
module.exports = function (str) {
return str.replace(utf8_rx, unidecode_internal_replace);
};
/**
 * Replacement callback for String.prototype.replace: turns one matched
 * character into its US-ASCII representation.
 *
 * The character code is split into a high byte `h` (which 256-entry table to
 * use) and a low byte `l` (the index inside that table). Tables are loaded on
 * first use and cached in the file-level `tr` object.
 *
 * @param {string} match - The matched character.
 * @returns {string} The replacement text: '_' when the decoded value is above
 *   0xFFFF, '' when no table or no entry exists for the character, otherwise
 *   the table entry.
 */
function unidecode_internal_replace(match) {
  var utf16 = utf8_to_utf16(match);

  // Values beyond 16 bits have no table; emit a placeholder.
  if (utf16 > 0xFFFF) {
    return '_';
  }

  var h = utf16 >> 8;
  var l = utf16 & 0xFF;

  // 0x18 < h < 0x1e: no tables exist for these high bytes.
  if (h > 24 && h < 30) return '';

  // 0xd7 < h < 0xf9: not supported (this range includes the surrogates).
  if (h > 215 && h < 249) return '';

  if (!tr[h]) {
    var table = unidecode_load_table(h);
    if (!table) {
      // console.error("Unidecode file not found for h=", h);
      return '';
    }
    tr[h] = table;
  }

  // A table with a hole, or with fewer than 256 entries, yields undefined
  // here. Returning that from a replace() callback would insert the literal
  // text "undefined" into the output, so fall back to the empty string.
  var replacement = tr[h][l];
  return replacement == null ? '' : replacement;
}

/**
 * Loads the transliteration table for one high byte, or returns null when no
 * table file exists for it.
 *
 * Every path is spelled out as a literal, as in the original switch --
 * presumably so that bundlers can resolve the files statically; keep it that
 * way unless that is confirmed to be unnecessary.
 *
 * @param {number} h - High byte of the character code (0..255).
 * @returns {?Object} The value exported by the matching ./data/xNN module, or
 *   null when there is none.
 */
function unidecode_load_table(h) {
  switch (dec2hex(h)) {
    case '00': return require('./data/x00');
    case '01': return require('./data/x01');
    case '02': return require('./data/x02');
    case '03': return require('./data/x03');
    case '04': return require('./data/x04');
    case '05': return require('./data/x05');
    case '06': return require('./data/x06');
    case '07': return require('./data/x07');
    case '09': return require('./data/x09');
    case '0a': return require('./data/x0a');
    case '0b': return require('./data/x0b');
    case '0c': return require('./data/x0c');
    case '0d': return require('./data/x0d');
    case '0e': return require('./data/x0e');
    case '0f': return require('./data/x0f');
    case '10': return require('./data/x10');
    case '11': return require('./data/x11');
    case '12': return require('./data/x12');
    case '13': return require('./data/x13');
    case '14': return require('./data/x14');
    case '15': return require('./data/x15');
    case '16': return require('./data/x16');
    case '17': return require('./data/x17');
    case '18': return require('./data/x18');
    case '1e': return require('./data/x1e');
    case '1f': return require('./data/x1f');
    case '20': return require('./data/x20');
    case '21': return require('./data/x21');
    case '22': return require('./data/x22');
    case '23': return require('./data/x23');
    case '24': return require('./data/x24');
    case '25': return require('./data/x25');
    case '26': return require('./data/x26');
    case '27': return require('./data/x27');
    case '28': return require('./data/x28');
    case '2e': return require('./data/x2e');
    case '2f': return require('./data/x2f');
    case '30': return require('./data/x30');
    case '31': return require('./data/x31');
    case '32': return require('./data/x32');
    case '33': return require('./data/x33');
    case '4d': return require('./data/x4d');
    case '4e': return require('./data/x4e');
    case '4f': return require('./data/x4f');
    case '50': return require('./data/x50');
    case '51': return require('./data/x51');
    case '52': return require('./data/x52');
    case '53': return require('./data/x53');
    case '54': return require('./data/x54');
    case '55': return require('./data/x55');
    case '56': return require('./data/x56');
    case '57': return require('./data/x57');
    case '58': return require('./data/x58');
    case '59': return require('./data/x59');
    case '5a': return require('./data/x5a');
    case '5b': return require('./data/x5b');
    case '5c': return require('./data/x5c');
    case '5d': return require('./data/x5d');
    case '5e': return require('./data/x5e');
    case '5f': return require('./data/x5f');
    case '60': return require('./data/x60');
    case '61': return require('./data/x61');
    case '62': return require('./data/x62');
    case '63': return require('./data/x63');
    case '64': return require('./data/x64');
    case '65': return require('./data/x65');
    case '66': return require('./data/x66');
    case '67': return require('./data/x67');
    case '68': return require('./data/x68');
    case '69': return require('./data/x69');
    case '6a': return require('./data/x6a');
    case '6b': return require('./data/x6b');
    case '6c': return require('./data/x6c');
    case '6d': return require('./data/x6d');
    case '6e': return require('./data/x6e');
    case '6f': return require('./data/x6f');
    case '70': return require('./data/x70');
    case '71': return require('./data/x71');
    case '72': return require('./data/x72');
    case '73': return require('./data/x73');
    case '74': return require('./data/x74');
    case '75': return require('./data/x75');
    case '76': return require('./data/x76');
    case '77': return require('./data/x77');
    case '78': return require('./data/x78');
    case '79': return require('./data/x79');
    case '7a': return require('./data/x7a');
    case '7b': return require('./data/x7b');
    case '7c': return require('./data/x7c');
    case '7d': return require('./data/x7d');
    case '7e': return require('./data/x7e');
    case '7f': return require('./data/x7f');
    case '80': return require('./data/x80');
    case '81': return require('./data/x81');
    case '82': return require('./data/x82');
    case '83': return require('./data/x83');
    case '84': return require('./data/x84');
    case '85': return require('./data/x85');
    case '86': return require('./data/x86');
    case '87': return require('./data/x87');
    case '88': return require('./data/x88');
    case '89': return require('./data/x89');
    case '8a': return require('./data/x8a');
    case '8b': return require('./data/x8b');
    case '8c': return require('./data/x8c');
    case '8d': return require('./data/x8d');
    case '8e': return require('./data/x8e');
    case '8f': return require('./data/x8f');
    case '90': return require('./data/x90');
    case '91': return require('./data/x91');
    case '92': return require('./data/x92');
    case '93': return require('./data/x93');
    case '94': return require('./data/x94');
    case '95': return require('./data/x95');
    case '96': return require('./data/x96');
    case '97': return require('./data/x97');
    case '98': return require('./data/x98');
    case '99': return require('./data/x99');
    case '9a': return require('./data/x9a');
    case '9b': return require('./data/x9b');
    case '9c': return require('./data/x9c');
    case '9d': return require('./data/x9d');
    case '9e': return require('./data/x9e');
    case '9f': return require('./data/x9f');
    case 'a0': return require('./data/xa0');
    case 'a1': return require('./data/xa1');
    case 'a2': return require('./data/xa2');
    case 'a3': return require('./data/xa3');
    case 'a4': return require('./data/xa4');
    case 'ac': return require('./data/xac');
    case 'ad': return require('./data/xad');
    case 'ae': return require('./data/xae');
    case 'af': return require('./data/xaf');
    case 'b0': return require('./data/xb0');
    case 'b1': return require('./data/xb1');
    case 'b2': return require('./data/xb2');
    case 'b3': return require('./data/xb3');
    case 'b4': return require('./data/xb4');
    case 'b5': return require('./data/xb5');
    case 'b6': return require('./data/xb6');
    case 'b7': return require('./data/xb7');
    case 'b8': return require('./data/xb8');
    case 'b9': return require('./data/xb9');
    case 'ba': return require('./data/xba');
    case 'bb': return require('./data/xbb');
    case 'bc': return require('./data/xbc');
    case 'bd': return require('./data/xbd');
    case 'be': return require('./data/xbe');
    case 'bf': return require('./data/xbf');
    case 'c0': return require('./data/xc0');
    case 'c1': return require('./data/xc1');
    case 'c2': return require('./data/xc2');
    case 'c3': return require('./data/xc3');
    case 'c4': return require('./data/xc4');
    case 'c5': return require('./data/xc5');
    case 'c6': return require('./data/xc6');
    case 'c7': return require('./data/xc7');
    case 'c8': return require('./data/xc8');
    case 'c9': return require('./data/xc9');
    case 'ca': return require('./data/xca');
    case 'cb': return require('./data/xcb');
    case 'cc': return require('./data/xcc');
    case 'cd': return require('./data/xcd');
    case 'ce': return require('./data/xce');
    case 'cf': return require('./data/xcf');
    case 'd0': return require('./data/xd0');
    case 'd1': return require('./data/xd1');
    case 'd2': return require('./data/xd2');
    case 'd3': return require('./data/xd3');
    case 'd4': return require('./data/xd4');
    case 'd5': return require('./data/xd5');
    case 'd6': return require('./data/xd6');
    case 'd7': return require('./data/xd7');
    case 'f9': return require('./data/xf9');
    case 'fa': return require('./data/xfa');
    case 'fb': return require('./data/xfb');
    case 'fc': return require('./data/xfc');
    case 'fd': return require('./data/xfd');
    case 'fe': return require('./data/xfe');
    case 'ff': return require('./data/xff');
    default:
      return null;
  }
}
/**
 * Formats a byte value as two lowercase hexadecimal digits.
 *
 * 0x100 is added before the conversion so that values below 0x10 still
 * produce a leading zero; only the last two characters of the result are kept.
 *
 * @param {number} i - Value to format (the caller passes 0..255).
 * @returns {string} The last two hex digits, e.g. 10 -> '0a'.
 */
function dec2hex(i) {
  var padded = (i + 0x100).toString(16);
  return padded.slice(-2);
}
/**
 * Decodes one character into a numeric code.
 *
 * Despite the name, the result is a number (a code point), not UTF-16 text.
 * The input is interpreted according to its length:
 *   - 1 character: its code is returned via ord();
 *   - 2 or 3 characters: each character is treated as one byte of a UTF-8
 *     sequence and the sequence is decoded;
 *   - any other length: the first four characters are decoded as a 4-byte
 *     UTF-8 sequence.
 *
 * @param {string|Array} raw - The character to decode. Arrays are unwrapped
 *   by repeatedly taking their first element.
 * @returns {number} The decoded code.
 */
function utf8_to_utf16(raw) {
  var b1, b2, b3, b4,
    x, y, z;

  // Accept (possibly nested) arrays by descending to the first element.
  while (Array.isArray(raw)) raw = raw[0];

  switch (raw.length) {
    case 1:
      return ord(raw);

    // http://en.wikipedia.org/wiki/UTF-8
    case 2:
      // 110yyyxx 10xxxxxx
      b1 = ord(raw.substr(0, 1));
      b2 = ord(raw.substr(1, 1));
      x = ((b1 & 0x03) << 6) | (b2 & 0x3F);
      y = (b1 & 0x1C) >> 2;
      return (y << 8) | x;

    case 3:
      // 1110yyyy 10yyyyxx 10xxxxxx
      b1 = ord(raw.substr(0, 1));
      b2 = ord(raw.substr(1, 1));
      b3 = ord(raw.substr(2, 1));
      x = ((b2 & 0x03) << 6) | (b3 & 0x3F);
      y = ((b1 & 0x0F) << 4) | ((b2 & 0x3C) >> 2);
      return (y << 8) | x;

    default:
      // 11110zzz 10zzyyyy 10yyyyxx 10xxxxxx
      b1 = ord(raw.substr(0, 1));
      b2 = ord(raw.substr(1, 1));
      b3 = ord(raw.substr(2, 1));
      b4 = ord(raw.substr(3, 1));
      x = ((b3 & 0x03) << 6) | (b4 & 0x3F);
      y = ((b2 & 0x0F) << 4) | ((b3 & 0x3C) >> 2);
      // The three lead-byte bits sit directly above the two bits taken from
      // the second byte, so they are shifted by 2 (a shift of 5 would leave a
      // gap and corrupt every code point >= U+40000).
      z = ((b1 & 0x07) << 2) | ((b2 & 0x30) >> 4);
      return (z << 16) | (y << 8) | x;
  }
}
/* From php.js */
/**
 * Returns the code point of the first character of a string.
 *
 * A valid surrogate pair at the start of the string is combined into a single
 * code point above 0xFFFF. An unpaired surrogate (high or low) is returned as
 * its own code unit value rather than raising an error.
 *
 * @param {*} string - Value to inspect; it is converted to a string first.
 * @returns {number} The code point, or NaN for an empty string.
 */
function ord(string) {
  // version: 1109.2015
  // discuss at: http://phpjs.org/functions/ord
  // + original by: Kevin van Zonneveld (http://kevin.vanzonneveld.net)
  // + bugfixed by: Onno Marsman
  // + improved by: Brett Zamir (http://brett-zamir.me)
  // + input by: incidence
  // * example 1: ord('K');
  // * returns 1: 75
  // * example 2: ord('\uD800\uDC00'); // surrogate pair to create a single Unicode character
  // * returns 2: 65536
  var str = string + '';
  var code = str.charCodeAt(0);

  // Anything that is not a high surrogate is returned unchanged: ordinary
  // characters, a low surrogate with no preceding high surrogate, and NaN for
  // an empty string.
  // (The upper bound could be lowered to 0xDB7F to treat high private
  // surrogates as single characters.)
  var is_high_surrogate = 0xD800 <= code && code <= 0xDBFF;
  if (!is_high_surrogate) {
    return code;
  }

  // Only combine when the next unit really is a low surrogate. If it is
  // missing (charCodeAt gives NaN) or is some other character, the high
  // surrogate is unpaired and is returned as-is instead of a meaningless sum.
  var low = str.charCodeAt(1);
  var has_low_surrogate = 0xDC00 <= low && low <= 0xDFFF;
  if (!has_low_surrogate) {
    return code;
  }

  return ((code - 0xD800) * 0x400) + (low - 0xDC00) + 0x10000;
}