/** * Unidecode takes UTF-8 data and tries to represent it in US-ASCII characters (i.e., the universally displayable characters between 0x00 and 0x7F). * The representation is almost always an attempt at transliteration -- i.e., conveying, in Roman letters, the pronunciation expressed by the text in * some other writing system. * * The tables used (in data) are converted from the tables provided in the perl library Text::Unidecode (http://search.cpan.org/dist/Text-Unidecode/lib/Text/Unidecode.pm) * and are distributed under the perl license * * @author Francois-Guillaume Ribreau * * Based on the port of unidecode for php */ 'use strict'; var tr = {}; var utf8_rx = /(?![\x00-\x7F]|[\xC0-\xDF][\x80-\xBF]|[\xE0-\xEF][\x80-\xBF]{2}|[\xF0-\xF7][\x80-\xBF]{3})./g; module.exports = function (str) { return str.replace(utf8_rx, unidecode_internal_replace); }; function unidecode_internal_replace(match) { var utf16 = utf8_to_utf16(match); if (utf16 > 0xFFFF) { return '_'; } else { var h = utf16 >> 8; var l = utf16 & 0xFF; // (18) 18 > h < 1e (30) if (h > 24 && h < 30) return ''; //(d7) 215 > h < 249 (f9) no supported if (h > 215 && h < 249) return ''; if (!tr[h]) { switch (dec2hex(h)) { case '00': tr[h] = require('./data/x00'); break; case '01': tr[h] = require('./data/x01'); break; case '02': tr[h] = require('./data/x02'); break; case '03': tr[h] = require('./data/x03'); break; case '04': tr[h] = require('./data/x04'); break; case '05': tr[h] = require('./data/x05'); break; case '06': tr[h] = require('./data/x06'); break; case '07': tr[h] = require('./data/x07'); break; case '09': tr[h] = require('./data/x09'); break; case '0a': tr[h] = require('./data/x0a'); break; case '0b': tr[h] = require('./data/x0b'); break; case '0c': tr[h] = require('./data/x0c'); break; case '0d': tr[h] = require('./data/x0d'); break; case '0e': tr[h] = require('./data/x0e'); break; case '0f': tr[h] = require('./data/x0f'); break; case '10': tr[h] = require('./data/x10'); break; case '11': tr[h] = require('./data/x11'); break; case '12': tr[h] = require('./data/x12'); break; case '13': tr[h] = require('./data/x13'); break; case '14': tr[h] = require('./data/x14'); break; case '15': tr[h] = require('./data/x15'); break; case '16': tr[h] = require('./data/x16'); break; case '17': tr[h] = require('./data/x17'); break; case '18': tr[h] = require('./data/x18'); break; case '1e': tr[h] = require('./data/x1e'); break; case '1f': tr[h] = require('./data/x1f'); break; case '20': tr[h] = require('./data/x20'); break; case '21': tr[h] = require('./data/x21'); break; case '22': tr[h] = require('./data/x22'); break; case '23': tr[h] = require('./data/x23'); break; case '24': tr[h] = require('./data/x24'); break; case '25': tr[h] = require('./data/x25'); break; case '26': tr[h] = require('./data/x26'); break; case '27': tr[h] = require('./data/x27'); break; case '28': tr[h] = require('./data/x28'); break; case '2e': tr[h] = require('./data/x2e'); break; case '2f': tr[h] = require('./data/x2f'); break; case '30': tr[h] = require('./data/x30'); break; case '31': tr[h] = require('./data/x31'); break; case '32': tr[h] = require('./data/x32'); break; case '33': tr[h] = require('./data/x33'); break; case '4d': tr[h] = require('./data/x4d'); break; case '4e': tr[h] = require('./data/x4e'); break; case '4f': tr[h] = require('./data/x4f'); break; case '50': tr[h] = require('./data/x50'); break; case '51': tr[h] = require('./data/x51'); break; case '52': tr[h] = require('./data/x52'); break; case '53': tr[h] = require('./data/x53'); break; case '54': tr[h] = require('./data/x54'); break; case '55': tr[h] = require('./data/x55'); break; case '56': tr[h] = require('./data/x56'); break; case '57': tr[h] = require('./data/x57'); break; case '58': tr[h] = require('./data/x58'); break; case '59': tr[h] = require('./data/x59'); break; case '5a': tr[h] = require('./data/x5a'); break; case '5b': tr[h] = require('./data/x5b'); break; case '5c': tr[h] = require('./data/x5c'); break; case '5d': tr[h] = require('./data/x5d'); break; case '5e': tr[h] = require('./data/x5e'); break; case '5f': tr[h] = require('./data/x5f'); break; case '60': tr[h] = require('./data/x60'); break; case '61': tr[h] = require('./data/x61'); break; case '62': tr[h] = require('./data/x62'); break; case '63': tr[h] = require('./data/x63'); break; case '64': tr[h] = require('./data/x64'); break; case '65': tr[h] = require('./data/x65'); break; case '66': tr[h] = require('./data/x66'); break; case '67': tr[h] = require('./data/x67'); break; case '68': tr[h] = require('./data/x68'); break; case '69': tr[h] = require('./data/x69'); break; case '6a': tr[h] = require('./data/x6a'); break; case '6b': tr[h] = require('./data/x6b'); break; case '6c': tr[h] = require('./data/x6c'); break; case '6d': tr[h] = require('./data/x6d'); break; case '6e': tr[h] = require('./data/x6e'); break; case '6f': tr[h] = require('./data/x6f'); break; case '70': tr[h] = require('./data/x70'); break; case '71': tr[h] = require('./data/x71'); break; case '72': tr[h] = require('./data/x72'); break; case '73': tr[h] = require('./data/x73'); break; case '74': tr[h] = require('./data/x74'); break; case '75': tr[h] = require('./data/x75'); break; case '76': tr[h] = require('./data/x76'); break; case '77': tr[h] = require('./data/x77'); break; case '78': tr[h] = require('./data/x78'); break; case '79': tr[h] = require('./data/x79'); break; case '7a': tr[h] = require('./data/x7a'); break; case '7b': tr[h] = require('./data/x7b'); break; case '7c': tr[h] = require('./data/x7c'); break; case '7d': tr[h] = require('./data/x7d'); break; case '7e': tr[h] = require('./data/x7e'); break; case '7f': tr[h] = require('./data/x7f'); break; case '80': tr[h] = require('./data/x80'); break; case '81': tr[h] = require('./data/x81'); break; case '82': tr[h] = require('./data/x82'); break; case '83': tr[h] = require('./data/x83'); break; case '84': tr[h] = require('./data/x84'); break; case '85': tr[h] = require('./data/x85'); break; case '86': tr[h] = require('./data/x86'); break; case '87': tr[h] = require('./data/x87'); break; case '88': tr[h] = require('./data/x88'); break; case '89': tr[h] = require('./data/x89'); break; case '8a': tr[h] = require('./data/x8a'); break; case '8b': tr[h] = require('./data/x8b'); break; case '8c': tr[h] = require('./data/x8c'); break; case '8d': tr[h] = require('./data/x8d'); break; case '8e': tr[h] = require('./data/x8e'); break; case '8f': tr[h] = require('./data/x8f'); break; case '90': tr[h] = require('./data/x90'); break; case '91': tr[h] = require('./data/x91'); break; case '92': tr[h] = require('./data/x92'); break; case '93': tr[h] = require('./data/x93'); break; case '94': tr[h] = require('./data/x94'); break; case '95': tr[h] = require('./data/x95'); break; case '96': tr[h] = require('./data/x96'); break; case '97': tr[h] = require('./data/x97'); break; case '98': tr[h] = require('./data/x98'); break; case '99': tr[h] = require('./data/x99'); break; case '9a': tr[h] = require('./data/x9a'); break; case '9b': tr[h] = require('./data/x9b'); break; case '9c': tr[h] = require('./data/x9c'); break; case '9d': tr[h] = require('./data/x9d'); break; case '9e': tr[h] = require('./data/x9e'); break; case '9f': tr[h] = require('./data/x9f'); break; case 'a0': tr[h] = require('./data/xa0'); break; case 'a1': tr[h] = require('./data/xa1'); break; case 'a2': tr[h] = require('./data/xa2'); break; case 'a3': tr[h] = require('./data/xa3'); break; case 'a4': tr[h] = require('./data/xa4'); break; case 'ac': tr[h] = require('./data/xac'); break; case 'ad': tr[h] = require('./data/xad'); break; case 'ae': tr[h] = require('./data/xae'); break; case 'af': tr[h] = require('./data/xaf'); break; case 'b0': tr[h] = require('./data/xb0'); break; case 'b1': tr[h] = require('./data/xb1'); break; case 'b2': tr[h] = require('./data/xb2'); break; case 'b3': tr[h] = require('./data/xb3'); break; case 'b4': tr[h] = require('./data/xb4'); break; case 'b5': tr[h] = require('./data/xb5'); break; case 'b6': tr[h] = require('./data/xb6'); break; case 'b7': tr[h] = require('./data/xb7'); break; case 'b8': tr[h] = require('./data/xb8'); break; case 'b9': tr[h] = require('./data/xb9'); break; case 'ba': tr[h] = require('./data/xba'); break; case 'bb': tr[h] = require('./data/xbb'); break; case 'bc': tr[h] = require('./data/xbc'); break; case 'bd': tr[h] = require('./data/xbd'); break; case 'be': tr[h] = require('./data/xbe'); break; case 'bf': tr[h] = require('./data/xbf'); break; case 'c0': tr[h] = require('./data/xc0'); break; case 'c1': tr[h] = require('./data/xc1'); break; case 'c2': tr[h] = require('./data/xc2'); break; case 'c3': tr[h] = require('./data/xc3'); break; case 'c4': tr[h] = require('./data/xc4'); break; case 'c5': tr[h] = require('./data/xc5'); break; case 'c6': tr[h] = require('./data/xc6'); break; case 'c7': tr[h] = require('./data/xc7'); break; case 'c8': tr[h] = require('./data/xc8'); break; case 'c9': tr[h] = require('./data/xc9'); break; case 'ca': tr[h] = require('./data/xca'); break; case 'cb': tr[h] = require('./data/xcb'); break; case 'cc': tr[h] = require('./data/xcc'); break; case 'cd': tr[h] = require('./data/xcd'); break; case 'ce': tr[h] = require('./data/xce'); break; case 'cf': tr[h] = require('./data/xcf'); break; case 'd0': tr[h] = require('./data/xd0'); break; case 'd1': tr[h] = require('./data/xd1'); break; case 'd2': tr[h] = require('./data/xd2'); break; case 'd3': tr[h] = require('./data/xd3'); break; case 'd4': tr[h] = require('./data/xd4'); break; case 'd5': tr[h] = require('./data/xd5'); break; case 'd6': tr[h] = require('./data/xd6'); break; case 'd7': tr[h] = require('./data/xd7'); break; case 'f9': tr[h] = require('./data/xf9'); break; case 'fa': tr[h] = require('./data/xfa'); break; case 'fb': tr[h] = require('./data/xfb'); break; case 'fc': tr[h] = require('./data/xfc'); break; case 'fd': tr[h] = require('./data/xfd'); break; case 'fe': tr[h] = require('./data/xfe'); break; case 'ff': tr[h] = require('./data/xff'); break; default: // console.error("Unidecode file not found for h=", h); return ''; } } return tr[h][l]; } } function dec2hex(i) { return (i + 0x100).toString(16).substr(-2); } function utf8_to_utf16(raw) { var b1, b2, b3, b4, x, y, z; while (Array.isArray(raw)) raw = raw[0]; switch (raw.length) { case 1: return ord(raw); // http://en.wikipedia.org/wiki/UTF-8 case 2: b1 = ord(raw.substr(0, 1)); b2 = ord(raw.substr(1, 1)); x = ((b1 & 0x03) << 6) | (b2 & 0x3F); y = (b1 & 0x1C) >> 2; return (y << 8) | x; case 3: b1 = ord(raw.substr(0, 1)); b2 = ord(raw.substr(1, 1)); b3 = ord(raw.substr(2, 1)); x = ((b2 & 0x03) << 6) | (b3 & 0x3F); y = ((b1 & 0x0F) << 4) | ((b2 & 0x3C) >> 2); return (y << 8) | x; default: b1 = ord(raw.substr(0, 1)); b2 = ord(raw.substr(1, 1)); b3 = ord(raw.substr(2, 1)); b4 = ord(raw.substr(3, 1)); x = ((b3 & 0x03) << 6) | (b4 & 0x3F); y = ((b2 & 0x0F) << 4) | ((b3 & 0x3C) >> 2); z = ((b1 & 0x07) << 5) | ((b2 & 0x30) >> 4); return (z << 16) | (y << 8) | x; } } /* From php.js */ function ord(string) { // Returns the codepoint value of a character // // version: 1109.2015 // discuss at: http://phpjs.org/functions/ord // + original by: Kevin van Zonneveld (http://kevin.vanzonneveld.net) // + bugfixed by: Onno Marsman // + improved by: Brett Zamir (http://brett-zamir.me) // + input by: incidence // * example 1: ord('K'); // * returns 1: 75 // * example 2: ord('\uD800\uDC00'); // surrogate pair to create a single Unicode character // * returns 2: 65536 var str = string + '', code = str.charCodeAt(0); if (0xD800 <= code && code <= 0xDBFF) { // High surrogate (could change last hex to 0xDB7F to treat high private surrogates as single characters) var hi = code; if (str.length === 1) { return code; // This is just a high surrogate with no following low surrogate, so we return its value; // we could also throw an error as it is not a complete character, but someone may want to know } var low = str.charCodeAt(1); return ((hi - 0xD800) * 0x400) + (low - 0xDC00) + 0x10000; } if (0xDC00 <= code && code <= 0xDFFF) { // Low surrogate return code; // This is just a low surrogate with no preceding high surrogate, so we return its value; // we could also throw an error as it is not a complete character, but someone may want to know } return code; }