JavaScript module for accessing Unicode character data from UnicodeData.txt.
Install with the npm command as:
npm install unicharadata
then, load it in js files as:
const unicharadata = require("unicharadata");
NOTE: To reduce file size, the current module implementation embeds the UnicodeData.txt source and builds the complete character database at load time.
This method uses a large pre-built database JSON file. It would be about 1 sec faster than building the database (around 4 sec).
const unicharadata = require("unicharadata/trial/unicharadata-load");
Load with a standard script tag as:
<script src="unicharadata.js"></script>
see: browser-example.html ( demo )
Links via CDN
- https://raw.githack.com/bellbind/unicharadata/master/unicharadata.js
- https://rawgit.com/bellbind/unicharadata/master/unicharadata.js
NOTE: Details of the character properties are in http://www.unicode.org/reports/tr44/#UnicodeData.txt
unicharadata.lookup(name, defaultch = "")
: search for a character by its name
- uses the "Name" property, otherwise the "Unicode 1 Name" property
unicharadata.lookupname(ch, defaultname = "")
: get the name used by lookup(name)
unicharadata.name(ch, defaultname = "")
: get "Name" string
unicharadata.category(ch)
: get "General Category" string
- see value table
unicharadata.combining(ch)
: get "Canonical Combining Class" number
- see value table
unicharadata.bidirectional(ch)
: get "Bidi Class" string
- see value table
unicharadata.decomposition(ch)
: get "Decomposition Mapping" text
- returns ch itself when there is no decomposition mapping
unicharadata.decompositionTag(ch)
: get "Decomposition Type" tag string
- see value table
unicharadata.decimal(ch, defaultval = NaN)
: get "Numeric Value" integer as single decimal type
unicharadata.digit(ch, defaultval = NaN)
: get "Numeric Value" integer as single digit type
- covers everything decimal(ch) covers; this also includes digits such as superscripts (e.g. U+00B9)
unicharadata.numeric(ch, defaultval = NaN)
: get "Numeric Value" float number as numeric type
- covers everything digit(ch) covers; this also includes numbers that are not single digits
unicharadata.mirrored(ch)
: get "Bidi Mirrored" string: "Y" or "N"
unicharadata.unicode1name(ch, defaultname = "")
: get "Unicode 1 Name" string
unicharadata.isocomment(ch, defaultcomment = "")
: get "ISO Comment" string
unicharadata.upper(ch, defaultch = "")
: get "Simple Uppercase Mapping" character
unicharadata.lower(ch, defaultch = "")
: get "Simple Lowercase Mapping" character
unicharadata.title(ch, defaultch = "")
: get "Simple Titlecase Mapping" character
unicharadata.splitCombined(text)
: split a text into an array of strings, each containing a character and its following combining characters.
"use strict";
const unicharadata = require("unicharadata");
// API
console.assert(unicharadata.name("A") === "LATIN CAPITAL LETTER A");
console.assert(unicharadata.lookup("LATIN CAPITAL LETTER A") === "A");
console.assert(unicharadata.category("A") === "Lu");
console.assert(unicharadata.combining("A") === 0);
console.assert(unicharadata.bidirectional("A") === "L");
console.assert(unicharadata.decomposition("A") === "A");
console.assert(unicharadata.decompositionTag("A") === "");
console.assert(isNaN(unicharadata.decimal("A")));
console.assert(isNaN(unicharadata.digit("A")));
console.assert(isNaN(unicharadata.numeric("A")));
console.assert(unicharadata.mirrored("A") === "N");
console.assert(unicharadata.unicode1name("A") === "");
console.assert(unicharadata.lookupname("A") === "LATIN CAPITAL LETTER A");
console.assert(unicharadata.isocomment("A") === "");
console.assert(unicharadata.upper("A") === "");
console.assert(unicharadata.lower("A") === "a");
console.assert(unicharadata.title("A") === "");
const splitted = unicharadata.splitCombined("γγγπγ§γ".normalize("NFD"));
console.assert(splitted[0] === "γ");
console.assert(splitted[1] === "γ");
console.assert(splitted[2] === "γ".normalize("NFD"));
console.assert(splitted[3] === "π");
console.assert(splitted[4] === "γ§".normalize("NFD"));
console.assert(splitted[5] === "γ");
// specific
console.assert(unicharadata.decomposition("㍍") === "メートル");
console.assert(unicharadata.decompositionTag("㍍") === "<square>");
console.assert(unicharadata.decimal("１") === 1);
console.assert(unicharadata.digit("④") === 4);
console.assert(unicharadata.numeric("⅛") === 1 / 8);
console.assert(unicharadata.unicode1name("\n") === "LINE FEED (LF)");
console.assert(unicharadata.lookupname("\n") === "LINE FEED (LF)");
npm run download
: update UnicodeData.json from UnicodeData.txt on the web
npm run build
: update unicharadata.js from unicharadata-raw.js and UnicodeData.json
npm test
: run tests
npm run eslint
: check coding style with eslint
- setup with npm install is required
The package version is based on the Unicode "Version" of UnicodeData.txt.
- e.g. 9.0.0-alpha.2: the Unicode version is 9.0.0
unicharadata website: https://github.com/bellbind/unicharadata
- Unicode Character Data: http://unicode.org/ucd/
- Spec (Unicode Standard Annex #44): http://unicode.org/reports/tr44/
- Data: http://unicode.org/Public/UCD/latest/ucd/
- Python builtin unicodedata lib: https://docs.python.org/3/library/unicodedata.html
- function names borrowed from the library.