Barqawiz/Shakkala

#Question: Is it possible to use TensorFlow.js in browser

boumaza429 opened this issue · 2 comments

#Question: Is it possible to use TensorFlow.js in browser

Yes, it works; I tried it before.

I was making a JavaScript interface/port for Shakkala, but I didn't finish it because of these two main problems with TensorFlow.js:

  1. TensorFlow.js can be so unpredictably slow on some devices and browsers that it is not commercially viable. You can't make the client's device freeze for 5 minutes for auto tashkīl!
  2. Not really a TensorFlow.js problem: Shakkala's models are too big for the web. Many Arabic internet users have slow or low-bandwidth connections, so loading 3–28 MB for a non-essential feature is a luxury.
    Outside of hobby projects, I didn't find any good use for Shakkala in the browser. If you want to add Shakkala to your website, you would be better off exposing it through a simple API on your server. Even a simple HTTP API will get the job done.

Anyway, if you are still interested in TensorFlow.js, this TypeScript code is all you will need. Call prepareInput on the text before passing it to TensorFlow.js, and call prepareOutput on the output to decode it. I last tested it 2–3 years ago and have not tested it with the latest Shakkala, so expect some bugs:

// Input vocabulary: maps each supported input character, plus the special
// tokens '<PAD>', '<UNK>', '<GO>' and '<EOS>', to the integer id that
// prepareInput emits for it.
// Several keys are invisible or look-alike characters (zero-width and
// bidirectional control characters, more than one space-like character,
// Arabic presentation forms), so edit this table only with a tool that shows
// code points.
const strToIntData = new Map<string, number>([
	['“',55], ['ئ',56],['°',5],['و',6],['ε',7],['ﺇ',57],['ﺈ',58],['ﻧ',102],['\t',8],
	['‏',60],['ـ',9],['۷',106],['ﺄ',61],['۸',10],['•',62],['ו',64],['ظ',11],['ر',65],
	['ﻠ',66],['ψ',12],['ﻛ',13],['<GO>',2],['χ',14],['ز',92],['آ',15],['ﺁ',16],['ا',68],
	['؛',17],['έ',69],['ي',70],['ث',71],['ك',72],['أ',73],['«',74],['ص',75],['υ',20],
	['ﻹ',21],['ﺔ',76],['ت',22],['…',23],['ό',77],['τ',78],['ش',18],['غ',24],['ﻻ',79],
	['﴿',25],['ج',59],['σ',27],['ρ',26],['ن',81],['س',82],['ﻵ',83],[' ',84],['”',85],
	['‍',31],['ﻓ',33],['ﻴ',88],['ω',89],['ﺌ',90],['‘',34],['κ',35],['γ',80],['ل',29],
	['ط',93],['ﺂ',96],['ι',36],['ع',95],['ν',63],['ﻷ',98],['ے',37],['ق',38],['خ',19],
	['ى',39],['­',40],['ح',86],['ώ',103],[' ',28],['‫',94],['’',41],['–',42],['<EOS>',3],
	['ﻣ',43],['﴾',44],['ٰ',45],['<UNK>',1],['»',30],['ذ',97],['ﺑ',32],['ﻟ',99],['ف',46],
	['د',104],['۵',109],['ﺃ',87],['α',47],['م',48],['ه',49],['‬',108],['ؤ',50],['θ',51],
	['ﺋ',100],['ی',105],['´',110],['ض',111],['<PAD>',0],['​',52],['٪',91],['ί',112],
	['إ',119],['؟',101],['ﺒ',113],['ο',114],['‰',115],['π',116],['‎',117],['ﮐ',53],
	['ب',118],['ٱ',67],['μ',54],['ة',107],['ء',120]
]);

// Output vocabulary: the array index is the class id produced by argMax over
// the model output, and the element is the label that class decodes to.
// Labels are the four special tokens, a placeholder that
// combineTextWithHarakat turns into "no mark", or a string of one or two
// combining marks. Because some labels are longer than one UTF-16 code unit,
// a label must be treated as a unit and never split.
const intToStrData = ["<PAD>","<UNK>","<GO>","<EOS>","ـ","َ","ُّ","َّ","ـ","ِّ","ّ","ّْ","ٍّ","ِّ","ٍّ","ٌّ","َّ","ُ","ٌّ","ًّ","ْ","ٍ","ِ","ُّ","ًّ","ٌ","ً","ّّ"];

// Code points U+064E, U+064F, U+0650, U+0652, U+0651, U+064B, U+064C, U+064D
// (the Arabic diacritics, in decimal). Not referenced by the functions visible
// in this snippet: combineTextWithHarakat declares its own local `harakat`.
const harakat = [1614,1615,1616,1618,1617,1611,1612,1613];

// U+0651 ARABIC SHADDA. combineTextWithHarakat compares the first code unit
// of a label against this value when the input character is a space.
const connector = 1617;

/**
 * Encodes text as the sequence of vocabulary ids to feed to the model.
 *
 * The text is walked one UTF-16 code unit at a time, so the result always has
 * exactly `input.length` entries. combineTextWithHarakat indexes the text the
 * same way, which keeps positions aligned between input and output.
 * Characters that are not keys of `strToIntData` are encoded as the `<UNK>` id.
 *
 * TODO: add support for SMP characters (a surrogate pair is currently encoded
 * as two `<UNK>` ids).
 *
 * @param input - Text to encode; may be empty.
 * @returns One id per UTF-16 code unit of `input`.
 * @throws Error if a character is unknown and the vocabulary has no `<UNK>` entry.
 */
export function prepareInput(input: string): Array<number> {
  const unknownId = strToIntData.get('<UNK>');
  const ids: Array<number> = [];

  for (let i = 0; i < input.length; i++) {
    // `??` rather than `||`: an id of 0 is a valid lookup result and must not
    // be replaced by the <UNK> id.
    const id = strToIntData.get(input.charAt(i)) ?? unknownId;
    if (id === undefined) {
      throw new Error("prepareInput: strToIntData has no '<UNK>' entry");
    }
    ids.push(id);
  }

  return ids;
}

/**
 * Returns the index of the largest value in `input`.
 *
 * When several entries share the maximum, the first one wins. Comparisons use
 * `>`, so a NaN entry never replaces the current best.
 *
 * @param input - Scores for one position; any array-like of numbers.
 * @returns Index of the first maximal entry.
 * @throws TypeError if `input` is empty.
 */
function argMax(input: ArrayLike<number>): number {
  if (input.length === 0) {
    throw new TypeError('argMax: cannot take the arg-max of an empty array');
  }

  let best = 0;
  for (let i = 1; i < input.length; i++) {
    if (input[i] > input[best]) {
      best = i;
    }
  }

  return best;
}

/**
 * Drops every occurrence of the `<PAD>` id from a list of ids.
 *
 * @param input - Ids to filter; not modified.
 * @returns A new array with the remaining ids in their original order.
 */
function clean(input: Array<number>): Array<number> {
  const padId = strToIntData.get('<PAD>');
  const isNotPadding = (id: number): boolean => id !== padId;

  return input.filter(isNotPadding);
}

/**
 * Decodes class ids to their labels, one label per id.
 * Ids outside `intToStrData` decode to the empty string.
 */
function logitsToLabels(input: ReadonlyArray<number>): Array<string> {
  return input.map((id) => intToStrData[id] ?? '');
}

/**
 * Decodes class ids and concatenates their labels into one string.
 *
 * The result cannot be split back into labels, because labels have different
 * lengths. Use it for display only, never for alignment with the input text.
 */
function logitsToString(input: Array<number>): string {
  return logitsToLabels(input).join('');
}

/**
 * Turns one decoded label into the text to append after a character:
 * special tokens and the placeholder label become the empty string.
 */
function labelToHaraka(label: string | undefined): string {
  switch (label) {
    case undefined:
    case '<PAD>':
    case '<UNK>':
    case '<GO>':
    case '<EOS>':
    case 'ـ':
      return '';
    default:
      return label;
  }
}

/**
 * Interleaves the characters of `input` with their diacritics.
 *
 * @param input - The undiacritized text, indexed by UTF-16 code unit.
 * @param output - Preferably the list of decoded labels, one per position.
 *   A plain string is still accepted and is read one code unit per position,
 *   as before.
 * @returns `input` with each character followed by its diacritics. Characters
 *   beyond the end of `output` are copied unchanged.
 */
function combineTextWithHarakat(input: string, output: string | ReadonlyArray<string>): string {
  // Labels may span several code units (e.g. haraka + shadda, or '<UNK>'), so
  // they are kept as units instead of being indexed with charAt.
  const labels: ReadonlyArray<string> = typeof output === 'string' ? output.split('') : output;
  const aligned: Array<string> = [];
  const pairCount = Math.min(input.length, labels.length);

  for (let i = 0; i < pairCount; i++) {
    let haraka = labelToHaraka(labels[i]);

    // A label starting with the connector (shadda) that lands on a space is
    // merged into the previous slot. No slot is added for the space itself,
    // so later labels move one position earlier, as in the original code.
    if (input.charAt(i) === ' ' && haraka.charCodeAt(0) === connector) {
      haraka = (aligned.pop() ?? '') + haraka;
    }

    aligned.push(haraka);
  }

  let combined = '';
  for (let i = 0; i < input.length; i++) {
    combined += input.charAt(i) + (aligned[i] ?? '');
  }

  return combined;
}

/**
 * Decodes the model output and applies it to the original text.
 *
 * @param logits - One score vector per position.
 * @param input - The text that was passed to prepareInput.
 * @returns `input` with the predicted diacritics inserted.
 */
export function prepareOutput(logits: Array<Float32Array>, input: string): string {
  const classIds = clean(logits.map((scores) => argMax(scores)));

  return combineTextWithHarakat(input, logitsToLabels(classIds));
}

I converted the model using these commands:

pip install tensorflowjs
tensorflowjs_converter --input_format=keras   simple_model.h5 simple_model
// @ts-check
// Input vocabulary: maps each supported input character, plus the special
// tokens '<PAD>', '<UNK>', '<GO>' and '<EOS>', to the integer id that
// prepareInput emits for it.
// Several keys are invisible or look-alike characters (zero-width and
// bidirectional control characters, more than one space-like character,
// Arabic presentation forms), so edit this table only with a tool that shows
// code points.
const strToIntData = new Map([
  ['“', 55],
  ['ئ', 56],
  ['°', 5],
  ['و', 6],
  ['ε', 7],
  ['ﺇ', 57],
  ['ﺈ', 58],
  ['ﻧ', 102],
  ['\t', 8],
  ['‏', 60],
  ['ـ', 9],
  ['۷', 106],
  ['ﺄ', 61],
  ['۸', 10],
  ['•', 62],
  ['ו', 64],
  ['ظ', 11],
  ['ر', 65],
  ['ﻠ', 66],
  ['ψ', 12],
  ['ﻛ', 13],
  ['<GO>', 2],
  ['χ', 14],
  ['ز', 92],
  ['آ', 15],
  ['ﺁ', 16],
  ['ا', 68],
  ['؛', 17],
  ['έ', 69],
  ['ي', 70],
  ['ث', 71],
  ['ك', 72],
  ['أ', 73],
  ['«', 74],
  ['ص', 75],
  ['υ', 20],
  ['ﻹ', 21],
  ['ﺔ', 76],
  ['ت', 22],
  ['…', 23],
  ['ό', 77],
  ['τ', 78],
  ['ش', 18],
  ['غ', 24],
  ['ﻻ', 79],
  ['﴿', 25],
  ['ج', 59],
  ['σ', 27],
  ['ρ', 26],
  ['ن', 81],
  ['س', 82],
  ['ﻵ', 83],
  [' ', 84],
  ['”', 85],
  ['‍', 31],
  ['ﻓ', 33],
  ['ﻴ', 88],
  ['ω', 89],
  ['ﺌ', 90],
  ['‘', 34],
  ['κ', 35],
  ['γ', 80],
  ['ل', 29],
  ['ط', 93],
  ['ﺂ', 96],
  ['ι', 36],
  ['ع', 95],
  ['ν', 63],
  ['ﻷ', 98],
  ['ے', 37],
  ['ق', 38],
  ['خ', 19],
  ['ى', 39],
  ['­', 40],
  ['ح', 86],
  ['ώ', 103],
  [' ', 28],
  ['‫', 94],
  ['’', 41],
  ['–', 42],
  ['<EOS>', 3],
  ['ﻣ', 43],
  ['﴾', 44],
  ['ٰ', 45],
  ['<UNK>', 1],
  ['»', 30],
  ['ذ', 97],
  ['ﺑ', 32],
  ['ﻟ', 99],
  ['ف', 46],
  ['د', 104],
  ['۵', 109],
  ['ﺃ', 87],
  ['α', 47],
  ['م', 48],
  ['ه', 49],
  ['‬', 108],
  ['ؤ', 50],
  ['θ', 51],
  ['ﺋ', 100],
  ['ی', 105],
  ['´', 110],
  ['ض', 111],
  ['<PAD>', 0],
  ['​', 52],
  ['٪', 91],
  ['ί', 112],
  ['إ', 119],
  ['؟', 101],
  ['ﺒ', 113],
  ['ο', 114],
  ['‰', 115],
  ['π', 116],
  ['‎', 117],
  ['ﮐ', 53],
  ['ب', 118],
  ['ٱ', 67],
  ['μ', 54],
  ['ة', 107],
  ['ء', 120],
]);
// Output vocabulary: the array index is the class id predicted by the model,
// and the element is the label that class decodes to. Labels are the four
// special tokens, a placeholder that combineTextWithHarakat turns into
// "no mark", or a string of one or two combining marks. Because some labels
// are longer than one UTF-16 code unit, a label must be treated as a unit and
// never split.
const intToStrData = [
  '<PAD>',
  '<UNK>',
  '<GO>',
  '<EOS>',
  'ـ',
  'َ',
  'ُّ',
  'َّ',
  'ـ',
  'ِّ',
  'ّ',
  'ّْ',
  'ٍّ',
  'ِّ',
  'ٍّ',
  'ٌّ',
  'َّ',
  'ُ',
  'ٌّ',
  'ًّ',
  'ْ',
  'ٍ',
  'ِ',
  'ُّ',
  'ًّ',
  'ٌ',
  'ً',
  'ّّ',
];
// Code points U+064E, U+064F, U+0650, U+0652, U+0651, U+064B, U+064C, U+064D
// (the Arabic diacritics, in decimal). Not referenced by the functions visible
// in this snippet: combineTextWithHarakat declares its own local `harakat`.
const harakat = [1614, 1615, 1616, 1618, 1617, 1611, 1612, 1613];
// U+0651 ARABIC SHADDA. combineTextWithHarakat compares the first code unit
// of a label against this value when the input character is a space.
const connector = 1617;
/**
 * Encodes text as the sequence of vocabulary ids to feed to the model.
 *
 * The text is walked one UTF-16 code unit at a time, so the result always has
 * exactly `input.length` entries. combineTextWithHarakat indexes the text the
 * same way, which keeps positions aligned between input and output.
 * Characters that are not keys of `strToIntData` are encoded as the `<UNK>` id.
 *
 * TODO: add support for SMP characters (a surrogate pair is currently encoded
 * as two `<UNK>` ids).
 *
 * @param {string} input Text to encode; may be empty.
 * @returns {Array<number|undefined>} One id per UTF-16 code unit of `input`.
 */
function prepareInput(input) {
  const unknownId = strToIntData.get('<UNK>');
  const ids = [];
  for (let i = 0; i < input.length; i++) {
    const id = strToIntData.get(input.charAt(i));
    // Compare against undefined rather than using `||`: an id of 0 is a valid
    // lookup result and must not be replaced by the <UNK> id.
    ids.push(id === undefined ? unknownId : id);
  }
  return ids;
}
/**
 * Returns the index of the largest value in `input`.
 *
 * When several entries share the maximum, the first one wins. Comparisons use
 * `>`, so a NaN entry never replaces the current best.
 *
 * @param {ArrayLike<number>} input Scores for one position.
 * @returns {number} Index of the first maximal entry.
 * @throws {TypeError} If `input` is empty.
 */
function argMax(input) {
  if (input.length === 0) {
    throw new TypeError('argMax: cannot take the arg-max of an empty array');
  }
  let best = 0;
  for (let i = 1; i < input.length; i++) {
    if (input[i] > input[best]) {
      best = i;
    }
  }
  return best;
}
/**
 * Drops every occurrence of the `<PAD>` id from a list of ids.
 *
 * @param {Array<number>} input Ids to filter; not modified.
 * @returns {Array<number>} The remaining ids in their original order.
 */
function clean(input) {
  const padId = strToIntData.get('<PAD>');
  const isNotPadding = (id) => id !== padId;
  return input.filter(isNotPadding);
}
/**
 * Decodes class ids to their labels, one label per id.
 * Accepts plain arrays and typed arrays. Ids outside `intToStrData` decode to
 * the empty string.
 */
function logitsToLabels(input) {
  return Array.from(input, (id) => {
    const label = intToStrData[id];
    return label === undefined ? '' : label;
  });
}

/**
 * Decodes class ids and concatenates their labels into one string.
 *
 * The result cannot be split back into labels, because labels have different
 * lengths. Use it for display only, never for alignment with the input text.
 */
function logitsToString(input) {
  return logitsToLabels(input).join('');
}

/**
 * Turns one decoded label into the text to append after a character:
 * special tokens and the placeholder label become the empty string.
 */
function labelToHaraka(label) {
  switch (label) {
    case undefined:
    case '<PAD>':
    case '<UNK>':
    case '<GO>':
    case '<EOS>':
    case 'ـ':
      return '';
    default:
      return label;
  }
}

/**
 * Interleaves the characters of `input` with their diacritics.
 *
 * @param {string} input The undiacritized text, indexed by UTF-16 code unit.
 * @param {string|Array<string>} output Preferably the list of decoded labels,
 *   one per position. A plain string is still accepted and is read one code
 *   unit per position, as before.
 * @returns {string} `input` with each character followed by its diacritics.
 *   Characters beyond the end of `output` are copied unchanged.
 */
function combineTextWithHarakat(input, output) {
  // Labels may span several code units (e.g. haraka + shadda, or '<UNK>'), so
  // they are kept as units instead of being indexed with charAt.
  const labels = typeof output === 'string' ? output.split('') : output;
  const aligned = [];
  const pairCount = Math.min(input.length, labels.length);
  for (let i = 0; i < pairCount; i++) {
    let haraka = labelToHaraka(labels[i]);
    // A label starting with the connector (shadda) that lands on a space is
    // merged into the previous slot. No slot is added for the space itself,
    // so later labels move one position earlier, as in the original code.
    if (input.charAt(i) === ' ' && haraka.charCodeAt(0) === connector) {
      const previous = aligned.pop();
      haraka = (previous === undefined ? '' : previous) + haraka;
    }
    aligned.push(haraka);
  }
  let combined = '';
  for (let i = 0; i < input.length; i++) {
    combined += input.charAt(i) + (aligned[i] || '');
  }
  return combined;
}

/**
 * Decodes the model output and applies it to the original text.
 *
 * @param {Array<ArrayLike<number>>} logits One score vector per position.
 * @param {string} input The text that was passed to prepareInput.
 * @returns {string} `input` with the predicted diacritics inserted.
 */
function prepareOutput(logits, input) {
  const classIds = clean(logits.map((scores) => argMax(scores)));
  return combineTextWithHarakat(input, logitsToLabels(classIds));
}

/**
 * Diacritizes `str` with the TensorFlow.js model at `simple_model`.
 *
 * The model is loaded on first use and cached on `window.HarakatModel`.
 * The encoded text is copied into a fixed-length sequence of 495 ids. Text
 * longer than that is truncated for inference, and its remaining characters
 * are returned without diacritics. The elapsed time in milliseconds is logged
 * to the console.
 *
 * @param {string} str Text to diacritize.
 * @returns {Promise<string>} `str` with the predicted diacritics inserted.
 */
async function TextWithHarakat(str) {
  const sequenceLength = 495;
  // NOTE(review): unused positions are filled with id 28, which strToIntData
  // assigns to a space-like character rather than to '<PAD>' (0). Confirm this
  // matches the padding the model was trained with.
  const fillId = 28;

  const startedAt = new Date().getTime();
  const encoded = prepareInput(str);
  const model = window.HarakatModel || (await tf.loadLayersModel(simple_model));
  window.HarakatModel = model;

  const sequence = new Array(sequenceLength).fill(fillId);
  const usedLength = Math.min(sequence.length, encoded.length);
  for (let i = 0; i < usedLength; i++) {
    sequence[i] = encoded[i];
  }

  const inputTensor = tf.tensor([sequence]);
  let scores;
  let bestClasses;
  let classIds;
  try {
    scores = model.predict(inputTensor);
    bestClasses = scores.argMax(2);
    classIds = await bestClasses.data();
  } finally {
    // Tensors are not garbage-collected; release them explicitly so repeated
    // calls do not accumulate backend (e.g. WebGL) memory.
    inputTensor.dispose();
    if (scores) scores.dispose();
    if (bestClasses) bestClasses.dispose();
  }

  // Pass whole labels, one per position, so that multi-code-unit labels stay
  // attached to their character and special tokens are dropped, not printed.
  const ret = combineTextWithHarakat(str, logitsToLabels(classIds));
  console.log(new Date().getTime() - startedAt);
  return ret;
}

// Paths to the model.json files of the converted models. Only `simple_model`
// is referenced by the visible code (TextWithHarakat loads it); the other
// three are declared but not used here.
var middle_model = './tmp/Shakkala-master/model/middle_model/model.json',
  second_model6 = './tmp/Shakkala-master/model/second_model6/model.json',
  simple_model = './tmp/Shakkala-master/model/simple_model/model.json',
  simple_mlp = './tmp/Shakkala-master/model/mlp/model.json';

Then I used the code:

// Example call. The returned promise still resolves to the diacritized text;
// the handler reports model-loading or inference failures instead of leaving
// an unhandled promise rejection.
TextWithHarakat('بسم الله الرحمان الرحيم').catch((error) => console.error(error));

Using the WebGL backend gives a reasonable time: about 1.5 seconds.