Wiktionary Parser
Wiktionary Parser is a parser written in Python that takes an English Wiktionary dump file and produces a JSON file for each letter of the alphabet.
Usage
To use the Wiktionary Parser, first clone this directory by typing the following in your terminal:
git clone https://github.com/farleyoli/wiktionary-parser
Run the following command to start the parsing process:
python en_main.py -i <dump>
where dump
denotes the Wiktionary XML dump file, which can be downloaded from here.
You will be asked to repeat this process for each letter of the alphabet, in order not to use too much RAM.
Alternatively, you may want to directly download the resulting JSON files from the Minerva Popup Dictionary repository, where they are used.
Structure
The structure of the JSON files is as follows:
dict[word] is an array, where dict is the content of the JSON file,
and the structure of each dict[word][i] is:
{head, dfn}, where
dfn = (list of {ctnt, dfn, exs, qts}) or []
ctnt = contents (str)
exs = list of examples (str)
and qts = list of quotes (str)
head = the headword (base form) that the entry belongs to (ex: head(tested) = test)
Below, for reference, is an example of a JavaScript function that reads the parsed JSON object dict
and renders the definitions of a word as HTML elements.
/**
 * Builds a DOM subtree that displays every definition of `word` stored in `dict`.
 *
 * The returned <div> always starts with an empty header <div id="minerva-header">.
 * For each entry in `dict[word]`, a nested block of numbered definitions is
 * appended; sub-definitions are rendered recursively and indented.
 *
 * Relies on a `processContent` function defined outside this snippet to turn
 * each definition's `ctnt` value into display text.
 *
 * @param {Object} dict - Parsed content of one of the JSON files; maps a word
 *   to an array of entries, each having a `dfn` array.
 * @param {string} word - The word to look up.
 * @returns {HTMLDivElement} The container element. If `word` is not a key of
 *   `dict`, its text is "Word not found in dictionary.".
 */
function constructDfn(dict, word) {
  const retDiv = document.createElement("div");
  const header = document.createElement("div");
  header.id = "minerva-header";
  retDiv.appendChild(header);

  /**
   * Recursively renders a list of definitions (and their sub-definitions).
   *
   * @param {Array} dfn - List of objects with a `ctnt` value and a nested `dfn` list.
   * @returns {HTMLDivElement} A <div> holding one numbered row per definition,
   *   each followed by the block of its sub-definitions. An empty list yields
   *   an empty, unindented <div>.
   */
  function constructDfnAux(dfn) {
    const ret = document.createElement("div");
    if (dfn.length <= 0) {
      return ret;
    }

    for (const [index, definition] of dfn.entries()) {
      const dfnNode = document.createElement("div");

      // NOTE(review): this id is repeated for every definition row; it is kept
      // unchanged because styling outside this snippet may select on it.
      const dfnNoNode = document.createElement("span");
      dfnNoNode.id = "minerva-definition-number";
      dfnNoNode.innerText = (index + 1).toString(); // numbering shown to the user is 1-based
      dfnNode.appendChild(dfnNoNode);

      const textNode = document.createElement("span");
      textNode.innerText = processContent(definition['ctnt']);
      dfnNode.style.marginTop = "2.5%";
      dfnNode.style.marginBottom = "2.5%";
      dfnNode.appendChild(textNode);

      ret.appendChild(dfnNode);
      // Sub-definitions go right below their parent row.
      ret.appendChild(constructDfnAux(definition['dfn']));
    }

    // Each nesting level is indented relative to its parent block.
    ret.style.marginLeft = "5%";
    return ret;
  }

  // Own-property check: `word in dict` would also match names inherited from
  // Object.prototype (e.g. "toString", "constructor"), which are not entries.
  if (!Object.prototype.hasOwnProperty.call(dict, word)) {
    retDiv.innerText = "Word not found in dictionary.";
    return retDiv;
  }

  for (const entry of dict[word]) {
    retDiv.appendChild(constructDfnAux(entry['dfn']));
  }
  return retDiv;
}