Skip to content

Commit fae590b

Browse files
committed
Added (some) standardization from #121
1 parent a6a8e2f commit fae590b

File tree

2 files changed

+48
-1
lines changed

2 files changed

+48
-1
lines changed

lib/constants/AECD-rewritings.json

+35
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
1+
{
2+
"S/he": "s/he",
3+
"A": "a",
4+
"The": "the",
5+
"It": "it",
6+
"An": "an",
7+
"Being": "being",
8+
"Her/his": "her/his",
9+
"They": "they",
10+
"There": "there",
11+
"My": "my",
12+
"Something": "something",
13+
"Having": "having",
14+
"In": "in",
15+
"One": "one",
16+
"He": "he",
17+
"She": "she",
18+
"Someone": "someone",
19+
"All": "all",
20+
"What": "what",
21+
"On": "on",
22+
"Your": "your",
23+
"Two": "two",
24+
"Three": "three",
25+
"Last": "last",
26+
"Four": "four",
27+
"Along": "along",
28+
"Too": "too",
29+
"To": "to",
30+
"That": "that",
31+
"Right": "right",
32+
"Our": "our",
33+
"Close": "close",
34+
"As": "as"
35+
}

lib/convert/AECD.js

+13-1
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,9 @@ const { readJSON } = fs;
1111
*/
1212
let posMap;
1313

14+
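/** Map of text rewritings (text to match → replacement) applied to each definition before it is split into senses. Loaded from AECD-rewritings.json in convertAECD(). */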
15+
let rewritings;
16+
1417
/**
1518
* A class representing a Maskwacîs database entry in DLx JSON format.
1619
*/
@@ -80,7 +83,7 @@ class Entry {
8083
// this.semanticIndices = Entry.#splitSemanticDomains(RapidWordsIndices);
8184
this.test = test;
8285

83-
this.senses = Entry.#splitDefinition(Definition)
86+
this.senses = Entry.#splitDefinition(Entry.#applyRewritings(Definition))
8487
.map(definition => ({
8588
definition,
8689
original: definition,
@@ -139,6 +142,14 @@ class Entry {
139142
return string.replace(/[?!]/gu, ``);
140143
}
141144

145+
static #applyRewritings( string ) {
146+
let text = string;
147+
rewritings.forEach((replaceWith, match) => {
148+
text = text.replaceAll(match,replaceWith);
149+
});
150+
return text;
151+
}
152+
142153
/**
143154
* Splits a string into senses based on sense numbers (1., 2. etc.).
144155
* @param {String} definition The string to split.
@@ -174,6 +185,7 @@ export default async function convertAECD(inputPath, outputPath) {
174185

175186
const posTable = await readJSON(`./lib/constants/AECD-pos.json`);
176187
posMap = new Map(Object.entries(posTable));
188+
rewritings = new Map(Object.entries(await readJSON(`./lib/constants/AECD-rewritings.json`)));
177189

178190
const errors = [];
179191
let entries = await readTSV(inputPath, { raw: true, relaxColumnCount: true });

0 commit comments

Comments
 (0)