Skip to content

Commit 80f8ece

Browse files
authored
Merge pull request #1013 from contentstack/feat/sitecore-locales-env
refactor: enhance locale extraction process with async handling and i…
2 parents ff3bd8e + 5e0afa1 commit 80f8ece

File tree

1 file changed

+171
-68
lines changed

1 file changed

+171
-68
lines changed
Lines changed: 171 additions & 68 deletions
Original file line numberDiff line numberDiff line change
@@ -1,78 +1,181 @@
11
/* eslint-disable @typescript-eslint/no-var-requires */
2-
const fs = require('fs');
3-
const path = require('path');
4-
5-
const extractLocales = (dir) => {
6-
console.info('🔍 [DEBUG] extractLocales - Starting locale extraction from:', dir);
7-
console.time('🔍 [DEBUG] extractLocales - Total extraction time');
8-
9-
// ✅ Create a new Set for each function call instead of using global
10-
const uniqueLanguages = new Set();
11-
let fileCount = 0;
12-
let processedFiles = 0;
13-
14-
const extractRecursive = (currentDir) => {
15-
try {
16-
const items = fs?.readdirSync?.(currentDir, { withFileTypes: true });
17-
18-
for (const item of items) {
19-
const fullPath = path?.join?.(currentDir, item?.name);
20-
21-
if (item?.isDirectory()) {
22-
// ✅ Skip certain directories that are unlikely to contain locale data
23-
const skipDirs = [
24-
'__Standard Values',
25-
'__Prototypes',
26-
'__Masters',
27-
'blob',
28-
'media library'
29-
];
30-
if (!skipDirs.some((skipDir) => item.name.includes(skipDir))) {
31-
extractRecursive(fullPath);
32-
}
33-
} else if (item?.isFile() && item?.name === 'data.json') {
34-
fileCount++;
35-
try {
36-
const rawData = fs?.readFileSync?.(fullPath, 'utf8');
37-
const jsonData = JSON?.parse?.(rawData);
38-
const language = jsonData?.item?.$?.language;
39-
40-
if (language) {
41-
uniqueLanguages?.add?.(language);
42-
processedFiles++;
43-
console.info(
44-
`🔍 [DEBUG] extractLocales - Found locale: "${language}" in ${fullPath}`
45-
);
46-
}
47-
} catch (error) {
48-
console.error(`🔍 [DEBUG] extractLocales - Error reading ${fullPath}:`, error?.message);
49-
}
50-
51-
// ✅ Progress logging every 100 files
52-
if (fileCount % 100 === 0) {
53-
console.info(
54-
`🔍 [DEBUG] extractLocales - Progress: ${fileCount} files scanned, ${uniqueLanguages.size} unique locales found`
55-
);
56-
}
57-
}
2+
const fs = require("fs");
3+
const path = require("path");
4+
5+
// ─── tunables ────────────────────────────────────────────────────────────────
6+
const HEAD_BYTES = 131_072; // 128 KiB – covers item.$.language in all known exports
7+
const CONCURRENCY = Number(process.env.LOCALE_CONCURRENCY) || 24;
8+
const DEBUG = process.env.DEBUG_SITECORE_LOCALES === "1";
9+
10+
// Fast-path: find the "$" metadata block first, then extract language from it.
11+
// This avoids matching a "language" key that belongs to nested field content.
12+
//
13+
// Strategy:
14+
// 1. Find the first "$": { block in the head window (where item.$ lives)
15+
// 2. Capture that block's body, which must close within 512 chars and contain no "}" (enough to cover all metadata keys)
16+
// 3. Match "language" only within that narrow slice
17+
//
18+
// Fallback to full JSON.parse handles any file where this doesn't match.
19+
const META_BLOCK_RE = /"\$"\s*:\s*\{([^}]{1,512})\}/;
20+
const LANG_IN_META_RE = /"language"\s*:\s*"([^"]{1,64})"/;
21+
22+
// Hoisted once – never recreated in the hot path
23+
// Combines the Sitecore system dirs skipped by the original implementation with filesystem noise dirs
24+
const SKIP_DIRS = new Set([
  "__Standard Values",
  "__Prototypes",
  "__Masters",
  "blob",
  "media library",
  "node_modules",
  ".git",
  "__MACOSX",
]);

// ─── phase 1: collect all data.json paths ────────────────────────────────────

/**
 * Report whether a directory name contains any SKIP_DIRS entry as a substring.
 *
 * Substring (not exact) matching is intentional: it mirrors the earlier
 * synchronous implementation, so e.g. "my blob store" is skipped because it
 * contains "blob". The Set is iterated directly so no per-call copy is made.
 *
 * @param {string} name - Directory base name.
 * @returns {boolean} True when the directory must not be descended into.
 */
function isSkippedDir(name) {
  for (const fragment of SKIP_DIRS) {
    if (name.includes(fragment)) return true;
  }
  return false;
}

/**
 * Recursively gather the paths of every file named exactly "data.json" under `dir`.
 *
 * Sibling sub-directories are walked concurrently, so the order of `results`
 * is not deterministic. Unreadable directories are logged and skipped instead
 * of aborting the walk.
 *
 * @param {string} dir - Directory to walk.
 * @param {string[]} [results=[]] - Accumulator shared across recursive calls; appended to in place.
 * @returns {Promise<string[]>} The same `results` array.
 */
async function collectPaths(dir, results = []) {
  if (typeof dir !== "string" || dir.length === 0) {
    console.error("[extractLocales] collectPaths: invalid or empty dir");
    return results;
  }

  let entries;
  try {
    entries = await fs.promises.readdir(dir, { withFileTypes: true });
  } catch (err) {
    console.error(`[extractLocales] cannot read dir ${dir}:`, err?.message ?? err);
    return results;
  }

  const subdirs = [];

  for (const entry of entries) {
    const name = entry?.name;
    if (typeof name !== "string") continue;

    if (entry.isDirectory()) {
      // The skip list only ever applied to directories; "data.json" can never match it.
      if (!isSkippedDir(name)) subdirs.push(path.join(dir, name));
    } else if (entry.isFile() && name === "data.json") {
      results.push(path.join(dir, name));
    }
  }

  await Promise.all(subdirs.map((subdir) => collectPaths(subdir, results)));
  return results;
}
72+
73+
// ─── phase 2: extract language from one file ─────────────────────────────────
74+
75+
/**
 * Read up to `size` bytes from the start of an open file.
 *
 * Loops until the buffer is full or a read returns 0 bytes, so a short read
 * is never mistaken for end-of-file.
 *
 * @param {import("fs").promises.FileHandle} handle - Open, readable file handle.
 * @param {number} size - Maximum number of bytes to read.
 * @returns {Promise<{ text: string, complete: boolean }>} Decoded UTF-8 text and
 *   whether end-of-file was reached (i.e. `text` is the entire file).
 */
async function readFileHead(handle, size) {
  const buf = Buffer.allocUnsafe(size);
  let filled = 0;
  let complete = false;

  while (filled < size) {
    const { bytesRead } = await handle.read(buf, filled, size - filled, filled);
    if (bytesRead === 0) {
      complete = true;
      break;
    }
    filled += bytesRead;
  }

  return { text: buf.toString("utf8", 0, filled), complete };
}

/**
 * Extract the language recorded in one data.json file.
 *
 * Fast path: read only the first HEAD_BYTES of the file, locate the first
 * `"$": { ... }` block with META_BLOCK_RE and pull `"language"` out of it with
 * LANG_IN_META_RE. Fallback: JSON-parse the whole document and read
 * `item.$.language`. When the head read already reached end-of-file the head
 * text is parsed directly instead of reading the file a second time.
 *
 * @param {string} filePath - Path of the data.json file.
 * @returns {Promise<*>} The language value, or null when the path is invalid,
 *   the file cannot be read or parsed, or no language is present. Errors are
 *   logged, never thrown.
 */
async function extractLanguage(filePath) {
  if (typeof filePath !== "string" || filePath.length === 0) {
    return null;
  }

  let fd;
  try {
    fd = await fs.promises.open(filePath, "r");
    const { text: head, complete } = await readFileHead(fd, HEAD_BYTES);
    // Release the descriptor before any further work; `finally` only covers error paths.
    await fd.close();
    fd = null;

    const block = META_BLOCK_RE.exec(head);
    const match = block ? LANG_IN_META_RE.exec(block[1]) : null;

    // Capture group 1 is {1,64}, so a match always carries a non-empty language.
    if (match) {
      if (DEBUG) console.debug(`[fast] ${filePath} -> ${match[1]}`);
      return match[1];
    }

    if (DEBUG) console.debug(`[fallback] ${filePath}`);
    const raw = complete ? head : await fs.promises.readFile(filePath, "utf8");
    const json = JSON.parse(raw);
    return json?.item?.$?.language ?? null;
  } catch (err) {
    console.error(`[extractLocales] error reading ${filePath}:`, err?.message ?? err);
    return null;
  } finally {
    if (fd) await fd.close().catch(() => { });
  }
}
112+
113+
// ─── phase 3: bounded-concurrency processing ─────────────────────────────────
114+
115+
/**
 * Run extractLanguage over `paths` with a bounded number of concurrent workers
 * and collect every truthy result.
 *
 * Workers pull from a shared cursor, so each path is handled exactly once.
 * Entries that are not strings are skipped and do not count as scanned.
 *
 * @param {string[]} paths - File paths to process; a non-array yields an empty Set.
 * @param {number} concurrency - Desired worker count. Values that coerce to 0 or
 *   NaN fall back to CONCURRENCY; the result is clamped to at least 1 and at
 *   most `paths.length`.
 * @returns {Promise<Set<*>>} Unique languages returned by extractLanguage.
 */
async function processWithConcurrency(paths, concurrency) {
  const locales = new Set();
  if (!Array.isArray(paths)) {
    return locales;
  }

  const limit = Math.max(1, Number(concurrency) || CONCURRENCY);
  const total = paths.length;
  let cursor = 0;
  let scanned = 0;

  async function worker() {
    while (cursor < paths.length) {
      // Read-and-increment happens synchronously, so two workers never claim the same index.
      const filePath = paths[cursor++];
      if (typeof filePath !== "string") continue;

      const lang = await extractLanguage(filePath);
      if (lang) locales.add(lang);

      scanned++;
      if (scanned % 100 === 0) {
        console.info(
          `[extractLocales] progress: ${scanned}/${total} files scanned, ${locales.size} unique locale(s) found`
        );
      }
    }
  }

  const workerCount = Math.min(limit, total);
  await Promise.all(Array.from({ length: workerCount }, () => worker()));

  return locales;
}
147+
148+
// ─── public API ──────────────────────────────────────────────────────────────
149+
150+
/**
 * Walk `dir` and return a Set of all unique locale strings found in data.json files.
 *
 * Asynchronous successor to the earlier synchronous version: it takes the same
 * argument but returns a Promise, so callers must `await` the result (or chain
 * `.then`) before using the Set.
 *
 * Work is done in two steps: collectPaths gathers the data.json paths, then
 * processWithConcurrency reads them with CONCURRENCY workers. Start, file
 * count, elapsed time and the final locale list are logged to the console.
 *
 * @param {string} dir - Root directory to scan.
 * @returns {Promise<Set<string>>} Unique locales; an empty Set when `dir` is
 *   not a non-empty string.
 */
const extractLocales = async (dir) => {
  if (typeof dir !== "string" || dir.length === 0) {
    console.error("[extractLocales] invalid or empty dir; returning empty locale set");
    return new Set();
  }

  console.info("[extractLocales] starting locale extraction from:", dir);
  console.time("[extractLocales] total extraction time");

  const paths = await collectPaths(dir);
  console.info(`[extractLocales] found ${paths.length} data.json files`);

  const locales = await processWithConcurrency(paths, CONCURRENCY);

  console.timeEnd("[extractLocales] total extraction time");
  console.info(
    `[extractLocales] done — ${paths.length} files scanned, ${locales.size} unique locale(s):`,
    Array.from(locales)
  );

  return locales;
};
77180

78-
module.exports = extractLocales;
181+
module.exports = extractLocales;

0 commit comments

Comments
 (0)