11/* eslint-disable @typescript-eslint/no-var-requires */
2- const fs = require ( 'fs' ) ;
3- const path = require ( 'path' ) ;
4-
5- const extractLocales = ( dir ) => {
6- console . info ( '🔍 [DEBUG] extractLocales - Starting locale extraction from:' , dir ) ;
7- console . time ( '🔍 [DEBUG] extractLocales - Total extraction time' ) ;
8-
9- // ✅ Create a new Set for each function call instead of using global
10- const uniqueLanguages = new Set ( ) ;
11- let fileCount = 0 ;
12- let processedFiles = 0 ;
13-
14- const extractRecursive = ( currentDir ) => {
15- try {
16- const items = fs ?. readdirSync ?. ( currentDir , { withFileTypes : true } ) ;
17-
18- for ( const item of items ) {
19- const fullPath = path ?. join ?. ( currentDir , item ?. name ) ;
20-
21- if ( item ?. isDirectory ( ) ) {
22- // ✅ Skip certain directories that are unlikely to contain locale data
23- const skipDirs = [
24- '__Standard Values' ,
25- '__Prototypes' ,
26- '__Masters' ,
27- 'blob' ,
28- 'media library'
29- ] ;
30- if ( ! skipDirs . some ( ( skipDir ) => item . name . includes ( skipDir ) ) ) {
31- extractRecursive ( fullPath ) ;
32- }
33- } else if ( item ?. isFile ( ) && item ?. name === 'data.json' ) {
34- fileCount ++ ;
35- try {
36- const rawData = fs ?. readFileSync ?. ( fullPath , 'utf8' ) ;
37- const jsonData = JSON ?. parse ?. ( rawData ) ;
38- const language = jsonData ?. item ?. $ ?. language ;
39-
40- if ( language ) {
41- uniqueLanguages ?. add ?. ( language ) ;
42- processedFiles ++ ;
43- console . info (
44- `🔍 [DEBUG] extractLocales - Found locale: "${ language } " in ${ fullPath } `
45- ) ;
46- }
47- } catch ( error ) {
48- console . error ( `🔍 [DEBUG] extractLocales - Error reading ${ fullPath } :` , error ?. message ) ;
49- }
50-
51- // ✅ Progress logging every 100 files
52- if ( fileCount % 100 === 0 ) {
53- console . info (
54- `🔍 [DEBUG] extractLocales - Progress: ${ fileCount } files scanned, ${ uniqueLanguages . size } unique locales found`
55- ) ;
56- }
57- }
2+ const fs = require ( "fs" ) ;
3+ const path = require ( "path" ) ;
4+
// ─── tunables ────────────────────────────────────────────────────────────────

// Number of leading bytes of each data.json inspected by the fast path (128 KiB).
// Assumed to be large enough to contain the item.$ metadata block; any file
// where the fast path finds nothing is handled by a full JSON.parse instead.
const HEAD_BYTES = 131_072;

// Default number of files processed concurrently. Can be overridden with the
// LOCALE_CONCURRENCY environment variable; an unset, non-numeric or zero value
// falls back to 24.
const CONCURRENCY = Number(process.env.LOCALE_CONCURRENCY) || 24;

// Per-file debug logging, enabled by setting DEBUG_SITECORE_LOCALES=1.
const DEBUG = process.env.DEBUG_SITECORE_LOCALES === "1";

// Fast path: locate a "$" metadata block first, then look for the language
// inside it. Searching only inside that block avoids matching a "language" key
// that belongs to nested field content.
//
// Strategy:
//   1. Find the first `"$": { ... }` block in the head window.
//   2. Capture its contents (1 to 512 characters, up to the first `}`).
//   3. Match `"language": "<value>"` only within that captured slice.
//
// Both patterns are deliberately non-global so they carry no `lastIndex` state
// between files. Any file they do not match falls back to a full JSON.parse.
const META_BLOCK_RE = /"\$"\s*:\s*\{([^}]{1,512})\}/;
const LANG_IN_META_RE = /"language"\s*:\s*"([^"]{1,64})"/;

// Directory-name fragments that are never descended into: Sitecore system
// folders plus common filesystem noise. Built once at module load.
// NOTE: callers match these as substrings of the directory name, so ".git"
// also excludes e.g. ".github" and "blob" also excludes "blobs".
const SKIP_DIRS = new Set([
  "__Standard Values",
  "__Prototypes",
  "__Masters",
  "blob",
  "media library",
  "node_modules",
  ".git",
  "__MACOSX",
]);
34+
35+ // ─── phase 1: collect all data.json paths ────────────────────────────────────
36+
/**
 * Recursively collect the full paths of every file named `data.json` under `dir`.
 *
 * Directories whose name contains any entry of `SKIP_DIRS` (substring match)
 * are not descended into. Sub-directories are walked concurrently, so the order
 * of the returned paths is not guaranteed. A directory that cannot be read is
 * logged and skipped; this function does not reject because of it.
 *
 * @param {string} dir - Directory to walk.
 * @param {string[]} [results] - Accumulator shared across the recursion; found
 *   paths are pushed onto it.
 * @returns {Promise<string[]>} The `results` array.
 */
async function collectPaths(dir, results = []) {
  if (typeof dir !== "string" || dir.length === 0) {
    console.error("[extractLocales] collectPaths: invalid or empty dir");
    return results;
  }

  let entries;
  try {
    entries = await fs.promises.readdir(dir, { withFileTypes: true });
  } catch (err) {
    console.error(`[extractLocales] cannot read dir ${dir} :`, err?.message ?? err);
    return results;
  }

  // Iterate the skip list directly instead of copying it into a new array for
  // every directory entry.
  const isSkippedDir = (name) => {
    for (const fragment of SKIP_DIRS) {
      if (name.includes(fragment)) return true;
    }
    return false;
  };

  const subdirs = [];

  for (const entry of entries) {
    const name = entry?.name;
    if (typeof name !== "string") continue;

    if (entry.isDirectory()) {
      // The skip list only ever applies to directories.
      if (!isSkippedDir(name)) subdirs.push(path.join(dir, name));
    } else if (entry.isFile() && name === "data.json") {
      results.push(path.join(dir, name));
    }
  }

  await Promise.all(subdirs.map((subdir) => collectPaths(subdir, results)));
  return results;
}
72+
73+ // ─── phase 2: extract language from one file ─────────────────────────────────
74+
/**
 * Read the locale of a single `data.json` file.
 *
 * Fast path: read only the first `HEAD_BYTES` bytes and pull the language out
 * of the first `"$": { ... }` block with `META_BLOCK_RE` / `LANG_IN_META_RE`.
 * Fallback: read and `JSON.parse` the whole file and return `item.$.language`.
 *
 * Any I/O or parse error is logged and reported as `null`; this function does
 * not reject.
 *
 * @param {string} filePath - Path of the JSON file to inspect.
 * @returns {Promise<string|null>} The language, or `null` when it is absent or
 *   the file could not be read/parsed. (On the fallback path the value is
 *   whatever `item.$.language` holds.)
 */
async function extractLanguage(filePath) {
  if (typeof filePath !== "string" || filePath.length === 0) {
    return null;
  }

  let fd = null;
  try {
    // Fast path — read only the head of the file.
    fd = await fs.promises.open(filePath, "r");
    const buf = Buffer.allocUnsafe(HEAD_BYTES);
    const { bytesRead } = await fd.read(buf, 0, HEAD_BYTES, 0);
    await fd.close();
    fd = null; // closed successfully; nothing left for `finally` to do

    // Only the bytes actually read are decoded, so the uninitialised remainder
    // of the unsafe buffer is never observed.
    const head = buf.toString("utf8", 0, bytesRead);
    const metaSlice = META_BLOCK_RE.exec(head)?.[1] ?? null;
    const rawLang = metaSlice !== null ? LANG_IN_META_RE.exec(metaSlice)?.[1] ?? null : null;

    // The regex captures raw JSON text. A backslash means the value contains an
    // escape sequence (or was cut at an escaped quote), so the capture is not
    // the decoded string; let the full parse below produce the real value.
    if (rawLang != null && rawLang !== "" && !rawLang.includes("\\")) {
      if (DEBUG) console.debug(`[fast] ${filePath} → ${rawLang} `);
      return rawLang;
    }

    // Fallback — full parse.
    if (DEBUG) console.debug(`[fallback] ${filePath} `);
    const raw = await fs.promises.readFile(filePath, "utf8");
    const json = JSON.parse(raw);
    return json?.item?.$?.language ?? null;
  } catch (err) {
    console.error(`[extractLocales] error reading ${filePath} :`, err?.message ?? err);
    return null;
  } finally {
    // Only reached with an open handle when something threw before close();
    // a failure to close here is deliberately ignored (best-effort cleanup).
    if (fd) await fd.close().catch(() => {});
  }
}
112+
113+ // ─── phase 3: bounded-concurrency processing ─────────────────────────────────
114+
/**
 * Run `extractLanguage` over `paths` with at most `concurrency` files in flight
 * and collect the distinct languages found.
 *
 * A fixed number of workers pull paths from a shared cursor until the list is
 * exhausted. Entries that are not strings are skipped and do not count towards
 * the progress figure. Progress is logged every 100 processed files.
 *
 * @param {string[]} paths - File paths to inspect. A non-array yields an empty Set.
 * @param {number} [concurrency] - Maximum number of workers; values that coerce
 *   to 0 or NaN fall back to `CONCURRENCY`, and the result is clamped to >= 1.
 * @returns {Promise<Set<string>>} Unique truthy values returned by `extractLanguage`.
 */
async function processWithConcurrency(paths, concurrency) {
  const locales = new Set();
  if (!Array.isArray(paths)) {
    return locales;
  }

  const limit = Math.max(1, Number(concurrency) || CONCURRENCY);
  const total = paths.length;
  let idx = 0;
  let scanned = 0;

  async function worker() {
    // `idx` is shared by all workers; the read-and-increment below runs
    // synchronously, so each path is handed to exactly one worker.
    while (idx < paths.length) {
      const filePath = paths[idx++];
      if (filePath == null || typeof filePath !== "string") continue;

      const lang = await extractLanguage(filePath);
      if (lang) locales.add(lang);

      scanned++;
      if (scanned % 100 === 0) {
        console.info(
          `[extractLocales] progress: ${scanned} /${total} files scanned, ${locales.size} unique locale(s) found`
        );
      }
    }
  }

  // Never start more workers than there are paths.
  const workerCount = Math.min(limit, paths.length);
  await Promise.all(Array.from({ length: workerCount }, () => worker()));

  return locales;
}
147+
148+ // ─── public API ──────────────────────────────────────────────────────────────
149+
/**
 * Walk `dir` and return a Set of all unique locale strings found in data.json files.
 *
 * This version is asynchronous: it returns a Promise, so callers must `await`
 * it (or use `.then`) rather than use the return value directly.
 *
 * An invalid or empty `dir` is logged and produces an empty Set.
 *
 * @param {string} dir - Root directory to scan.
 * @returns {Promise<Set<string>>} Unique locales found under `dir`.
 */
const extractLocales = async (dir) => {
  if (typeof dir !== "string" || dir.length === 0) {
    console.error("[extractLocales] invalid or empty dir; returning empty locale set");
    return new Set();
  }

  console.info("[extractLocales] starting locale extraction from:", dir);
  console.time("[extractLocales] total extraction time");

  // Phase 1: find every data.json; phase 2: read them with bounded concurrency.
  const paths = await collectPaths(dir);
  console.info(`[extractLocales] found ${paths.length} data.json files`);

  const locales = await processWithConcurrency(paths, CONCURRENCY);

  console.timeEnd("[extractLocales] total extraction time");
  console.info(
    `[extractLocales] done — ${paths.length} files scanned, ${locales.size} unique locale(s):`,
    Array.from(locales)
  );

  return locales;
};
77180
78- module . exports = extractLocales ;
181+ module . exports = extractLocales ;
0 commit comments