-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathMakefile
executable file
·394 lines (319 loc) · 12.3 KB
/
Makefile
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
#!/usr/bin/make -rf
# vim:noet:ts=3:nowrap
# @file Makefile
# @brief Master makefile for the framework, handles dependencies between modules.
#
# @author Samuel Larkin
#
# Traitement multilingue de textes / Multilingual Text Processing
# Centre de recherche en technologies numériques / Digital Technologies Research Centre
# Conseil national de recherches Canada / National Research Council Canada
# Copyright 2008, 2012, 2015, 2018, Sa Majeste la Reine du Chef du Canada
# Copyright 2008, 2012, 2015, 2018, Her Majesty in Right of Canada
# Mandatory include: master config file.
include Makefile.params
# Include the master toolkit.
include Makefile.toolkit

# Running bare `make` prints the usage summary instead of kicking off a
# multi-hour training run by accident.
.DEFAULT_GOAL := help
# Disable legacy suffix rules; all rules here are explicit or pattern rules.
.SUFFIXES:
# Remove a half-written target when its recipe fails, so a corrupt file can
# never look "up to date" on the next run.
.DELETE_ON_ERROR:

# Targets advertised as the primary user entry points (grown throughout the file).
MAIN_TARGETS := all clean help
########################################
# Default end-to-end pipeline: train models, tune weights, and (when a test
# set is configured) translate and score it.
.PHONY: all
all: SHELL=${LOCAL_SHELL}
all: tune_main
# Only add the evaluation step when the user configured at least one test set.
ifneq ($(strip ${TEST_SET}),)
all: eval
endif
all:
	@echo "Training, tuning and translating using the framework are all done."
########################################
# Print the current configuration (corpora, test/translate sets, other
# parameters) and the list of available targets.  The ${HELP_*} macros are
# defined in Makefile.params / Makefile.toolkit.
.PHONY: help
help: SHELL=${LOCAL_SHELL}
help:
	${HELP_IRSTLM}
	${HELP_LIST_ALL_CORPORA}
	@echo " test set: ${TEST_SET}"
# Only mention the translate set when one is configured.
ifneq ($(strip ${TRANSLATE_SET}),)
	@echo " translate set: ${TRANSLATE_SET}"
endif
	@echo
	${HELP_OTHER_PARAMS}
	@echo
	@echo "To run the framework, type: make all"
	${HELP_LIST_MAIN_TARGETS}
	@echo
	@echo "Additional targets in this Makefile include:"
# NOTE(review): "echo -e" is a bashism — assumes LOCAL_SHELL is bash; confirm.
	@echo -e " " ${OTHER_TARGETS}
########################################
# Build the framework's tutorial document (see the %.pdf pattern rule below).
.PHONY: doc
doc: SHELL=${LOCAL_SHELL}
doc: tutorial.pdf
MAIN_TARGETS += doc
%.pdf: SHELL=${LOCAL_SHELL}
# Compile a .tex source into its PDF.
# latex actually needs to be run three times for the table of contents to be
# generated correctly (a trivial change on one line has a significant ripple
# effect to paging between the 1st and 2nd pass, so that several entries in the
# TOC are changed between the 2nd and 3rd pass).
%.pdf: %.tex
	for pass in 1 2 3; do \
	   TEXINPUTS=${PORTAGE}/texmf: pdflatex -interaction=batchmode $< || exit 1; \
	done
########################################
# Clean up
.PHONY: clean clean.content clean.doc clean.logs hide.logs

# Thorough cleaning of everything, including their old names
clean: SHELL=${LOCAL_SHELL}
clean: clean.content clean.logs clean.doc
	${RM} tutorial.pdf
	${RM} log.INSTALL_SUMMARY

# hide.logs hides logs from user's view into .logs
# Each of these fans out to the three sub-makefiles, forwarding the invoked
# target name ($@) via the static pattern rule.
clean.content clean.logs hide.logs: SHELL=${LOCAL_SHELL}
clean.content clean.logs hide.logs: %:
	${MAKE} -C corpora $@
	${MAKE} -C models $@
	${MAKE} -C translate $@
# Also remove the per-variant translate directories created by the
# translate.<variant> targets, when tuning variants are configured.
ifneq ($(strip ${TUNE_DECODE_VARIANTS}),)
	${RM} -r $(addprefix translate., ${TUNE_DECODE_VARIANTS})
endif

# Clean auxiliary files from make doc, but not the .pdf itself.
clean.doc: SHELL=${LOCAL_SHELL}
clean.doc:
# NOTE(review): brace expansion is a bashism — assumes LOCAL_SHELL is bash.
	${RM} tutorial.{aux,log,toc,out}
########################################
# Prepare the corpora.
# Verifies the installation first (check_setup), then delegates to corpora/.
.PHONY: corpora
corpora: SHELL=${LOCAL_SHELL}
corpora: check_setup
	${MAKE} -C corpora all
OTHER_TARGETS += corpora
# Create the Language Models (LM, MixLM, CoarseLM, BiLM).
# Create the word classes required for CoarseLM (wcl)
# Create the Lexicalized Distortion Models (LDM).
# Create the sparse model, which includes the discriminative hierarchical
# distortion model (sparse / DHDM).
# Create the NNJM
# Create models for truecasing (TC).
# Create the Translation Model (TM).
MODEL_TARGETS = models lm mixlm ibm wal wcl coarselm bilm ldm sparse nnjm tc jpt tm
.PHONY: ${MODEL_TARGETS}
${MODEL_TARGETS}: SHELL=${LOCAL_SHELL}
# Every model type requires prepared corpora, then delegates to models/,
# forwarding the requested target name ($@).
${MODEL_TARGETS}: %: corpora
	${MAKE} -C models $@ DO_UPDATE_PRETRAINED_LINKS=1
OTHER_TARGETS += ${MODEL_TARGETS}
.PHONY: tune_main
tune_main: SHELL=${LOCAL_SHELL}
tune_main: tune_variant # tune_variant tunes the main variant
OTHER_TARGETS += "\n tune_main"

# Tune and test using multiple alternate tuning variants, if necessary.
ifneq ($(strip ${TUNE_DECODE_VARIANTS}),)
all: $(addprefix tune_variant., ${TUNE_DECODE_VARIANTS})
ifneq ($(strip ${TEST_SET}),)
all: $(addprefix eval., ${TUNE_DECODE_VARIANTS})
endif
endif

# Each list expands a base tuning target plus its per-variant ".<variant>"
# forms; TUNE_LIST gathers them all for the shared delegating rule below.
TUNE_VARIANT_LIST := tune_variant $(addprefix tune_variant., ${TUNE_DECODE_VARIANTS})
DECODE_LIST := decode $(addprefix decode., ${TUNE_DECODE_VARIANTS})
COW_LIST := cow $(addprefix cow., ${TUNE_DECODE_VARIANTS})
CONFIDENCE_LIST := confidence $(addprefix confidence., ${TUNE_DECODE_VARIANTS})
TUNE_LIST := tune ${TUNE_VARIANT_LIST} ${DECODE_LIST} ${COW_LIST} rescore rat ${CONFIDENCE_LIST}
.PHONY: ${TUNE_LIST}
# Tune weights
# All tuning targets require trained models, then delegate to models/.
${TUNE_LIST}: SHELL=${LOCAL_SHELL}
${TUNE_LIST}: %: models
	${MAKE} -C models $@
OTHER_TARGETS += decode confidence tune rescore
.PHONY: translate $(addprefix translate., ${TUNE_DECODE_VARIANTS})
# Apply tuned weights to the test sets
# translate.<v> depends on tune_variant.<v>; the bare "translate" target
# matches the empty stem and thus depends on plain tune_variant.
translate $(addprefix translate., ${TUNE_DECODE_VARIANTS}): SHELL=${LOCAL_SHELL}
translate $(addprefix translate., ${TUNE_DECODE_VARIANTS}): translate%: tune_variant%
# On first use, create the variant's working directory and seed it with the
# Makefiles from the main translate/ directory.
	if [ ! -e $@ ]; then \
	   mkdir $@; \
	   cp -p translate/Makefile* $@; \
	fi
	${MAKE} -C translate$* all TUNE_VARIANT_TAG=$*
OTHER_TARGETS += translate
.PHONY: eval $(addprefix eval., ${TUNE_DECODE_VARIANTS})
# Get BLEU scores for the test set(s)
# eval.<v> scores variant <v> (after translate.<v>); the bare "eval" target
# matches the empty stem and scores the main translate/ directory.
eval $(addprefix eval., ${TUNE_DECODE_VARIANTS}): SHELL=${LOCAL_SHELL}
eval $(addprefix eval., ${TUNE_DECODE_VARIANTS}): eval%: translate%
	${MAKE} -C translate$* bleu TUNE_VARIANT_TAG=$*
OTHER_TARGETS += eval
# Sanity-check the installation/configuration before building anything.
.PHONY: check_setup
check_setup: SHELL=${LOCAL_SHELL}
check_setup:
	${MAKE} -C models/lm check_setup
MAIN_TARGETS += check_setup

########################################
# Copy the bin/INSTALL_SUMMARY file, if it exists.
# The wildcard test is evaluated at parse time: only add the prerequisite when
# an INSTALL_SUMMARY file actually sits next to the train_ibm binary.
ifneq ($(wildcard $(dir $(shell which train_ibm))/INSTALL_SUMMARY),)
check_setup: log.INSTALL_SUMMARY
endif

log.INSTALL_SUMMARY: SHELL=${LOCAL_SHELL}
log.INSTALL_SUMMARY:
# Write to $@ rather than repeating the target name, so the rule cannot
# silently produce a differently-named file if the target is ever renamed.
	cat `dirname $$(which train_ibm)`/INSTALL_SUMMARY >$@
########################################
# Prepare portageLive models.
# NOTE: In order to able to execute portageLive we should at the very least
# have tuned the system. To do so, we will rely on the all target.
.PHONY: portageLive
portageLive: SHELL=${LOCAL_SHELL}
portageLive: all
	${MAKE} -C corpora portageLive
	${MAKE} -C models portageLive
ifdef DO_RESCORING
	@echo ""
	@echo "WARNING: the portageLive target does not install rescoring models."
	@echo "You will have to install them manually before you continue."
	@echo "Note that not all rescoring features are compatible with PortageLive."
	@echo ""
endif
	@echo "You now have all that is needed for PortageLive."
	@echo "From the framework root, run one of the following commands to"
	@echo "transfer the PortageLive models to your server:"
	@echo " rsync -Larz models/portageLive/* <REMOTE_HOST>:<DEST_DIR_ON_REMOTE_HOST>"
	@echo "or scp -r models/portageLive/* <REMOTE_HOST>:<DEST_DIR_ON_REMOTE_HOST>"
	@echo "or cp -Lr models/portageLive/* <DEST_DIR_ON_LOCAL_HOST>"
	@echo "Afterwards, optimize pretrained models on each PortageLive server:"
	@echo " ssh <REMOTE_HOST> plive-optimize-pretrained.sh <DEST_DIR_ON_REMOTE_HOST>"
	@echo "or plive-optimize-pretrained.sh <DEST_DIR_ON_LOCAL_HOST>"
MAIN_TARGETS += portageLive

# convenient synonyms
# Declared .PHONY so a stray file or directory named "portagelive" or
# "PortageLive" can never mask these command aliases.
.PHONY: portagelive PortageLive
portagelive: portageLive
PortageLive: portageLive
########################################
# If you need to preprocess your corpora, you can call this target to do the job.
# The end result should be .al files .
# The makefile used is overridable so projects can plug in their own
# preprocessing recipe.
PREPARE_CORPORA_MAKEFILE ?= Makefile.prepare.corpora
.PHONY: prepare.corpora
prepare.corpora: SHELL=${LOCAL_SHELL}
prepare.corpora:
	${MAKE} -C corpora -f ${PREPARE_CORPORA_MAKEFILE} all
########################################
# Resource Summary
# Aggregate time/memory statistics from the models/ and translate/ sub-builds.
.PHONY: resource_summary
resource_summary: SHELL=${LOCAL_SHELL}
resource_summary: export PORTAGE_INTERNAL_CALL=1
resource_summary:
	@${MAKE} --no-print-directory -s -C models time-mem
	@${MAKE} --no-print-directory -s -C translate time-mem
OTHER_TARGETS += resource_summary

# Tally cpu/wall time and memory from every log file under models/ and the
# translate directories, then pretty-print the result.
.PHONY: time-mem
time-mem: SHELL=${LOCAL_SHELL}
time-mem: export PORTAGE_INTERNAL_CALL=1
time-mem:
	@echo "Resource summary for `pwd`:"
	@time-mem-tally.pl `find models translate translate.* -type f -name log.\* -o -name \*.log | sort` \
	| second-to-hms.pl \
	| expand-auto.pl
MAIN_TARGETS += time-mem
# Directories whose disk usage "make summary" reports; the list grows with
# whichever optional model types are enabled in Makefile.params.
DU_DIRS = models/ibm/{ibm,hmm}* models/jpt/jpt* models/tm/cpt* models/*lm/*lm*
ifdef DO_TRUECASING
DU_DIRS += models/tc
endif
ifneq ($(or $(USE_LDM),$(USE_HLDM)),)
DU_DIRS += models/ldm
endif
ifdef USE_SPARSE
DU_DIRS += models/sparse
endif
ifneq (${NNJM_TRAIN_CORPUS},)
DU_DIRS += models/nnjm/trained
endif
ifneq (${NNJM_FINE_TUNING_TRAIN_CORPUS},)
DU_DIRS += models/nnjm/fine_tuned
endif
DU_DIRS += models/decode*
ifdef DO_CE
DU_DIRS += models/confidence*/*.cem
endif
DU_DIRS += translate translate.*

# Resource report: time/memory (via time-mem) plus disk usage of the models.
.PHONY: summary
summary: SHELL=${LOCAL_SHELL}
summary: export PORTAGE_INTERNAL_CALL=1
summary: time-mem
	@echo
	@echo "Disk usage for all models:"
# NOTE(review): GLOBIGNORE and [[ ]] below are bashisms — assumes LOCAL_SHELL
# is bash; confirm.  "|| true" keeps the report going when some dirs don't exist.
	@( GLOBIGNORE="*/log.*:translate.sh"; du -sch ${DU_DIRS} 2> /dev/null || true)
	@if [[ -e models/portageLive ]]; then \
	   echo; \
	   echo "Disk usage for portageLive models:"; \
	   du -hL models/portageLive; \
	fi
MAIN_TARGETS += summary
################################################################################
# UNITTESTS
########################################
# Confidence Estimation & no Rescoring
# Runs the full pipeline with CE enabled and rescoring disabled; MERT is capped
# at 3 iterations to keep the test short.
.PHONY: unittest1
unittest1: export TUNE_CE = dev3
unittest1: export DO_CE = 1
unittest1: export DO_RESCORING =
unittest1: export MERT_MAX_ITER := 3
unittest1: export PARALLELISM_LEVEL_TUNE_DECODE := 4
unittest1:
	${MAKE} all
########################################
# Unittest MixLM & LDMS.
# Runs the full pipeline with a 3-component mix language model and both LDM
# variants enabled, then verifies the expected model files were produced.
.PHONY: unittest2
unittest2: export PRIMARY_LM :=
unittest2: export LM_TYPES := arpa
unittest2: export MIXLM := sublm1 sublm2 sublm3
unittest2: export USE_LDM := 1
unittest2: export USE_HLDM := 1
unittest2: export MERT_MAX_ITER := 3
unittest2: export PARALLELISM_LEVEL_TUNE_DECODE := 4
unittest2:
	${MAKE} all
# Expect 6 non-trivial sublm*.lm.gz files and one dev1.mixlm file; the
# "|| ! echo" idiom prints the message AND fails the recipe line.
# NOTE(review): presumably 6 = 3 sub-LMs x 2 languages — confirm.
	[[ `find models/mixlm/ -maxdepth 1 -size +21c -name sublm\*.lm.gz | \wc -l` -eq 6 ]] || ! echo "Missing some Language Model files." >&2
	[[ `find models/mixlm/ -maxdepth 1 -size +1c -name dev1.mixlm | \wc -l` -eq 1 ]] || ! echo "Missing some the Mix Language Model file." >&2
########################################
# Unittest LDM & HLDM with more than one corpora.
# Builds only the LDMs from a 3-corpus TM, then checks the produced file counts.
.PHONY: unittest3
unittest3: export TRAIN_TM = sublm1 sublm2 sublm3
unittest3: export USE_LDM = 1
unittest3: export USE_HLDM = 1
unittest3:
	${MAKE} ldm
# The "|| ! echo" idiom prints the message AND fails the recipe line.
# NOTE(review): expected counts (48 / 4 / 4) look tied to the 3-corpus setup
# above — confirm before changing TRAIN_TM.
	[[ `find models/tm -maxdepth 1 -size +21c -name \*sublm\* | \wc -l` -eq 48 ]] || ! echo "Missing some translation model files." >&2
	[[ `find models/ldm -maxdepth 1 -name ldm.* -size +21c | \wc -l` -eq 4 ]] || ! echo "Missing some Lexicalized Distortion Model files." >&2
	[[ `find models/ldm -maxdepth 1 -name hldm.* -size +21c | \wc -l` -eq 4 ]] || ! echo "Missing some Hierarchical Lexicalized Distortion Model files." >&2
########################################
# Unittest MIXTM, 1WAM & CONFIDENCE ESTIMATION.
.PHONY: unittest4
unittest4: mixtm_1wam_ce_testcase

# Trains a fr->en system with a 2-component mix translation model using a
# single global word-alignment model, builds a CE model, and verifies the CE
# artifacts before running the confidence testsuite.
.PHONY: mixtm_1wam_ce_testcase
mixtm_1wam_ce_testcase: export SRC_LANG := fr
mixtm_1wam_ce_testcase: export TGT_LANG := en
mixtm_1wam_ce_testcase: export MIXTM := tm-train1 tm-train2
#mixtm_1wam_ce_testcase: export MIXTM := tm-train1 tm-train2 tm-train3
mixtm_1wam_ce_testcase: export TUNE_CE := dev3
mixtm_1wam_ce_testcase: export DO_CE := 1
mixtm_1wam_ce_testcase: export MIXTM_USE_GLOBAL_WORD_ALIGNMENT_MODEL := 1
mixtm_1wam_ce_testcase: export MERGED_CPT_JPT_TYPES := IBM2 HMM3
mixtm_1wam_ce_testcase: export MERT_MAX_ITER := 3
mixtm_1wam_ce_testcase: export USE_HLDM :=
mixtm_1wam_ce_testcase: export USE_SPARSE :=
mixtm_1wam_ce_testcase: export USE_COARSELM :=
mixtm_1wam_ce_testcase:
	${MAKE} confidence
# The "|| ! echo" idiom prints the message AND fails the recipe line.
	[[ -s models/confidence/ce-notm.ini ]] || ! echo "ERROR: Was unable to instanciate a CE template." >&2
# NOTE(review): the expected count of 8 mixwam references presumably follows
# from MERGED_CPT_JPT_TYPES above — confirm if those types change.
	[[ `grep -c mixwam models/confidence/ce-notm.ini` -eq 8 ]] || ! echo "ERROR: CE model should be using IBM mixwam." >&2
	[[ -s models/confidence/ce_model.cem ]] || ! echo "ERROR: Was unable to train a CE model." >&2
	${MAKE} -C models/confidence testsuite
########################################
# Regression test for pretrained-LM handling (tracked as ptgsh_295).
# "lm_pretrained_testcase" is the correctly-spelled entry point; the original
# misspelled name "lm_pretrained_tescase" is kept as a backward-compatible
# synonym so existing invocations keep working.
.PHONY: lm_pretrained_testcase lm_pretrained_tescase
lm_pretrained_testcase lm_pretrained_tescase: ptgsh_295

# Build a plain 3-gram en->fr LM with every optional LM flavour (mixlm,
# bilm, coarselm, pretrained links) disabled, delegating to models/lm.
.PHONY: ptgsh_295
ptgsh_295: export ORDER := 3
ptgsh_295: export SRC_LANG := en
ptgsh_295: export TGT_LANG := fr
ptgsh_295: export TRAIN_LM := lm-train
ptgsh_295: export MIXLM :=
ptgsh_295: export MIXLM_PRETRAINED_TGT_LMS :=
#ptgsh_295: export TRAIN_COARSELM :=
ptgsh_295: export TRAIN_BILM :=
ptgsh_295: export USE_COARSELM :=
ptgsh_295: export LM_PRETRAINED_TGT_LMS :=
ptgsh_295:
	${MAKE} -C models/lm $@