#/////////////////////////////////////////////////////////////////////
# makefile for Charniak parser TRAIN dir
#
# 07/02/07 Matt Lease
# Imported allScript training regime into the makefile for efficient 
# -j2 multi-proc training (printouts get intertwined, unfortunately).
#
#
# 06/18/09 Mark Johnson
# Added -fpermissive to CFLAGS to avoid g++ 4.4 compiler bailing on 
# const errors in program
#
#/////////////////////////////////////////////////////////////////////

# Flags passed to g++ by the rules in this makefile:
#   -fPIC         allows the .o files to be included in shared libraries
#   -fpermissive  keeps g++ 4.4 from bailing on const errors in the program
CFLAGS := -fPIC -fpermissive

#---------------------------------------------------------------------
# Training Usage / Invocation / Required arguments

# Usage message; printed by the required-argument checks below when one of
# DATA, TRAIN or TUNE is missing.
#
# It's fine if DATA/ is supplied with a closing slash -- training works
# either way, so I'm not sure why allScript went to pains to remove it.

# Usage text, emitted through $(warning) just before a missing-argument
# $(error).  DATA, TRAIN and TUNE are make variables, so they have to be
# passed as NAME=value; bare words on the make command line would be taken
# as goals, which is why the message spells out the NAME=value form.
define trainUsage
	$(warning Usage: make <parser/lm/chineseParser> DATA=<DATA_dir> TRAIN=<train_trees> TUNE=<dev_trees> \
	          (Trains English parser if no optional flags supplied))
endef

# Required Arguments
#
# The training goals (parser, lm, chineseParser) all need DATA, TRAIN and
# TUNE.  The check is skipped for every other goal, so building the
# programs needs no parameters, and the test is confined to one place (here).
#
# $(filter ...) is used instead of an exact string comparison so that the
# check also fires for "lm" and "chineseParser", and when more than one goal
# is named on the command line.
#
ifneq ($(filter parser lm chineseParser,$(MAKECMDGOALS)),)

ifndef DATA
$(trainUsage)
$(error DATA dir not specified)
endif
ifndef TRAIN
$(trainUsage)
$(error TRAIN corpus not specified)
endif
ifndef TUNE
$(trainUsage)
$(error TUNE corpus not specified)
endif

endif

# Supplement the input train and tune trees with the bugfix trees:
# $(DATA)/bugFix.txt is appended to both file lists, which the recipes
# below "cat" into the training programs.
# "override" lets these assignments take effect even when TRAIN and TUNE
# are set on the make command line (command-line values otherwise win over
# assignments made in the makefile).
# The commented-out line would strip a closing slash from DATA.
#override DATA  := $(patsubst %/,%,$(DATA)) 
override TRAIN := $(TRAIN) $(DATA)/bugFix.txt 
override TUNE  := $(TUNE)  $(DATA)/bugFix.txt

# Invocation details: echo the data directory and the effective TRAIN / TUNE
# file lists (i.e. with the bugfix trees already appended).
.PHONY: printArgs
printArgs:
	@echo INVOCATION:; \
	echo "* directory: ${DATA}"; \
	echo "* TRAIN file: ${TRAIN}"; \
	echo "* TUNE file: ${TUNE}"

#---------------------------------------------------------------------
# MAKE (p. 46): "define" for 'canned command sequence' (no args)
# Prints an empty line followed by a dashed separator; used as the first
# recipe line of the training steps.
# printf is used rather than "echo -e": make runs recipes with /bin/sh by
# default, and not every sh's echo understands -e (some print "-e" literally).
define printSep
@printf '\n-----------------------------\n'
endef

#---------------------------------------------------------------------
# What if some training already done in parser/lm mode, and then make
# invoked for other mode??

# Defaults used by the training recipes: the English head finder program and
# the English language switch.  The "lm" goal replaces SWITCH and the
# "chineseParser" goal replaces both, via target-specific assignments.
# NOTE: comments are kept off the assignment lines on purpose -- make does
# not strip the blanks between a value and a trailing "#", so they would
# become part of the value.
HEAD_PROG := pTgNt
SWITCH := -LEn

# MAKE: if a target's command fails, delete the target file, so that a
# partially written output is not taken for an up-to-date one on the next run
.DELETE_ON_ERROR:

# Main training goal: print the invocation arguments, then build every model
# file under $(DATA).  Each of the feature types r m l u h lm ru rm tt
# contributes a <type>.g and a <type>.lambdas file.
# Declared phony because "parser" names a goal, not a file; this also keeps
# a stray file or implicit rule called "parser" from interfering.
.PHONY: parser
parser: printArgs \
	$(DATA)/pSgT.txt $(DATA)/unitRules.txt \
	$(DATA)/pUgT.txt $(DATA)/nttCounts.txt $(DATA)/endings.txt \
	$(foreach feat,r m l u h lm ru rm tt,$(DATA)/$(feat).g $(DATA)/$(feat).lambdas)

# Language-model training: everything the "parser" goal builds, with SWITCH
# set to -M for all recipes run on behalf of this goal, plus $(DATA)/ww.g.
# Declared phony because "lm" names a goal, not a file.
.PHONY: lm
lm: SWITCH := -M
lm: parser $(DATA)/ww.g

# Chinese parser training: the "parser" pipeline with the Chinese language
# switch and the pSfgT head finder program.
#
# Target-specific variables are only visible inside recipes; prerequisite
# lists are expanded while the makefile is read and see the global value.
# With the target-specific assignment alone, rules that list $(HEAD_PROG) as
# a prerequisite would build the default head finder while their recipe runs
# pSfgT, which might never have been built.  So when chineseParser is a
# requested goal, HEAD_PROG is also switched globally, before those rules
# are read.
.PHONY: chineseParser
ifneq ($(filter chineseParser,$(MAKECMDGOALS)),)
HEAD_PROG := pSfgT
endif
chineseParser: SWITCH := -LCh
chineseParser: HEAD_PROG := pSfgT
chineseParser: parser

#---------------------------------------------------------------------
# pSgT and pUgT have multiple outputs -- if you list both outputs as
# targets of one rule, then parallel make will invoke the program 
# twice in parallel (this is bad). To work around this, arbitrarily 
# pick one output as the rule target, and make the others depend on
# this target. 
#
# Note you also need to specify these extra outputs are "empty 
# commands" using the ";" or else they will be matched to any 
# implicit "%" rule pattern in the makefile

# One pSgT run over the training trees yields both pSgT.txt and
# unitRules.txt.  pSgT.txt is the real rule target; unitRules.txt merely
# depends on it and has an empty recipe (";") so no implicit rule is tried.
# The program is run as ./$< (the first prerequisite, built in this
# directory) so that '.' does not have to be on PATH.
$(DATA)/unitRules.txt: $(DATA)/pSgT.txt ;
$(DATA)/pSgT.txt: pSgT
	$(printSep)
	cat $(TRAIN) | ./$< $(SWITCH) $(DATA)/

# One pUgT run over the training trees yields both pUgT.txt and
# nttCounts.txt.  pUgT.txt is the real rule target; nttCounts.txt merely
# depends on it and has an empty recipe (";").  Runs after pSgT.txt exists.
# The program is run as ./$< (the first prerequisite, built in this
# directory) so that '.' does not have to be on PATH.
$(DATA)/nttCounts.txt: $(DATA)/pUgT.txt ;
$(DATA)/pUgT.txt: pUgT $(DATA)/pSgT.txt
	$(printSep)
	cat $(TRAIN) | ./$< $(SWITCH) $(DATA)/

# Feed the training trees to the head finder program selected by HEAD_PROG;
# the rule's target is $(DATA)/endings.txt.  Runs after pSgT.txt exists.
# NOTE: $(HEAD_PROG) in the prerequisite list is expanded while the makefile
# is read (global value), whereas the recipe sees the value in effect for
# the goal being built (possibly a target-specific one).
# The program is run as ./<name> so that '.' does not have to be on PATH.
$(DATA)/endings.txt: $(HEAD_PROG) $(DATA)/pSgT.txt
	$(printSep)
	cat $(TRAIN) | ./$(HEAD_PROG) $(SWITCH) $(DATA)/

#---------------------------------------------------------------------
# right, "middle", left, ?, head, ...?
# "%" pattern used for: r m l u h lm ru rm tt 
# MAKE: pattern referred to by "%" in prereqs but "$*" in commands
#
# Since %.ff & %.f are not explicitly given as targets for training
# (they are generated by implicit rules in creating %.g and %.lambdas),
# make treats them as intermediate files and automatically deletes them
# after the explicit targets have been built.

# Cutoff value handed to selFeats (its second argument) when a .f file is
# built: 50 by default, with target-specific values for the ru and tt
# feature types.
CUTOFF := 50
$(DATA)/ru.f : CUTOFF := 98
$(DATA)/tt.f : CUTOFF := 100

# First link of the per-feature chain (.ff -> .f -> .g -> .lambdas): run
# rCounts over the training trees for feature type $* (the stem matched by
# "%").  Needs pSgT.txt to exist first.
# The program is run as ./$< (the first prerequisite, built in this
# directory) so that '.' does not have to be on PATH.
$(DATA)/%.ff: rCounts $(DATA)/pSgT.txt
	$(printSep)
	cat $(TRAIN) | ./$< $(SWITCH) $* $(DATA)/
# Second link of the per-feature chain: run selFeats for feature type $*
# with the cutoff in effect for this target ($(CUTOFF)), once the .ff file
# exists.
# The program is run as ./$< (the first prerequisite, built in this
# directory) so that '.' does not have to be on PATH.
$(DATA)/%.f: selFeats $(DATA)/%.ff
	$(printSep)
	./$< $* $(CUTOFF) $(DATA)/
# Third link of the per-feature chain: run iScale for feature type $* once
# the .f file exists.
# The program is run as ./$< (the first prerequisite, built in this
# directory) so that '.' does not have to be on PATH.
$(DATA)/%.g: iScale $(DATA)/%.f
	$(printSep)
	./$< $* $(DATA)/
# Last link of the per-feature chain: run trainRs over the TUNE trees (not
# the TRAIN trees) for feature type $*, once the .g file exists.
# The program is run as ./$< (the first prerequisite, built in this
# directory) so that '.' does not have to be on PATH.
$(DATA)/%.lambdas: trainRs $(DATA)/%.g
	$(printSep)
	cat $(TUNE) | ./$< $(SWITCH) $* $(DATA)/

#---------------------------------------------------------------------
# Kneser-Ney trigram estimation

# Run kn3Counts over the training trees with the fixed name "ww"; the
# rule's target is $(DATA)/ww.g (only requested by the "lm" goal).  Unlike
# the other training steps, no $(SWITCH) is passed.
# The program is run as ./$< (the first prerequisite, built in this
# directory) so that '.' does not have to be on PATH.
$(DATA)/ww.g : kn3Counts $(DATA)/pSgT.txt
	cat $(TRAIN) | ./$< ww $(DATA)/

#---------------------------------------------------------------------
# Build Training programs

# Compile one C++ source file (suffix .C) into the object file of the same
# name, with optimisation (-O) and the shared CFLAGS.
%.o: %.C
	g++ ${CFLAGS} -c -O $<
 
# rCounts: the program run by the %.ff training rule.
# The object list is spelled as base names; addsuffix appends ".o".
RCOUNTS_OBJS := $(addsuffix .o, \
	ClassRule ECArgs EmpNums Feat Feature FeatureTree InputTree \
	Pst Phegt Term auxify ccInd headFinder headFinderCh treeHistSf \
	utils rCounts)
rCounts: $(RCOUNTS_OBJS)
	g++ $(CFLAGS) $^ -o $@

# iScale: the program run by the %.g training rule.
# The object list is spelled as base names; addsuffix appends ".o".
ISCALE_OBJS := $(addsuffix .o, \
	ECArgs Feat Feature FeatureTree FeatIter FeatTreeIter \
	Phegt Term utils iScale)
iScale: $(ISCALE_OBJS)
	g++ $(CFLAGS) $^ -o $@
 
# selFeats: the program run by the %.f training rule.
# The object list is spelled as base names; addsuffix appends ".o".
SELFEATS_OBJS := $(addsuffix .o, \
	ECArgs Feat Feature FeatureTree FeatIter FeatTreeIter \
	Pst Phegt Term utils selFeats)
selFeats: $(SELFEATS_OBJS)
	g++ $(CFLAGS) $^ -o $@

 
# trainRs: the program run by the %.lambdas training rule.
# The object list is spelled as base names; addsuffix appends ".o".
TRAINRS_OBJS := $(addsuffix .o, \
	trainRsUtils ClassRule ECArgs EmpNums Feat Feature FeatureTree \
	InputTree Pst Phegt Smoother Term auxify ccInd headFinder \
	headFinderCh treeHistSf utils trainRs)
trainRs: $(TRAINRS_OBJS)
	g++ $(CFLAGS) $^ -o $@

 
# kn3Counts: the program run by the $(DATA)/ww.g rule.
# The object list is spelled as base names; addsuffix appends ".o".
KN3COUNTS_OBJS := $(addsuffix .o, \
	ClassRule ECArgs EmpNums Feat Feature FeatIter FeatTreeIter \
	FeatureTree InputTree Pst Phegt Term auxify ccInd headFinder \
	headFinderCh treeHistSf utils kn3Counts)
kn3Counts: $(KN3COUNTS_OBJS)
	g++ $(CFLAGS) $^ -o $@

# auxIt: helper program; not used by any training rule in this makefile and
# not part of "all".
# $(CFLAGS) is passed at link time too, like the other CFLAGS-linked
# programs, so that flags which must match between compile and link
# (e.g. -m32) reach both steps.
# The object list is spelled as base names; addsuffix appends ".o".
AUXIT_OBJS := $(addsuffix .o, \
	BrownIter EmpNums ECArgs InputTree Term headFinder headFinderCh \
	utils auxify auxIt)
auxIt: $(AUXIT_OBJS)
	g++ $(CFLAGS) $^ -o $@

# pSgT: the program run by the $(DATA)/pSgT.txt rule.
# $(CFLAGS) is passed at link time too, like the other CFLAGS-linked
# programs, so that flags which must match between compile and link
# (e.g. -m32) reach both steps.
# The object list is spelled as base names; addsuffix appends ".o".
PSGT_OBJS := $(addsuffix .o, \
	ECArgs EmpNums InputTree Term auxify headFinder headFinderCh \
	utils UnitRules pSgT)
pSgT: $(PSGT_OBJS)
	g++ $(CFLAGS) $^ -o $@


# pTgNt: the default (English) head finder program, see HEAD_PROG.
# $(CFLAGS) is passed at link time too, like the other CFLAGS-linked
# programs, so that flags which must match between compile and link
# (e.g. -m32) reach both steps.
# The object list is spelled as base names; addsuffix appends ".o".
PTGNT_OBJS := $(addsuffix .o, \
	ECArgs EmpNums InputTree Pst Phegt Term headFinder headFinderCh \
	utils pTgNt)
pTgNt: $(PTGNT_OBJS)
	g++ $(CFLAGS) $^ -o $@

# pSfgT: the head finder program selected by the chineseParser goal.
# $(CFLAGS) is passed at link time too, like the other CFLAGS-linked
# programs, so that flags which must match between compile and link
# (e.g. -m32) reach both steps.
# The object list is spelled as base names; addsuffix appends ".o".
PSFGT_OBJS := $(addsuffix .o, \
	ECArgs EmpNums InputTree Pst Phegt Term headFinder headFinderCh \
	utils pSfgT)
pSfgT: $(PSFGT_OBJS)
	g++ $(CFLAGS) $^ -o $@

# pUgT: the program run by the $(DATA)/pUgT.txt rule.
# $(CFLAGS) is passed at link time too, like the other CFLAGS-linked
# programs, so that flags which must match between compile and link
# (e.g. -m32) reach both steps.
# The object list is spelled as base names; addsuffix appends ".o".
PUGT_OBJS := $(addsuffix .o, \
	ECArgs EmpNums InputTree Pst Phegt Term auxify headFinder \
	headFinderCh utils pUgT)
pUgT: $(PUGT_OBJS)
	g++ $(CFLAGS) $^ -o $@

# getProbs: extra program; not used by any training rule in this makefile
# and not part of "all".
# The object list is spelled as base names; addsuffix appends ".o".
GETPROBS_OBJS := $(addsuffix .o, \
	ClassRule ECArgs EmpNums Feat Feature FeatureTree InputTree \
	Pst Phegt Smoother Term auxify ccInd headFinder headFinderCh \
	treeHistSf utils trainRsUtils getProbs)
getProbs: $(GETPROBS_OBJS)
	g++ $(CFLAGS) $^ -o $@

# Build the training programs without running any training step.
# (auxIt and getProbs are not part of this list.)
# Declared phony so that a file named "all" cannot mask the goal.
.PHONY: all
all: rCounts selFeats iScale trainRs pSgT pTgNt pUgT kn3Counts pSfgT

# Remove the object files; linked programs are left in place.
# Declared phony so that a file named "clean" cannot disable the goal.
.PHONY: clean
clean:
	rm -f *.o

# Remove the object files and every program this makefile can link.
# pSfgT (built by "all"), auxIt and getProbs are included so that no build
# product is left behind.
# Declared phony so that a file named "realclean" cannot disable the goal.
.PHONY: realclean
realclean:
	rm -f *.o rCounts selFeats iScale trainRs pSgT pTgNt pUgT kn3Counts \
	      pSfgT auxIt getProbs

#---------------------------------------------------------------------

#ifeq ($(SWITCH),-LCh)
#HEAD_PROG := pSfgT # Chinese head finder
#else
#ifneq ($(SWITCH),-M)
#ifdef SWITCH
#$(error invalid SWITCH specified: $(SWITCH))
#endif
#endif #ifneq ($(SWITCH),-M)
#endif #ifeq ($(SWITCH),-LCh)




#ifeq ($(MODE),lm)
#SWITCH := "-M"
#else
#ifeq ($(MODE),Chinese)
#HEAD_PROG=pSfgT
#SWITCH := "-LCh"
#else
#$(error invalid MODE specified)
#endif
#endif




# 03/13/07 Matt Lease
#
# "make" offers the advantage of easy parallelization (on one machine) and managing 
# dependencies, but in other ways it's much more restricted than shell programming.
# It's pretty clear you can't do everything here, so what goes in make and what do 
# you do externally?
#
# Minimally, I could just do the bare essentials for parallelization, and leave the rest
# to shell scripting.

## Directories

#ROOT=/cygdrive/c/matt/work-new
#SRILM=$(ROOT)/matt/srilm
#COLLECTION=$(ROOT)/matt/collection/SJMN-split
#LM=$(SRILM)/lm

#REAL_SRILM=$(ROOT)/srilm

## Programs

#NGC=$(REAL_SRILM)/bin/msvc/ngram-count.exe
#build_unigram=$(NGC) -unk -order 1 

##NG=$(REAL_SRILM)/lm/bin/msvc_g/ngram2.exe 
#NG=$(REAL_SRILM)/lm/bin/msvc/ngram2.exe 
#rerank=$(NG)-unk -lambda 0.6 # -bayes 0

## Input
#collection-lm = $(SRILM)/collection_lm
##lm-filenames = $(SRILM)/tmp/nbest-lmquery
#lm-filenames = $(SRILM)/tmp/nbest-lmfiles
#queries = $(SRILM)/query/051-150.porter

## Build document language models

## could also pass list of targets as arguments to make, but run into unix's max number of 
## arguments limit since I have to build thousands of doc models
##
##	sed -e 's| .*||g' $(nbest-query-docs) | xargs -n 1 cygpath > $(lm-filenames)
##
##lm-files = $(shell sed -e 's| .*||g' $(lm-filenames))
#lm-files = $(shell cat $(lm-filenames)) 
#LMs : $(lm-files)

## could instead just call a shell script with the arguments
#$(LM)/% : $(NGC)
#	mkdir -p $(shell dirname $@)
#	$(build_unigram) -text `cygpath -w $(COLLECTION)/$*` -lm `cygpath -w $@` > $@.log 2>&1

#.PHONY: clean-LMs
#clean-LMs:
#	rm -rf $(LM)

## Rerank retrieved documents across queries
## lmquery-% represents some subset of the nbest-docs-lmquery lines
## lmquery-% targets should be given as arguments to make
#tmp/rescored-% : $(SRILM)/tmp/lmquery-% $(LMs) $(NG)
#	$(rerank) -lm `cygpath -w $(collection-lm)` -mix-lm `cygpath -w $<` \
#	          -ppl `cygpath -w $(queries)` > $@ 2> $@.err

#clean-rescored:
#	rm -f tmp/rescored-*


#=========================================================================================
# Stuff I've played with but am not using

#			 -mix-lm `cygpath -w $(collection-lm)` \


#docs := $(shell sed -e 's| .*||g' -e "s|$(LM)|$(COLLECTION)|g" $(SRILM)/5best-docs-lmquery) 
#docs := $(shell sed -e 's| .*||g' -e "s|srilm|$(COLLECTION)|g" $(SRILM)/5best-docs-lmquery) 

# Scratch goal left over from experimentation: echoes the value of LM.
# (Every assignment to LM in this file is commented out, so this prints an
# empty line unless LM is supplied from outside.)
# Earlier variant, kept for reference:
#	echo $(LMs) | sed -e 's| |\n|g' | head
.PHONY: test
test:
	echo ${LM}

#=========================================================================================
# Stolen from Mark's makefile for example

#NFOLDS=20
#FOLDS=00 01 02 03 04 05 06 07 08 09 10 11 12 13 14 15 16 17 18 19
#NPARSES=50


#NBESTFILES= $(foreach fold,$(FOLDS),$(NBESTDIR)/fold$(fold).gz)

#nbesttrain: $(NBESTFILES)

## This goal copies and gzips the output of the n-best parser
## into the appropriate directory for training the reranker.
##
#.PRECIOUS: $(NBESTDIR)/fold%.gz
#$(NBESTDIR)/fold%.gz: $(TMP)/fold%/$(NPARSES)best
#	mkdir -p $(NBESTDIR)
#	gzip -c $+ > $@

#$(TMP)/fold%/$(NPARSES)best: $(TMP)/fold%/DATA $(TMP)/fold%/yield $(NBESTPARSER)
#	$(NBESTPARSER) -l999 -K -N$(NPARSES) $(@D)/DATA/ $(@D)/yield > $@


#=========================================================================================
# DOCUMENTATION

# a few notes 
# * can't declare variables in commands for target
# * make reserves $, so watch for in use of sed
# * variable initialization doesn't support dependencies (all init at start of make)
# * dependencies can only be a list of targets
# * can't do piping in "$(shell ...)" calls

# TARGETS is the list of targets built when make is called
# without arguments
#
#TARGETS = PARSE reranker-runtime evalb


#target... : dependencies ...
#		command
#		...
#		...

#A variable is defined with the syntax

#var_name = definition

#and is expanded with $(var_name).

#A pattern rule contains the character '%' (exactly one of them) in the target
#the '%' matches any nonempty substring, while other characters match only themselves. 
#'%' in a dependency of a pattern rule stands for the same stem that was matched by the '%' in the target. 

#Here is a table of the most useful automatic variables:

#$*
#    The stem with which an implicit rule matches. If the target is 'dir/a.foo.b' and the target pattern is 'a.%.b' then the stem is 'dir/foo'. The stem is useful for constructing names of related files.

#$@
#    The file name of the target of the rule.

#$<
#    The name of the first dependency.

#$?
#    The names of all the dependencies that are newer than the target, with spaces between them.

#$^
#    The names of all the dependencies, with spaces between them.

#-----

#tests = $(basename $(wildcard t1.*.out))
#test: $(tests)

#In this example the $(wildcard ...) function builds a list of all the files in the current directory matching the shell wildcard pattern 't1.*.out'. 
#This list, separated by spaces, is the argument to the function $(basename ...)

#-----

#Adding '-k' to the invocation of make. Make will then do all of the commands it can, even if some of them result in errors

#-----

#If make gets a fatal signal while a command is executing, it may delete the target file that the command was supposed to update. 
#You can prevent the deletion of a target file in this way by making the special target .PRECIOUS depend on it. 

#-----

#A phony target is one that is not really the name of a file. It is just a name for some commands to be executed when you make an explicit request.

#If you write a rule whose commands will not create the target file, the commands will be executed every time the target comes up for remaking.
#Because the rm command does not create a file named `clean', probably no such file will ever exist. 
#Therefore, the rm command will be executed every time you say `make clean'.

#Thus, you first write the line that states that clean is a phony target, then you write the rule, like this:

#.PHONY: clean
#clean:
#        rm *.o temp


#`-s':    Silent operation; do not print the commands as they are executed. 
