##-*- Mode: Makefile -*-
##
## File: Makefile
## Author: Bryan Jurish <moocow@cpan.org>
## Description:
##  + top-level makefile for corpus preparation via dta-tokwrap
## Usage:
##  + DO NOT edit this file (unless you *really* know what you're doing)
##  + Copy the file "User.mak" which came with the distribution to
##    a new file, e.g. "MyConfig.mak", and edit the new file to suit your
##    needs
##  + Call make with "config=MyConfig.mak" on the command line, e.g.:
##    $ make config=MyConfig.mak all
##  + ... you atta be in buttah ...
##======================================================================

##======================================================================
## Configuration: User

## config : user configuration makefile, read before any default below
##  + select another one with "make config=MyConfig.mak ..."
config ?= User.mak
include ${config}

##======================================================================
## Configuration: Defaults

##--------------------------------------------------------------
## Configuration: Defaults: sources & targets

## xmldir : directory holding the source XML files
xmldir ?= .

## xml : source XML files (*.chr.xml, *.char.xml) found in $(xmldir)
##  + $(wildcard) takes a single argument, so directory and glob must be joined
##    with "/" (a comma would simply become part of the pattern)
##  + a leading "./" is stripped so that for xmldir=. the names in $(xml) equal
##    those in $(XML); "$(filter-out $(xml),$(XML))" (used by the "xml",
##    "no-xml" and "realclean" rules) then never selects the sources themselves
xml    ?= $(patsubst ./%,%,$(wildcard $(xmldir)/*.chr.xml) $(wildcard $(xmldir)/*.char.xml))
outdir = .
tmpdir = $(outdir)

## corpus : prefix of the corpus-wide files, e.g. $(corpus).src.xml_errors, $(corpus).typf
##  + no comment on the assignment line: the blank before a trailing comment
##    would become part of the value and split "$(corpus).foo" into two words
corpus ?= $(notdir $(xmldir))

## XML : basenames of the source XML files (directory part removed)
XML = $(notdir $(xml))

##--------------------------------------------------------------
## Configuration: Defaults: shell

## NICE : prefix used to run subprocesses at reduced priority
##  + ionice -c3     : idle class; very nice (active default)
##  + ionice -c2 -n5 : best-effort class, 5/7 nice (disabled alternative)
#NICE ?= ionice -c2 -n5 nice -n10
NICE ?= ionice -c3 nice -n10

## SHELL : make's built-in command shell variable
##  + only replaced while it still has the stock value /bin/sh
##  + the replacement runs bash under $(NICE); "-o pipefail" makes a pipeline
##    fail if any of its commands fails, not just the last one
ifeq "$(SHELL)" "/bin/sh"
 SHELL = ${NICE} /bin/bash -o pipefail
endif

##--------------------------------------------------------------
## Configuration: Defaults: tokwrap

## TOKWRAP_OPTS
##  + option list for dta-tokwrap.perl; extended by the conditionals which follow
TOKWRAP_OPTS = -keep

## inplace : if unset, becomes "yes" when ../src/dtatw-mkindex.c exists, else "no"
ifeq ($(inplace),)
ifeq ($(shell test -f ../src/dtatw-mkindex.c && echo yup),yup)
inplace=yes
else
inplace=no
endif
endif

## anything but "yes" selects -noinplace
ifneq ($(inplace),yes)
TOKWRAP_OPTS += -noinplace
else
TOKWRAP_OPTS += -inplace
endif

##--------------------------------------------------------------
## Configuration: Defaults: dta-tokwrap.perl: behavior

## dummytok : if unset, becomes "no" when a "waste" executable is on PATH, else "yes"
ifeq ($(dummytok),)
ifneq ($(shell which waste 2>/dev/null),)
override dummytok := no
else
override dummytok := yes
endif
endif

## anything but "no" selects -dummytok
ifneq ($(dummytok),no)
#TOKWRAP_OPTS += -dummytok -strong-hints
TOKWRAP_OPTS += -dummytok
else
#TOKWRAP_OPTS += -nodummytok -weak-hints
TOKWRAP_OPTS += -nodummytok
endif

## optional lexica: passed along only when the variable is non-empty
ifneq ($(abbrevlex),)
TOKWRAP_OPTS += -abbrev-lex="${abbrevlex}"
endif

ifneq ($(mwelex),)
TOKWRAP_OPTS += -mwe-lex="${mwelex}"
endif

##--------------------------------------------------------------
## Configuration: Defaults: dta-tokwrap.perl: verbosity & logging

## each option below is added only if the corresponding variable is non-empty

ifneq ($(verbose),)
TOKWRAP_OPTS += -verbose=${verbose}
endif

ifneq ($(loglevel),)
TOKWRAP_OPTS += -log-level="${loglevel}"
endif

ifneq ($(logfile),)
TOKWRAP_OPTS += -log-file="${logfile}"
endif

## stderr : "no" gives -nostderr, any other non-empty value gives -stderr
ifneq ($(stderr),)
ifneq ($(stderr),no)
TOKWRAP_OPTS += -stderr  ##-- default
else
TOKWRAP_OPTS += -nostderr
endif
endif

## trace : "no" gives -notrace, any other non-empty value gives -trace
ifneq ($(trace),)
ifneq ($(trace),no)
TOKWRAP_OPTS += -trace
else
TOKWRAP_OPTS += -notrace
endif
endif

## profile : "no" gives -noprofile, any other non-empty value gives -profile
ifneq ($(profile),)
ifeq ($(profile),no)
TOKWRAP_OPTS += -noprofile
else
TOKWRAP_OPTS += -profile
endif
endif

##-- user options: appended last
TOKWRAP_OPTS += ${twopts}

##--------------------------------------------------------------
## Configuration: Defaults: ddc attributes

##-- DDC_ATTRS : arguments for dtatw-get-ddc-attrs.perl
##  + inserted into the command line of the %.ddc.t.xml rule
##  + assigned with "?=", so a value from the user configuration takes precedence
DDC_ATTRS ?= -xr -xc -noxp -pb -bb -u -v=0


##--------------------------------------------------------------
## Configuration: Defaults: programs & in-place execution

PERL = perl

## Three mutually exclusive layouts:
##  (1) TOKWRAP_ROOT set : everything is located below $(TOKWRAP_ROOT)
##  (2) inplace=yes      : run from the source tree (../src, ../scripts, ../DTA-TokWrap)
##  (3) otherwise        : bare program names; only XSL_DIR has a path default
ifneq ($(TOKWRAP_ROOT),)

##-- (1) rooted layout; *_DIR values end in "/" and are used as plain prefixes
TOKWRAP_DIR ?= ${TOKWRAP_ROOT}DTA-TokWrap/
TOKWRAP     ?= ${TOKWRAP_DIR}dta-tokwrap.perl ${TOKWRAP_OPTS}
PROG_DIR    ?= ${TOKWRAP_ROOT}src/
SCRIPT_DIR  ?= ${TOKWRAP_ROOT}scripts/
XSL_DIR     ?= ${SCRIPT_DIR}

TOKWRAP_DEPS ?=
PROG_DEPS    ?=
SCRIPT_DEPS  ?=

else
ifeq ($(inplace),yes)

##-- (2) in-place layout; the *_DEPS lists make rebuilds follow source changes
TOKWRAP_DIR  = ../DTA-TokWrap
TOKWRAP_SRC  = ${TOKWRAP_DIR}/dta-tokwrap.perl
TOKWRAP_DEPS = ${TOKWRAP_SRC}
#TOKWRAP      = $(PERL) -Mlib=$(TOKWRAP_DIR)/blib/lib $(TOKWRAP_DIR)/blib/script/dta-tokwrap.perl $(TOKWRAP_OPTS)
TOKWRAP      = ./dta-tokwrap.perl ${TOKWRAP_OPTS}

PROG_DIR  = ../src/
PROG_DEPS = $(wildcard ${PROG_DIR}*.c) $(wildcard ${PROG_DIR}*.h) $(wildcard ${PROG_DIR}*.l)

SCRIPT_DIR  = ../scripts/
SCRIPT_DEPS = $(wildcard ${SCRIPT_DIR}dtatw-*.perl)

XSL_DIR = ../scripts

else

##-- (3) default layout; empty *_DIR prefixes leave bare command names
TOKWRAP_DIR  ?=
TOKWRAP_DEPS ?=
TOKWRAP      ?= dta-tokwrap.perl ${TOKWRAP_OPTS}

PROG_DIR  ?=
PROG_DEPS ?=

SCRIPT_DIR  ?=
SCRIPT_DEPS ?=

XSL_DIR ?= /usr/local/share/dta-tokwrap/stylesheets

endif
endif

##--------------------------------------------------------------
## Configuration: well-formed checking

## xml_wfcheck : command used by the *.xml.errors rules
##  + if unset, the first of xmlwf, xmlstarlet, xml found via "which" is used
##  + "xmllint --noout" is the final fallback (its presence is not tested)
ifeq ($(xml_wfcheck),)
 ifeq ($(shell which xmlwf 2>/dev/null),)
  ifeq ($(shell which xmlstarlet 2>/dev/null),)
   ifeq ($(shell which xml 2>/dev/null),)
    xml_wfcheck := xmllint --noout
   else
    xml_wfcheck := xml val -w -b -e
   endif
  else
   xml_wfcheck := xmlstarlet val -w -b -e
  endif
 else
  xml_wfcheck := xmlwf
 endif
endif
##--/ifeq ($(xml_wfcheck),)

##--------------------------------------------------------------
## Configuration: Defaults: archiving & distribution

## ARC_TARGETS : files linked into $(arcname)/data by the archive stamp rule
ARC_TARGETS ?= Makefile User.mak ${config} ${logfile} $(patsubst %.xml,%.t.xml,${XML})

##--------------------------------------------------------------
## Configuration: Defaults: cleanup

## CLEAN_DEPS / CLEAN_FILES : prerequisites of, and files removed by, "clean"
##  + CLEAN_FILES is extended with "+=" by the individual rule sections
CLEAN_DEPS ?=
CLEAN_FILES ?=

## REALCLEAN_DEPS / REALCLEAN_FILES : same for "realclean", which runs "clean" first
##  + the filter-out selects source basenames which do not name a source file
##    as listed in $(xml), i.e. local copies or links of sources kept elsewhere
##  + no trailing line continuation here: it would silently absorb whatever
##    line is placed after this assignment
REALCLEAN_DEPS += clean
REALCLEAN_FILES += $(filter-out $(xml),$(XML))

##--------------------------------------------------------------
## Configuration: Defaults: Top-Level targets

## ALL_TARGETS : prerequisites of the default goal "all"
#ALL_TARGETS ?= src-xml-errors t-xml s-xml w-xml a-xml so-xml-errors
ALL_TARGETS ?= src-xml-errors t-xml t-xml-errors

## ALL_XML_TARGETS : prerequisites of "all-xml"
##  + NOTE(review): cab-xml-xlit is not defined in this file; presumably it is
##    provided by an included makefile -- confirm
ALL_XML_TARGETS ?= all u-xml cpx cab-xml-xlit cws-xml cws-noc-fmt-xml extra-xml-errors
#cab-xml-full
#cab-xml-xlit

## EXTRA_TARGETS : prerequisites of "extra"
EXTRA_TARGETS ?= all-xml summary

## SUMMARY_TARGETS : prerequisites of "summary"
##  + NOTE(review): rw-summary and unk-summary are not defined in this file -- confirm
SUMMARY_TARGETS ?= rw-summary unk-summary

## CASCADE_TARGETS : prerequisites of "cascade-all"
CASCADE_TARGETS ?= all-xml

##======================================================================
## Rules: top-level

all: $(ALL_TARGETS)

all-xml: $(ALL_XML_TARGETS)

extra: $(EXTRA_TARGETS)

summary: $(SUMMARY_TARGETS)

##-- honours CASCADE_TARGETS; its default (all-xml) builds $(ALL_XML_TARGETS)
cascade-all: $(CASCADE_TARGETS)

##-- pure aggregates: never satisfied by a file which happens to share the name
.PHONY: all all-xml extra summary cascade-all

##-- .SECONDARY without prerequisites: all targets are treated as secondary,
##   so intermediate files are not deleted automatically
.SECONDARY:
##-- remove a target whose recipe failed instead of keeping a partial file
.DELETE_ON_ERROR:

##======================================================================
## Rules: show configuration

##-- "make config" is an alias for "make twconfig"
config: twconfig

##-- twconfig: print the tokwrap-related settings and the resulting command
twconfig:
	@echo "inplace=${inplace}"
	@echo "dummytok=${dummytok}"
	@echo "abbrevlex=${abbrevlex}"
	@echo "mwelex=${mwelex}"
	@echo "verbose=${verbose}"
	@echo "loglevel=${loglevel}"
	@echo "logfile=${logfile}"
	@echo "stderr=${stderr}"
	@echo "trace=${trace}"
	@echo "profile=${profile}"
	@echo "TOKWRAP=${TOKWRAP}"

##-- srcconfig: print the source directory and the source file lists
srcconfig:
	@echo "xmldir=${xmldir}"
	@echo "xml=${xml}"
	@echo "XML=${XML}"

##======================================================================
## Rules: link in sources (don't rely on this!)

#$(XML): xml
##-- xml: drop stale local copies/links of the sources, then symlink the sources into "."
xml: ${xml}
	rm -f $(filter-out ${xml},${XML})
	ln -s $^ .

##-- no-xml: remove the local copies/links again (skipped when there are none)
no-xml:
	if test -n "$(filter-out ${xml},${XML})"; then rm -f $(filter-out ${xml},${XML}); fi
	rm -f xml.stamp

REALCLEAN_DEPS += no-xml

##======================================================================
## Rules: mkindex: xml -> xx=(cx,sx,tx)

##-- aggregate targets: one index file of each kind per source
xx: cx sx tx

cx: $(patsubst %.xml,%.cx,${XML})
sx: $(patsubst %.xml,%.sx,${XML})
tx: $(patsubst %.xml,%.tx,${XML})

no-cx:
	rm -f $(patsubst %.xml,%.cx,${XML})
no-sx:
	rm -f $(patsubst %.xml,%.sx,${XML})
no-tx:
	rm -f $(patsubst %.xml,%.tx,${XML})

no-xx: no-cx no-sx no-tx

##-- xml -> (cx,sx,tx): individual rule
##  + NOTE(review): the four pattern rules below have no recipe; in GNU make
##    such a rule only cancels an identical implicit rule, so they presumably
##    have no effect -- confirm before removing
%.xx: %.cx %.sx %.tx
%.cx: %.cx %.sx %.tx
%.sx: %.cx %.sx %.tx
%.tx: %.cx %.sx %.tx

##-- a single invocation writes all three index files
##  + TOKWRAP_ALL=yes goes through dta-tokwrap.perl, anything else calls dtatw-mkindex
%.cx %.sx %.tx: ${xmldir}/%.xml tokwrap
ifneq "$(TOKWRAP_ALL)" "yes"
	${PROG_DIR}dtatw-mkindex $< $*.cx $*.sx $*.tx
else
	${TOKWRAP} -t=mkindex $<
endif


CLEAN_FILES += *.cx *.sx *.tx *.xx

##-- debugging aliases: formatted and/or namespace-free variants of the .sx files
sx-fmt: $(patsubst %.xml,%.sx.fmt,${XML})
no-sx-fmt:
	rm -f *.sx.fmt

sx-nons: $(patsubst %.xml,%.sx.nons,${XML})
no-sx-nons:
	rm -f *.sx.nons *.sx.nons.fmt

sx-nons-fmt: $(patsubst %.xml,%.sx.nons.fmt,${XML})
no-sx-nons-fmt:
	rm -f *.sx.nons.fmt
CLEAN_FILES += *.sx.nons *.sx.fmt *.sx.nons.fmt *.sx.fmt.nons

##-- index files count as "misc" files for installation
MISC_FILES += $(patsubst %.xml,%.cx,${XML}) $(patsubst %.xml,%.sx,${XML}) $(patsubst %.xml,%.tx,${XML})

##======================================================================
## Rules: serialization (serialized block index: bx0)

##-- bx0: one serialized block index per source
bx0: $(patsubst %.xml,%.bx0,${XML})

##-- depends on the .sx index, but dta-tokwrap.perl is handed the source XML
%.bx0: %.sx tokwrap
	${TOKWRAP} -t=mkbx0 ${xmldir}/$*.xml

no-bx0:
	rm -f *.bx0 bx0.stamp
CLEAN_FILES += *.bx0 bx0.stamp

MISC_FILES += $(patsubst %.xml,%.bx0,${XML})

##======================================================================
## Rules: serialized text + index (bx, txt)

##-- aliases: "serialize", "bx" and "txt" all lead to bx-txt
serialize: txt

bx: bx-txt
txt: bx-txt
bx-txt: $(patsubst %.xml,%.bx,${XML})

##-- NOTE(review): recipe-less pattern rules; presumably without effect -- confirm
%.bx:  %.bx %.txt
%.txt: %.bx %.txt

##-- separating these rules gets us t-xml re-made on 2 subsequent (make all)s
#%.bx: %.bx0 %.tx tokwrap
#	$(TOKWRAP) -t=mktxt $(xmldir)/$*.xml
#
#%.txt: %.bx0 %.tx tokwrap
#	$(TOKWRAP) -t=mktxt $(xmldir)/$*.xml
##
##-- hence one rule with both targets: a single mktxt run yields .bx and .txt
%.bx %.txt: %.bx0 %.tx tokwrap
	${TOKWRAP} -t=mktxt ${xmldir}/$*.xml

no-bx-txt:
	rm -f *.bx *.txt
no-bx: no-bx-txt
no-txt: no-bx-txt
CLEAN_FILES += *.bx *.txt

MISC_FILES += $(patsubst %.xml,%.bx,${XML}) $(patsubst %.xml,%.txt,${XML})

##======================================================================
## Rules: tokenization (raw)

##-- t0: raw tokenizer output, one file per source
t0: $(patsubst %.xml,%.t0,${XML})

##-- a non-empty TOKENIZER is run directly on the .txt file (stdout -> target);
##   otherwise dta-tokwrap.perl performs the tokenize0 step
%.t0: %.txt tokwrap
ifneq "$(TOKENIZER)" ""
	${TOKENIZER} $< > $@
else
	${TOKWRAP} -t=tokenize0 ${xmldir}/$*.xml
endif

no-t0:
	rm -f *.t0
CLEAN_FILES += *.t0

MISC_FILES += $(patsubst %.xml,%.t0,${XML})

##======================================================================
## Rules: tokenization (post-processing)

##-- t1: post-processed tokenizer output, one file per source
t1: $(patsubst %.xml,%.t1,${XML})

%.t1: %.t0 tokwrap
	${TOKWRAP} -t=tokenize1 ${xmldir}/$*.xml

no-t1:
	rm -f *.t1
CLEAN_FILES += *.t1
MISC_FILES += $(patsubst %.xml,%.t1,${XML})

##======================================================================
## Rules: tokenization (symlink)

##-- t: final token file, realized as a symlink to the .t1 file
t: $(patsubst %.xml,%.t,${XML})

##-- symlink rather than hardlink: hardlinks make problems with e.g. -j4
#	ln $< $@
%.t: %.t1
	rm -f $@
	ln -s $< $@

##-- no-t also removes the .t0 and .t1 files
no-t:
	rm -f *.t *.t0 *.t1
CLEAN_FILES += *.t
MISC_FILES += $(patsubst %.xml,%.t,${XML})

##======================================================================
## Rules: tokenization: serial-byte to xml-byte conversion (programs only atm)
##  + for debugging

##-- xt: debugging output of dtatw-b2xb, one file per source
xt: $(patsubst %.xml,%.xt,${XML})

##-- arguments as passed: token file, .cx, .bx, output file, $*.xml
##  + NOTE(review): the last argument is $*.xml in the current directory, not
##    $(xmldir)/$*.xml -- confirm against the dtatw-b2xb usage
%.xt: %.t %.cx %.bx tokwrap
	${PROG_DIR}dtatw-b2xb $< $*.cx $*.bx $@ $*.xml

no-xt:
	rm -f *.xt
CLEAN_FILES += *.xt
MISC_FILES += $(patsubst %.xml,%.xt,${XML})


##======================================================================
## Rules: tokenized: master xml output

##-- t-xml: tokenized master XML, one file per source
t-xml: $(patsubst %.xml,%.t.xml,${XML})

##-- always use dta-tokwrap.perl for tok2xml, since it also sorts back into
##   pseudo-source-order using fast regexes
##-- disabled alternative (was selected by TOKWRAP_ALL != "yes"):
#%.t.xml: %.xt tokwrap
#	$(TOKWRAP) -t=tok2xml -po=b2xb=off -do=tokfile1=$< $(xmldir)/$*.xml
#	$(PROG_DIR)dtatw-tok2xml $< - *.xml | xsltproc -o "$@" $(SCRIPT_DIR)dtatw-txmlsort.xsl -
%.t.xml: %.t %.cx %.bx tokwrap
	${TOKWRAP} -t=tok2xml ${xmldir}/$*.xml

no-t-xml:
	rm -f *.t.xml
CLEAN_FILES += *.t.xml


##======================================================================
## Rules: standoff (via C utilities) : OBSOLETE


##======================================================================
## Rules: source+standoff integration ("splicing")

##-- splice aliases
splice: cwst-xml
splice-noc: wst-fmt-xml

##-- aggregate targets: one output file per source
##  + wst-fmt-xml is defined here because splice-noc depends on it; the files
##    themselves are built by the %.wst.fmt.xml pattern rule
cws-noc-fmt-xml: $(XML:.xml=.cws.noc.fmt.xml)
cws-noc-xml: $(XML:.xml=.cws.noc.xml)
ws-fmt-xml: $(XML:.xml=.ws.fmt.xml)
wst-fmt-xml: $(XML:.xml=.wst.fmt.xml)
ws-xml: $(XML:.xml=.ws.xml)


##-- splice: (.char.xml + .t.xml) --> .cws.xml
##  + cw-xml is an alias for cws-xml
cw-xml: cws-xml
cws-xml: $(XML:.xml=.cws.xml)
%.cws.xml: $(xmldir)/%.xml %.t.xml tokwrap
	$(TOKWRAP) -t=addws $(xmldir)/$*.xml

##-- cleanup: no-cw-xml is an alias for no-cws-xml
##  + "rm -f" (as in the other no-* rules) succeeds when nothing matches
no-cw-xml: no-cws-xml
no-cws-xml: ; rm -f *.cws.xml
CLEAN_FILES += *.cws.xml

##-- splice: .cws.xml --> .ws.xml (aka .cws.noc.xml)
ws-xml: $(patsubst %.xml,%.ws.xml,${XML})

##-- dtatw-rm-c.perl writes to stdout, which is redirected into the target
%.ws.xml: %.cws.xml scripts
	${SCRIPT_DIR}dtatw-rm-c.perl $< > $@

##-- formatted variant, written by dtatw-format.perl via -o
%.ws.fmt.xml: %.ws.xml scripts
	${SCRIPT_DIR}dtatw-format.perl -b -o=$@ $<

no-ws-xml:
	rm -f *.ws.xml *.ws.fmt.xml
CLEAN_FILES += *.ws.xml

##-- splice: .cws.xml + .t.xml -> .cwst.xml  : should be OBSOLETE! (Thu, 04 Oct 2012 15:11:31 +0200)
cwst-xml: $(patsubst %.xml,%.cwst.xml,${XML})

##-- base file = first prerequisite, standoff file = second, output = target
%.cwst.xml: %.cws.xml %.t.xml tokwrap
	${TOKWRAP} -t=cwstxml -po soIgnoreAttrs="c,xb" -do cwstbasefile=$< -do cwstsofile=$(word 2,$^) -do cwstfile=$@ ${xmldir}/$*.xml

no-cwst-xml:
	rm -f *.cwst.xml
CLEAN_FILES += *.cwst.xml

##-- splice: .ws.xml + .t.xml -> .wst.xml   : should be OBSOLETE! (Thu, 04 Oct 2012 15:11:31 +0200)
wst-xml: $(patsubst %.xml,%.wst.xml,${XML})

##-- same command as for .cwst.xml, with the .ws.xml file as base
%.wst.xml: %.ws.xml %.t.xml tokwrap
	${TOKWRAP} -t=cwstxml -po soIgnoreAttrs="c,xb" -do cwstbasefile=$< -do cwstsofile=$(word 2,$^) -do cwstfile=$@ ${xmldir}/$*.xml

##-- formatted variant
%.wst.fmt.xml: %.wst.xml scripts
	${SCRIPT_DIR}dtatw-format.perl -b -o=$@ $<

no-wst-xml:
	rm -f *.wst.xml *.wst.fmt.xml
CLEAN_FILES += *.wst.xml

##-- splice: vars
##  + CWS_XML_FILES : the .cws.xml and .wst.xml file of every source
##  + a substitution reference needs the full "FROM=TO" form; without "=" the
##    whole expression is read as a (nonexistent) variable name and expands to nothing
#CWS_XML_FILES = $(XML:.xml=.cws.xml) $(XML:.xml=.ws.xml)
CWS_XML_FILES = $(XML:.xml=.cws.xml) $(XML:.xml=.wst.xml)

##======================================================================
## Rules: tcf (text only)

##-- tcf: one .tcf file per source
tcf: $(XML:.xml=.tcf)

##-- the prerequisite is the same file the recipe reads, i.e. the source in
##   $(xmldir), as in the other rules which start from the source XML
%.tcf: $(xmldir)/%.xml
	$(TOKWRAP) -t=tei2tcf -weak-hints $(xmldir)/$*.xml

CLEAN_FILES += *.tcf

##======================================================================
## Rules: tcfws.xml (tcf->tei)

##-- tcfws: one .tcfws file per source
tcfws: $(patsubst %.xml,%.tcfws,${XML})

##-- needs the .tcf and .t1 files; dta-tokwrap.perl is handed the source XML
%.tcfws: %.tcf %.t1
	${TOKWRAP} -t=tcfdecode ${xmldir}/$*.xml

CLEAN_FILES += *.tcf *.tcf[twx] *.tcfws *.tcfws.xml


##======================================================================
## Rules: tokenized: extended output (.u.xml) :: PROBABLY BROKEN
## + //w/@u  : unicruft text
## + //w/@t0 : raw text
## + //w/@u0 : unicrufted raw text
## + //w/@xp : xpath (also //s/@xp if compressed)
## + //w/@cs : charSpans cs="CID+LEN CID+LEN ... CID+LEN"

##-- %.u.xml: unicruft + raw text + xp (xpath)
u-xml: $(patsubst %.xml,%.u.xml,${XML})

##-- earlier variants, kept for reference:
##-- {.t.xml,.txt}->.u.xml : (+t0, +cruft, -pb)
#%.u.xml: %.t.xml %.txt tokwrap
#	$(SCRIPT_DIR)dtatw-txml2uxml.perl -tf=$*.txt $< -o $@
##
##-- {.t.xml,.txt,.wpx}->.u.xml : (+t0, +cruft, +pb)
#%.u.xml: %.t.xml %.txt %.wpx tokwrap
#	$(SCRIPT_DIR)dtatw-txml2uxml.perl -tf=$*.txt -wpx=$*.wpx $< -o $@
##
##-- {.t.xml,.txt,.cpx}->.u.xml : (+t0, +cruft, +pb)
#%.u.xml: %.t.xml %.txt %.cpx tokwrap
#	$(SCRIPT_DIR)dtatw-txml2uxml.perl -tf=$*.txt -cpx=$*.cpx $< -o $@
##
##-- {.t.xml,.txt,.cpx,.cx}->.u.xml : (+t0, +cruft, +pb, +cs)
#%.u.xml: %.t.xml %.txt %.cpx %.cx tokwrap ##-- weird: looks like loop in make dependencies!
##
##-- active rule: the recipe also reads $*.txt and $*.cx, which are
##   not listed as prerequisites (see the note on the variant just above)
%.u.xml: %.t.xml %.cpx tokwrap
	${SCRIPT_DIR}dtatw-txml2uxml.perl -tf=$*.txt -cpx=$*.cpx -cx=$*.cx $< -o $@

no-u-xml:
	rm -f *.u.xml
CLEAN_FILES += *.u.xml

##======================================================================
## Rules: auxiliary indices

##-- %.cpx: character-pagebreak index
cpx: $(patsubst %.xml,%.cpx,${XML})

##-- built directly from the source XML by dtatw-mkpx.perl
%.cpx: ${xmldir}/%.xml tokwrap
	${SCRIPT_DIR}dtatw-mkpx.perl -v=0 $< -o $@

no-cpx:
	rm -f *.cpx
CLEAN_FILES += *.cpx

##-- %.wpx: word-pagebreak index
wpx: $(XML:.xml=.wpx)

##-- preferred input: the spliced %.cws.xml file, which this makefile can build
##  + '^w$$' reaches the shell as '^w$' (argument of the -i option)
%.wpx: %.cws.xml
	$(SCRIPT_DIR)dtatw-mkpx.perl -v=0 -i '^w$$' $< -o $@

##-- fallback for an existing %.cw.xml file; no rule in this file creates one
##  + NOTE(review): presumably .cw.xml is the former name of .cws.xml -- confirm
%.wpx: %.cw.xml
	$(SCRIPT_DIR)dtatw-mkpx.perl -v=0 -i '^w$$' $< -o $@

no-wpx: ; rm -f *.wpx
CLEAN_FILES += *.wpx

##======================================================================
## Rules: tokenized: ddc attributes (%.ddc.t.xml)
## + //w/@xr  : ddc $Rend field (rendition)
## + //w/@xc  : ddc $Context field (structural context)
## + //w/@xp  : (later) ddc $Xpath field (xpath)
## + //w/@pb  : ddc $Page field (from //pb/@facs)
## + //w/@bb  : ddc $Coord field (bounding box)
## + //w/@u   : ddc $Token field (unicruft)

##-- ddc-t-xml: one attribute-enriched .ddc.t.xml file per source
ddc-t-xml: $(patsubst %.xml,%.ddc.t.xml,${XML})

##-- both prerequisites are passed to the script, the .t.xml file first
%.ddc.t.xml: %.t.xml ${xmldir}/%.xml
	${SCRIPT_DIR}dtatw-get-ddc-attrs.perl ${DDC_ATTRS} -o $@ $^

no-ddc-t-xml:
	rm -f *.ddc.t.xml
CLEAN_FILES += *.ddc.t.xml


##======================================================================
## Rules: well-formed check

##-- aggregate aliases
##  + NOTE(review): so-xml-errors and cab-xml-errors are not defined in this
##    file; presumably they come from an included makefile -- confirm
xml-errors: src-xml-errors so-xml-errors
extra-xml-errors: cab-xml-errors cws-xml-errors

##-- one corpus-wide report file per kind of checked file
src-xml-errors:   ${corpus}.src.xml_errors
t-xml-errors:     ${corpus}.t.xml_errors
ddc-t-xml-errors: ${corpus}.ddc.t.xml_errors
cws-xml-errors:   ${corpus}.cws.xml_errors
ws-xml-errors:    ${corpus}.ws.xml_errors
all-xml-errors:   ${corpus}.all.xml_errors

##-- tokenizer output check: output is shown and also saved through tee
##  + leading "-": the exit status of the check is ignored
#	-$(SCRIPT_DIR)dtatw-t-check.perl -q "$*.t0" "$*.txt" >$@ 2>&1
t0-errors: ${corpus}.t0_errors
%.t0.errors: %.t0 %.txt
	-${SCRIPT_DIR}dtatw-t-check.perl -q "$*.t0" "$*.txt" 2>&1 | tee $@

## T_XML_FILES : tokenized master XML files covered by the *.t.xml_errors reports
##  + fallback only: an earlier definition (e.g. from the user config) is kept
##  + without a value the report rules below would have no prerequisites
T_XML_FILES ?= $(XML:.xml=.t.xml)

ifneq "$(xml_wfcheck)" "no"

##-- per-file checks
##  + leading "-": a failing check is recorded, it does not stop the build
##  + "> $@ 2>&1" sends stdout AND stderr to the report file; with the
##    redirections in the opposite order stderr would bypass the file
%.src.xml.errors: $(xmldir)/%.xml
	-$(xml_wfcheck) $< > $@ 2>&1

%.xml.errors: %.xml
	-$(xml_wfcheck) $< > $@ 2>&1

##-- corpus-wide reports: concatenation of the per-file reports
##  + /dev/null keeps cat from reading stdin when the prerequisite list is empty
$(corpus).src.xml_errors: $(XML:.xml=.src.xml.errors)
	cat /dev/null $^ > $@

$(corpus).t.xml_errors: $(T_XML_FILES:=.errors)
	cat /dev/null $^ > $@

$(corpus).ddc.t.xml_errors: $(T_XML_FILES:.t.xml=.ddc.t.xml.errors)
	cat /dev/null $^ > $@

$(corpus).cws.xml_errors: $(XML:.xml=.cws.xml.errors)
	cat /dev/null $^ > $@

$(corpus).ws.xml_errors: $(XML:.xml=.ws.xml.errors)
	cat /dev/null $^ > $@

$(corpus).t0_errors: $(XML:.xml=.t0.errors)
	cat /dev/null $^ > $@

%.all.xml_errors: %.t.xml_errors %.cws.xml_errors %.ws.xml_errors
	cat /dev/null $^ > $@

else
##--/ifneq "$(xml_wfcheck)" "no": else

##-- checks disabled: every corpus-wide report just contains a notice
$(corpus).%_errors:
	echo "Implicit XML well-formedness check DISABLED" > $@

endif
##--/ifneq "$(xml_wfcheck)" "no": endif

##-- removal of report files: XML reports, tokenizer reports, or everything
no-xml-errors:
	rm -f *.xml.errors *.xml_errors
no-t0-errors:
	rm -f *.t0.errors *.t0_errors
no-errors:
	rm -f *.errors *_errors

CLEAN_FILES += *.xml.errors *.xml_errors *.t0.errors *.t0_errors

##======================================================================
## Rules: Summaries

##--------------------------------------------------------------
## Rules: Summaries: types

##-- %.types: first tab-separated column of the .typf file
corpus-types: ${corpus}.types
%.types: %.typf
	cut -d$$'\t' -f 1 $< > $@

##-- %.lex: purely alphabetic lines of the .types file, sorted and unique
##  + the filter runs between "unicruft -D" and "unicruft -d"
corpus-lex: ${corpus}.lex
%.lex: %.types
	cat $^ | \
	unicruft -D | \
	perl -CIO -n -e'print if (/^[[:alpha:]]+$$/);' | \
	unicruft -d | sort -u > $@

##-- $(corpus).lexf : for use as WORDS variable in dta/automata/pho-id/Makefile
##  + like %.lex, but keeps the frequency: lines "WORD<TAB>COUNT"
corpus-lexf: ${corpus}.lexf
%.lexf: %.typf
	cat $^ | \
	unicruft -D | \
	perl -CIO -n -e'print "$$1\t$$2\n" if (/^([[:alpha:]]+)\t\[freq\] (\d+)$$/);' | \
	unicruft -d | sort -u > $@

##-- $(corpus).typf : frequency list over the first column of all .t files
##  + lines starting with "%%" and empty values are skipped; "|| true" keeps
##    a grep without matches from failing the pipeline
##  + output lines have the form "TYPE<TAB>[freq] COUNT"
corpus-typf: ${corpus}.typf
${corpus}.typf: $(patsubst %.xml,%.t,${XML})
	cat $^ | (grep -v '^%%' || true) | cut -d$$'\t' -f1 | (grep . || true) | sort | uniq -c | \
	sed -e's/^[ ]*\([0-9][0-9]*\) \(.*\)/\2\t\[freq\] \1/1' > $@

REALCLEAN_FILES += *.types *.typf

##======================================================================
## Rules: archiving

##-- arc: build the archive file $(arcfile)
##  + arcname, arcfile, arc_gzip and arc_want_sources are not set in this
##    file; presumably they come from the user configuration
arc: $(arcfile)

##-- arcdir: (re-)create the staging directory $(arcname)
arcdir: $(arcname)/stamp

##-- staging directory: hardlinks to the archive targets go below data/ and,
##   if arc_want_sources=yes, hardlinks to the sources below sources/
##  + refuses to run with an empty arcname, which would otherwise make the
##    commands below act on "/data", "/sources" and "/stamp"
##  + the existence test uses the same basename as the link name, so a file
##    listed twice (e.g. User.mak and $(config)) is linked only once even if
##    it is given with a directory part
$(arcname)/stamp: $(ARC_TARGETS) $(xml)
	@test -n "$(arcname)" || { echo "arcname is not set" >&2; exit 1; }
	rm -rf $(arcname)
	mkdir $(arcname)
	mkdir $(arcname)/data
	for f in $(ARC_TARGETS); do \
	  b=`basename $$f`; \
	  test -e $(arcname)/data/$$b || ln `readlink -f $$f` $(arcname)/data/$$b; \
	done
	if test "$(arc_want_sources)" = "yes"; then \
	  mkdir $(arcname)/sources; \
	  for f in $(xml); do \
	    ln `readlink -f $$f` $(arcname)/sources/`basename $$f` ; \
	  done; \
	fi
	date -I > $@

##-- "rm -rf" (as in the stamp rule) also succeeds when the directory is absent
no-arcdir:
	rm -rf $(arcname)

##-- archive: gzip-compressed tar file of the staging directory
##  + $(arc_gzip) is handed to gzip through the GZIP environment variable
$(arcfile): arcdir
	rm -rf $(arcfile)
	GZIP="$(arc_gzip)" tar cvzf $@ $(arcname)
	@echo "Created archive $@"

no-arc: no-arcdir
	rm -f $(arcfile)

##======================================================================
## Rules: install

## INSTALL_DATA / INSTALL_DIR : commands which install a file / create a directory
##  + owner and group are set only when $(USER) is "root"
ifneq ($(USER),root)
 INSTALL_DATA = install -m ${install_mode}
 INSTALL_DIR  = mkdir -p
else
 INSTALL_DATA = install -p -o ${install_user} -g ${install_group} -m ${install_mode}
 INSTALL_DIR  = install -p -o ${install_user} -g ${install_group} -m ${install_dirmode} -d
endif

## INSTALL_FILES : files copied by "install"; INSTALL_DEPS : additional prerequisites
INSTALL_FILES =
INSTALL_DEPS  =

##-- install targets: defaults
install_to ?= ./installed

##-- one switch per file group; every value except "no" enables the group
install_makefile  ?= yes
install_sources   ?= yes
install_standoff  ?= yes
install_cws_xml   ?= yes
install_cab_xml   ?= yes
install_summaries ?= no
install_misc      ?= no
install_cab_misc  ?= no

##-- install targets: append
ifneq ($(install_makefile),no)
 INSTALL_FILES += Makefile $(wildcard *.mak) ${config}
endif

##-- the sources are only prerequisites here; "install" copies them separately
ifneq ($(install_sources),no)
 INSTALL_DEPS += $(addprefix ${xmldir}/,${XML})
endif

ifneq ($(install_standoff),no)
 INSTALL_FILES += ${STANDOFF_XML_FILES}
endif

ifneq ($(install_cws_xml),no)
 INSTALL_FILES += $(patsubst %.xml,%.cws.xml,${XML})
endif

ifneq ($(install_cab_xml),no)
 INSTALL_FILES += ${CAB_XML_FILES}
endif

ifneq ($(install_summaries),no)
 INSTALL_FILES += ${SUMMARY_FILES}
endif

ifneq ($(install_misc),no)
 INSTALL_FILES += ${MISC_FILES}
endif

ifneq ($(install_cab_misc),no)
 INSTALL_FILES += ${CAB_MISC_FILES}
endif

##-- user-supplied extras
INSTALL_FILES += ${INSTALL_EXTRA_FILES}

##-- install: rules
##-- install: copy $(INSTALL_FILES) into $(install_to)
##  + every file is installed under its basename, the same name which
##    "uninstall" removes; an entry with a directory part (e.g. a $(config)
##    outside the current directory) therefore lands directly in $(install_to)
##  + unless install_sources=no, the sources go into a subdirectory named
##    after the last component of $(xmldir)
install: $(INSTALL_FILES) $(INSTALL_DEPS)
	$(INSTALL_DIR) $(install_to)
	for f in $(INSTALL_FILES); do $(INSTALL_DATA) -v $$f $(install_to)/`basename $$f` ;done
ifneq "$(install_sources)" "no"
	$(INSTALL_DIR) $(install_to)/$(notdir $(xmldir))
	for f in $(XML); do $(INSTALL_DATA) -v $(xmldir)/$$f $(install_to)/$(notdir $(xmldir))/$$f; done
endif

##-- uninstall: remove the installed files, then the directories if they are empty
##  + leading "-": failures (e.g. a non-empty directory) are ignored
uninstall:
	-for f in $(INSTALL_FILES); do rm -rf $(install_to)/`basename $$f`; done
ifneq "$(install_sources)" "no"
	-for f in $(XML); do rm -rf $(install_to)/$(notdir $(xmldir))/$$f; done
	-rmdir $(install_to)/$(notdir $(xmldir))
endif
	-rmdir $(install_to)

##-- force-uninstall: remove $(install_to) with everything in it
force-uninstall:
	-rm -rf $(install_to)

##======================================================================
## Rules: utility programs (inplace="yes" only!)

##-- programs: with inplace=yes, run "make all" in $(PROG_DIR); otherwise do nothing
programs: ${PROG_DEPS}
ifneq ($(inplace),yes)
	true
else
	$(MAKE) -C "${PROG_DIR}" all
endif

##======================================================================
## Rules: perl module (inplace="yes" only!)

##--
ifneq ($(inplace),yes)
##-- not in-place: "tokwrap", "scripts" and "pm" are no-ops

tokwrap: ; true

scripts: ; true

pm: ; true

else
##-- in-place: these targets track the source tree

#tokwrap: programs pm
tokwrap: programs
scripts: ${SCRIPT_DEPS}

##-- pm: build the perl module through its generated Makefile
pm: ${TOKWRAP_DIR}/Makefile
	$(MAKE) -C ${TOKWRAP_DIR}

${TOKWRAP_DIR}/Makefile: ${TOKWRAP_DIR}/Makefile.PL
	(cd ${TOKWRAP_DIR}; ${PERL} Makefile.PL)

endif
##-- ifneq ($(inplace),yes): endif

##======================================================================
## Rules: generic XML stuff

##-- pretty-printing: *.fmt[.xml] via dtatw-format.perl
##  + disabled alternative: xmllint --format -o $@ $<
%.fmt: %
	${SCRIPT_DIR}dtatw-format.perl -o=$@ $<

%.fmt.xml: %.xml
	${SCRIPT_DIR}dtatw-format.perl -o=$@ $<

##-- pretty-printing: *.fo[.xml] via "xmlstarlet fo"
%.fo: %
	xmlstarlet fo -n $< > $@

%.fo.xml: %.xml
	xmlstarlet fo -n $< > $@

CLEAN_FILES += *.fmt *.fmt.xml *.fo.xml *.fo

##-- namespace removal: *.nons[.xml] via dtatw-rm-namespaces
%.nons: % programs
	${PROG_DIR}dtatw-rm-namespaces $< $@

##-- NOTE(review): RMNS is not set anywhere in this file, while the rule
##   above depends on "programs" -- confirm which prerequisite is intended
%.nons.xml: %.xml ${RMNS}
	${PROG_DIR}dtatw-rm-namespaces $< $@

CLEAN_FILES += *.nons *.nons.xml

##-- character removal *.noc[.xml] via dtatw-rm-c.perl (stdout -> target)
%.noc: % scripts
	${SCRIPT_DIR}dtatw-rm-c.perl $< > $@

##-- two rules for *.noc.xml: input from the current directory or from $(xmldir)
%.noc.xml: %.xml scripts
	${SCRIPT_DIR}dtatw-rm-c.perl $< > $@

%.noc.xml: ${xmldir}/%.xml scripts
	${SCRIPT_DIR}dtatw-rm-c.perl $< > $@

CLEAN_FILES += *.noc *.noc.xml


##======================================================================
## Rules: cleanup

##-- log files: removed by "nolog" (alias "no-log") and by "realclean"
no-log: nolog
nolog: ; rm -f *.log
REALCLEAN_FILES += *.log

## TMPCLEAN_FILES : $(CLEAN_FILES) without the patterns ending in .t.xml or .errors
TMPCLEAN_FILES ?= $(filter-out %.t.xml %.errors,$(CLEAN_FILES))

##-- cleanconfig: print the dependency and file lists of the cleanup targets
cleanconfig:
	@echo TMPCLEAN_DEPS=$(TMPCLEAN_DEPS)
	@echo TMPCLEAN_FILES=$(TMPCLEAN_FILES)
	@echo CLEAN_DEPS=$(CLEAN_DEPS)
	@echo CLEAN_FILES=$(CLEAN_FILES)
	@echo REALCLEAN_DEPS=$(REALCLEAN_DEPS)
	@echo REALCLEAN_FILES=$(REALCLEAN_FILES)

##-- each rule skips "rm" when its file list is empty
tmpclean:
	test -z "$(TMPCLEAN_FILES)" || rm -f $(TMPCLEAN_FILES)

clean: $(CLEAN_DEPS)
	test -z "$(CLEAN_FILES)" || rm -f $(CLEAN_FILES)

realclean: $(REALCLEAN_DEPS)
	test -z "$(REALCLEAN_FILES)" || rm -f $(REALCLEAN_FILES)

##-- these are commands, not files: a file which happens to be called e.g.
##   "clean" must not make the target look up to date
.PHONY: no-log nolog cleanconfig tmpclean clean realclean
