# Base name of the dictionary archive.  The rules in this file read
# $(ZIPFILE).zip and extract $(ZIPFILE).dic and $(ZIPFILE).aff from it.
ZIPFILE = nb_NO

# Base name of the generated files $(LANGUAGE).dict and $(LANGUAGE).aff.
LANGUAGE = norsk

# Extraction command; -o makes unzip overwrite existing files without asking.
UNZIP = unzip -o

# Build both converted files: the word list and the affix table.
# Declared phony so that a stray file named "all" cannot mask the target.
.PHONY: all
all: $(LANGUAGE).dict $(LANGUAGE).aff

# Extract the original affix file from the archive.
# The extracted file is touched afterwards -- presumably because unzip
# restores the timestamp stored in the archive, which could leave the target
# older than the .zip and make this rule re-run on every invocation.
$(ZIPFILE).aff: $(ZIPFILE).zip
	$(UNZIP) $< $@
	touch $@

# Extract the raw word list from the archive.  It has its own rule so that
# every file created by the build is the target of some rule; it is touched
# for the same reason as the extracted affix file (unzip restores the
# archived timestamp).
$(ZIPFILE).dic: $(ZIPFILE).zip
	$(UNZIP) $< $@
	touch $@

# Build the cleaned-up word list from the raw one:
#   1. drop lines that consist only of digits (presumably the word-count
#      header line of the raw dictionary)
#   2. drop lines that contain a '.'
#   3. remove every '"' character
#   4. for each "/flags" part: if the flags contain \, _ or ` but no z,
#      append z -- i.e. add the "compoundwords controlled" flag to words
#      that lack it but carry compound-only suffixes
#   5. sort the result
# perl runs with -p only: it filters a pipe, so in-place editing (-i) does
# not apply and would just produce a warning.
$(LANGUAGE).dict: $(ZIPFILE).dic
	grep -v -E '^[[:digit:]]+$$' < $< \
	| grep -v '\.' \
	| sed -e 's/"//g' \
	| perl -p -e 's|/(\S+)| $$q=$$1; ( $$q=~/[\\_`]/ && $$q!~/z/ ) ? "/$${q}z" : "/$${q}"|e' \
	| sort \
	> $@

# Convert the extracted affix file into the output affix format.
# Lines containing "zyzyzy" (any case) are dropped first; the remaining
# lines are rewritten by the perl expressions below, applied in this order:
#   - "COMPOUNDFLAG x"  becomes "compoundwords controlled x"
#   - "COMPOUNDMIN n"   becomes "compoundmin n"
#   - a PFX header line ("PFX f Y n ...") becomes "flag f:"; the very first
#     one is preceded by a "prefixes" line and a blank line
#   - any other PFX line becomes the upper-cased text " <field3> > <field2>"
#     (fields counted after the flag)
#   - an SFX header line ("SFX f Y|N n ...") becomes "flag <mark>f:", where
#     <mark> is "*" for Y, prefixed with "~" when text precedes "SFX" on the
#     line; a flag containing no ASCII letter is escaped with a backslash;
#     the very first one is preceded by a "suffixes" line and a blank line
#   - any other SFX line becomes the upper-cased text
#     " <field3> > [-<field1>,][<field2>]", a field equal to "0" being omitted
#   - SET and TRY lines are commented out with a leading '#'
# The header expressions must stay ahead of the matching rule-line
# expressions: once a header is rewritten it no longer starts with PFX/SFX.
# perl runs with -p only: it filters a pipe, so in-place editing (-i) does
# not apply and would just produce a warning.
$(LANGUAGE).aff: $(ZIPFILE).aff
	grep -v -i zyzyzy $< \
	| perl -p \
	    -e 's/^COMPOUNDFLAG\s+(\S+)/compoundwords controlled $$1/;' \
	    -e 's/^COMPOUNDMIN\s+(\d+)/compoundmin $$1/;' \
	    -e 's/^PFX\s+(\S+)\s+Y\s+\d+.*$$/ if ( !$$wasprf ) { $$wasprf=1; "prefixes\n\nflag $$1:" } else { "flag $$1:" } /e;' \
	    -e 's/^PFX\s+\S+\s+(\S+)\s+(\S+)\s+(\S+)/ uc(" $$3 > $$2")/e;' \
	    -e 's/^(.*)SFX\s+(\S+)\s+([YN])\s+\d+.*$$/ $$flg=($$3 eq "Y") ? "*" : ""; $$flg="~$$flg" if length $$1; $$q=$$2; $$q="\\$$q" if $$q!~m#[a-zA-Z]#; if ( !$$wassfx ) { $$wassfx=1; "suffixes\n\nflag $$flg$$q:" } else { "flag $$flg$$q:" } /e;' \
	    -e 's/^.*SFX\s+\S+\s+(\S+)\s+(\S+)\s+(\S+)/ uc(" $$3 > ".( ($$1 eq "0") ? "" : "-$$1,").( ($$2 eq "0") ? "" : "$$2") )/e;' \
	    -e 's/^(SET|TRY)/#$$1/' \
	> $@

# Remove the extracted and generated files; the .zip archive is kept.
# Declared phony so that a stray file named "clean" cannot disable it.
# Plain "rm -f" is enough: all four names are regular files.
.PHONY: clean
clean:
	rm -f $(ZIPFILE).aff $(ZIPFILE).dic $(LANGUAGE).dict $(LANGUAGE).aff

| 47 | + |