#!/usr/bin/python
# -*- coding: utf-8 -*-

#~ trendtest
#~ Copyright (C) 2013 Interbull Centre
#~
#~ This program is free software: you can redistribute it and/or modify
#~ it under the terms of the GNU General Public License as published by
#~ the Free Software Foundation, either version 3 of the License, or
#~ (at your option) any later version.
#~
#~ This program is distributed in the hope that it will be useful,
#~ but WITHOUT ANY WARRANTY; without even the implied warranty of
#~ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#~ GNU General Public License for more details.
#~
#~  http://www.gnu.org/licenses/

# program ttconvert3.py

'''Convert traditional proof files into new file formats for trend validation.

The program searches DATADIR for files named filexxx[suffix], for xxx in
010/015/115/016/017/018/019/020, and creates a single evaluation file called
file300_POPBRD with separate bull proof records for all traits found in all the
xxx files matching the specified breed of evaluation (BRD) and population code
(POP). Similarly, it searches for method 3 validation files named
fileyyy[suffix], for yyy in 040/045/046/047/048/049/050 and creates a single
data file called file303_POPBRD.
'''

# Revision history:
# 2013.10.17- GJansen - original version

import os
import sys
import argparse
import ibutils
import codecs

# Command-line interface.
# to see help summary: python ttconvert3.py --help
epilog = '''See detailed instructions at:
 https://wiki.interbull.org/public/TrendTest_Software?action=print'''

# see http://docs.python.org/2.7/howto/argparse.html
parser = argparse.ArgumentParser(epilog=epilog)
# positional arguments: breed, population and input directory
parser.add_argument('brd',
                    help='evaluation breed code (BSW/GUE/JER/HOL/RDC/SIM)')
parser.add_argument('pop',
                    help='population code (same as country code except for'
                         ' CHR/DEA/DFS/FRR/FRM)')
parser.add_argument('datadir',
                    help='absolute or relative path to data files')
# optional arguments
parser.add_argument('-v', '--verbose', dest='verbose', action='store_true',
                    help='increase output verbosity')
# The example in the help text has to match the names the script really looks
# for, which are built as 'file' + record type + suffix (file010.usa, ...).
parser.add_argument('-s', '--suffix', dest='suffix', default='',
                    help='suffix to add to all input file names, eg. ".usa" if'
                    ' file names are like file010.usa (default=none)')
parser.add_argument('-e', '--encoding', default='utf-8',
                    help='input file encoding (default=utf-8; try also '
                    'iso-8859-1 or other values listed at http://docs.python.'
                    'org/2/library/codecs.html#standard-encodings)')
# when --outdir is omitted args.outdir is None and DATADIR is used instead
parser.add_argument('-o', '--outdir', dest='outdir',
                    help='directory for output files (default=DATADIR)')
args = parser.parse_args()

# Normalise the command-line values used by the rest of the script.
# NOTE(review): check_breed lives in ibutils; presumably it validates and
# normalises the breed code -- confirm there.
brd = ibutils.check_breed(args.brd)
pop = args.pop.upper()
_POPBRD = '_' + pop + brd       # appended to the name of every output file
datadir = args.datadir
outdir = args.outdir or args.datadir
# country is same as population, with a few exceptions ...
pop2cou = {'CHR':'CHE', 'DEA':'DEU', 'DFS':'DNK', 'FRR':'FRA', 'FRM':'FRA'}
cou = pop2cou.get(pop, pop)

ibutils.dated_msg(sys.argv[0] + ': start')
if args.verbose:
    print(sys.argv[0] + ' version=' + ibutils.version)

print('Processing brd=%s pop=%s cou=%s datadir=%s' % (brd, pop, cou, datadir))

# DATADIR has to be a directory: a regular file of that name would pass a
# plain existence test, and the run would then end with a misleading
# "no records written" warning because no input file can be found in it.
if not os.path.isdir(datadir):
    print('absolute DATADIR: ' + os.path.abspath(datadir))
    print('%s: error: DATADIR does not exist or has incorrect permissions'
          % sys.argv[0])
    sys.exit(1)
# the output directory is created on demand
if not os.path.exists(outdir):
    os.makedirs(outdir)
if args.verbose:
    print('absolute DATADIR: ' + os.path.abspath(datadir))
    print('absolute OUTDIR : ' + os.path.abspath(outdir))

#------------------------------------------------------------------------------
# convert proof files
#
# Every input file holds fixed-width records; each slice below is a 0-based
# character offset into one record.  Records from file types 01x/115/020 are
# written to file300, records from file types 04x/050 to file303.


def _is_missing(field):
    '''Return True if a fixed-width proof field is all blanks or all 9s.

    The test does not depend on the width of the field, so it cannot get out
    of step with the slice that produced the field (the milk proof is 8
    characters wide, all other proofs 5 or 6).
    '''
    return field.strip() == '' or field == '9' * len(field)


# record length for each type of file
_RECL = {'010':318, '015':585, '115':609, '016':134, '017':134,
         '018':196, '019':226, '020':136, '040':280, '045':285,
         '046':182, '047':182, '048':292, '049':346, '050':184}
# 4-letter traitgroup codes
_TRTG = {'010':'prod', '015':'conf', '115':'bcon', '016':'uder',
         '017':'long', '018':'calv', '019':'fert', '020':'work',
         '040':'prod', '045':'conf', '046':'uder', '047':'long',
         '048':'calv', '049':'fert', '050':'work'}

bdates = {}     # animal id -> 8-character birth date field
counts = {}     # (traitgroup, trait) -> number of trait records written
file300 = os.path.join(outdir, 'file300' + _POPBRD)
file303 = os.path.join(outdir, 'file303' + _POPBRD)
if args.verbose:
    print('opening output file %s ...' % file300)
    print('opening output file %s ...' % file303)
n300 = 0
n303 = 0
# define list of pairs of file types to process
x15 = '115' if brd == 'BSW' else '015'
# the context manager closes both output files even if a record raises
with open(file300, 'w') as f300, open(file303, 'w') as f303:
    for rec_type in ['010', '040', x15, '045', '016', '046', '017', '047',
                     '018', '048', '019', '049', '020', '050']:
        infile = os.path.join(datadir, 'file' + rec_type + args.suffix)
        if args.verbose:
            print('looking for file ' + infile)
        if not os.path.isfile(infile):
            continue

        recl = _RECL[rec_type]
        trtg = _TRTG[rec_type]
        # True for file types 01x/115/020 (-> file300), False for 04x/050
        is_proof = rec_type[1] in '12'
        # list of traits in the file
        traits = ibutils.get_trait_list(trtg)
        if rec_type == '045':
            traits = ['sta', 'usu', 'loc', 'bcs']
        # ready ...
        if args.verbose:
            print('reading file %s ...' % infile)
        nin = 0; nok = 0; n1 = 0
        # the input file is closed as soon as its last record has been read
        with codecs.open(infile, encoding=args.encoding) as fin:
            for rec in fin:
                if len(rec) < 5: continue # skip EOF marker
                nin += 1
                if rec_type != rec[0:3]:
                    print('%s: error: found rec_type %s in file %s' %
                          (sys.argv[0], rec[0:3], infile))
                # keep only records of the requested breed and country; the
                # country code is the last 3 characters of the record
                brd1 = rec[3:6]
                if brd1 != brd: continue
                cou1 = rec[recl-3:recl]
                if cou1 != cou: continue
                aid = rec[6:25].replace(' ', '~')
                nok += 1
                # the first record seen for an animal supplies its birth date
                if aid not in bdates:
                    if rec_type == '010':
                        bdates[aid] = rec[101:109].replace(' ', '0')
                    else:
                        bdates[aid] = rec[55:63].replace(' ', '0')
                for i, trt in enumerate(traits):

                    # production ------------------------------------------
                    if rec_type in ['010', '040']:
                        typ_prf = rec[245:247]
                        offic = rec[248]
                        stat = rec[255:257]
                        if trt == 'pro':
                            prf = rec[271:277]
                            # skip missing values
                            if _is_missing(prf): continue
                            nd = rec[225:231]
                            nh = rec[231:237]
                            edc = rec[309:315]
                            rel = rec[243:245]
                        else:
                            nd = rec[205:211]
                            nh = rec[211:217]
                            rel = rec[223:225]
                            if trt == 'mil':
                                edc = rec[297:303]
                                prf = rec[257:265]      # 8 characters wide
                                if _is_missing(prf): continue
                            else:
                                edc = rec[303:309]
                                prf = rec[265:271]
                                if _is_missing(prf): continue
                        prf = 0.01 * float(prf)
                        if rec_type == '040':
                            if trt == 'pro':
                                newdau = rec[147:171]
                            else:
                                newdau = rec[109:133]
                            year1d = rec[133:137]

                    # conformation ----------------------------------------
                    elif rec_type in ['015', '115', '045']:
                        if trt not in ['sta', 'usu', 'loc', 'bcs']:
                            continue
                        if rec_type[1] == '1': # 015 or 115 record
                            p = 78 + i*24   # start position trait block
                            prf = rec[p+19:p+24]
                            if _is_missing(prf): continue
                            typ_prf = rec[73:75]
                            offic = rec[75]
                            stat = rec[76:78]
                            nd = rec[p:p+6]
                            nh = rec[p+6:p+11]
                            edc = rec[p+11:p+17]
                            rel = rec[p+17:p+19]
                        else: # 045 record
                            p = {'sta':233, 'usu':246,
                                 'loc':259, 'bcs':272}[trt] - 1
                            nd = rec[p:p+6]
                            prf = rec[p+6:p+11]
                            if _is_missing(prf): continue
                            typ_prf = rec[225:227]
                            p = {'sta':64, 'usu':102,
                                 'loc':140, 'bcs':178}[trt] - 1
                            newdau = rec[p:p+24]
                            year1d = rec[p+24:p+28]
                        prf = 0.01 * float(prf)

                    # uder + long -----------------------------------------
                    elif rec_type in ['016', '017', '046', '047']:
                        if rec_type[1] == '1': # 016 or 017 record
                            p = 77 + i*28   # start position of each block
                            prf = rec[p+20:p+26]
                            if _is_missing(prf): continue
                            typ_prf = rec[71:73]
                            stat = rec[73:75]
                            offic = rec[p]
                            nd = rec[p+1:p+7]
                            nh = rec[p+7:p+12]
                            edc = rec[p+12:p+18]
                            rel = rec[p+18:p+20]
                        else: # 046 or 047 record
                            p = {'scs':154, 'mas':168, 'dlo':154}[trt] - 1
                            nd = rec[p:p+6]
                            prf = rec[p+6:p+12]
                            if _is_missing(prf): continue
                            typ_prf = rec[147:149]
                            p = {'scs':64, 'mas':102, 'dlo':64}[trt] - 1
                            newdau = rec[p:p+24]
                            year1d = rec[p+24:p+28]
                        # the scaling of 017 proofs differs for NLD and ISR
                        if rec_type == '017' and cou1 == 'NLD':
                            prf = float(prf)
                        elif rec_type == '017' and cou1 == 'ISR':
                            prf = 0.01 * float(prf)
                        else:
                            prf = 0.001 * float(prf)

                    # calv + fert + work ----------------------------------
                    elif rec_type in ['018', '019', '020',
                                      '048', '049', '050']:
                        stat = rec[71:73]
                        if is_proof: # 018/019/020
                            p = 75 + i*30   # start position of each block
                            prf = rec[p+22:p+28]
                            if _is_missing(prf): continue
                            typ_prf = rec[p:p+2]
                            offic = rec[p+2]
                            nd = rec[p+3:p+9]
                            nh = rec[p+9:p+14]
                            edc = rec[p+14:p+20]
                            rel = rec[p+20:p+22]
                        else: # 048/049/050
                            p = {'048':225, '049':263,
                                 '050':149}[rec_type] + i*16
                            prf = rec[p+10:p+16]
                            if _is_missing(prf): continue
                            typ_prf = rec[p:p+2]
                            nd = rec[p+4:p+10]
                            p = 63 + i*38
                            newdau = rec[p:p+24]
                            year1d = rec[p+24:p+28]
                        prf = 0.001 * float(prf)

                    # replace blank codes and counts by explicit defaults
                    if typ_prf == '  ' or typ_prf.strip() == '0':
                        typ_prf = '00'
                    if is_proof:
                        if stat == '  ': stat = '00'
                        if offic == ' ': offic = 'N'
                    if nd.strip() == '': nd = 0
                    if is_proof:
                        if nh.strip() == '': nh = 0
                        if edc.strip() == '': edc = 0

                    # ok, write a record for this trait
                    part1 = ' '.join((brd, pop, trt, aid))
                    if is_proof: # 01x/020
                        f300.write('300 %s %s %s %s%8d%8d%8d%8.4f%10.3f\n' %
                                   (part1, typ_prf, offic, stat, int(nd),
                                    int(nh), int(edc), float(rel), prf))
                    else:
                        f303.write('303 %s %s %s%8d%10.3f%6d%6d%6d%6d %s\n' %
                                   (part1, bdates[aid][:4], typ_prf, int(nd),
                                    prf, int(newdau[:6]), int(newdau[6:12]),
                                    int(newdau[12:18]), int(newdau[18:24]),
                                    year1d))
                    n1 += 1
                    counts[trtg, trt] = counts.get((trtg, trt), 0) + 1

        print('%8d records read from         %s' % (nin, infile))
        if args.verbose:
            print('%8d records from pop+brd %s' % (nok, pop+brd))
            print('%8d trait records written to  %s' %
                  (n1, file300 if is_proof else file303))
        if is_proof:
            n300 += n1
        else:
            n303 += n1

# report the totals; an empty output file ends the run with exit status 2
for nout, filename in [(n300, file300), (n303, file303)]:
    if nout > 0:
        print('%8d total records written to  %s' % (nout, filename))
    else:
        print('\nWARNING: no records written to file %s.\n'
              'Please double check the input file names and suffix and make '
              'sure the \npopulation code on the command line matches the '
              'country code in the files.\n' % filename)
        sys.exit(2)

#------------------------------------------------------------------------------
# write out bdate_POPBRD with aid+bdate
# - note: contains bulls in any file01X/file04X for this _POPBRD
filebd = os.path.join(outdir, 'bdate' + _POPBRD)
if os.path.exists(filebd):
    # read pre-existing bdate file from another convert program, if any;
    # its entries replace the dates collected for the same animal in this run.
    # The file is closed before it is reopened for writing below.
    with open(filebd) as f:
        for rec in f:
            # a blank line would otherwise add a bogus animal id
            if not rec.strip(): continue
            # layout: 19-character animal id, one separator, 8-character date
            aid, bdate = rec[:19], rec[20:28]
            bdates[aid] = bdate
if args.verbose:
    print('writing file with birth dates: ')
with open(filebd, 'w') as f:
    for aid in sorted(bdates):
        f.write('%s %s\n' % (aid, bdates[aid].replace(' ', '0')))
print('%8d records written to        %s\n' % (len(bdates), filebd))

#------------------------------------------------------------------------------
ibutils.dated_msg(sys.argv[0]+': done')
