#!/usr/bin/python
# -*- coding: utf-8 -*-

#~ GEBVtest
#~ Copyright (C) 2013 Interbull Centre
#~
#~ This program is free software: you can redistribute it and/or modify
#~ it under the terms of the GNU General Public License as published by
#~ the Free Software Foundation, either version 3 of the License, or
#~ (at your option) any later version.
#~
#~ This program is distributed in the hope that it will be useful,
#~ but WITHOUT ANY WARRANTY; without even the implied warranty of
#~ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#~ GNU General Public License for more details.
#~
#~  http://www.gnu.org/licenses/

# program gtconvert.py

'''Convert traditional proof files into new file formats for GEBVtest.

The program searches DATADIR for files matching fileCxxxf, fileCxxxr,
fileDxxxf or fileGxxxr, with xxx in 010/015/115/016/017/018/019/020, and
creates four files (file300Cf, file300Df, file300Cr and file300Gr) with
separate bull proof records for all traits found in all the xxx files matching
the specified breed of evaluation (BRD) and population/country code (POP).

The heritabilities and evdates are also extracted from the traditional
"parameters" file and written to a traits_POPBRD file, along with default values
for some trait specific options for GEBVtest.
'''

# Revision history:
# 2013.01.31 GJansen - original version

import os
import sys
import argparse
import ibutils
import codecs

#=====================
version = '2013.01.31'
#=====================

# to see help summary: python gtconvert.py --help
epilog = ('See detailed instructions at:\n'
          ' https://wiki.interbull.org/public/gtconvert_py?action=print')

# Command line definition, see http://docs.python.org/2.7/howto/argparse.html
# The three mandatory positionals are registered first, then the optional
# switches; arguments are registered in the same order as before.
parser = argparse.ArgumentParser(epilog=epilog)
for _name, _help in (
        ('brd', 'evaluation breed code (BSW/GUE/JER/HOL/RDC/SIM)'),
        ('pop', 'population code (same as country code except for '
                'CHR/DEA/DFS/FRR/FRM)'),
        ('datadir', 'absolute or relative path to data files')):
    parser.add_argument(_name, help=_help)

# optional switches (dest is derived by argparse from the long option name)
parser.add_argument('-v', '--verbose', action='store_true',
                    help='increase output verbosity')
parser.add_argument('-s', '--suffix', default='',
                    help='suffix to add to all input file names, eg. ".usa" '
                         'if file names are like fileC010f.usa (default=none)')
parser.add_argument('-p', '--parfile',
                    help='path+name of input "parameter" file '
                         '(default=DATADIR/parameterSUFFIX)')
parser.add_argument('-e', '--encoding', default='utf-8',
                    help='input file encoding (default=utf-8; try also '
                         'iso-8859-1 or other values listed at '
                         'http://docs.python.org/2/library/'
                         'codecs.html#standard-encodings)')
parser.add_argument('-d', '--depvar', default='GM', choices=['DD', 'GM'],
                    help='type of daughter performance on Df file '
                         '(default=GM)')
parser.add_argument('-y', '--year',
                    help='minimum birth year for test bulls (default is year '
                         'of EVALDATE on parameter file less 8 years)')
parser.add_argument('-x', '--type2x', default='N', choices=['Y', 'N'],
                    help='inclusion of type 21+22 bulls in test group '
                         '(default=N)')
parser.add_argument('-o', '--outdir',
                    help='directory for output files (default=DATADIR)')
args = parser.parse_args()

# Normalise the identifying codes given on the command line.
brd = ibutils.check_breed(args.brd)
pop = args.pop.upper()
_POPBRD = ''.join(('_', pop, brd))      # suffix of every output file name
datadir = args.datadir
outdir = args.outdir or args.datadir    # output goes to DATADIR by default
# country is same as population, with a few exceptions ...
pop2cou = dict(CHR='CHE', DEA='DEU', DFS='DNK', FRR='FRA', FRM='FRA')
cou = pop2cou[pop] if pop in pop2cou else pop

ibutils.dated_msg('%s: start' % sys.argv[0])
if args.verbose:
    print('%s version=%s' % (sys.argv[0], version))

print('Processing brd=%s pop=%s cou=%s datadir=%s' % (brd, pop, cou, datadir))

# The input directory must already exist; the output directory is created
# on demand.
if not os.path.exists(datadir):
    print('absolute DATADIR: %s' % os.path.abspath(datadir))
    print('{0}: error: DATADIR does not exist or has incorrect permissions'
          .format(sys.argv[0]))
    sys.exit(1)
if not os.path.exists(outdir):
    os.makedirs(outdir)
if args.verbose:
    print('absolute DATADIR: %s' % os.path.abspath(datadir))
    print('absolute OUTDIR : %s' % os.path.abspath(outdir))


# lookup table: old 2-letter trait code -> new 3-letter trait code
# (codes missing from the table are passed through unchanged by the caller)
old2new = dict(
    mi='mil', fa='fat', pr='pro', sc='scs', ma='mas',
    dl='dlo', dc='dce', mc='mce', ds='dsb', ms='msb',
    hc='hco', cy='crc', c1='cc1', c2='cc2', it='int',
    md='msp', te='tem')

#------------------------------------------------------------------------------
# convert parameter file
#
# Every whitespace-delimited row of the "parameter" file starts with
#   brd1, pop1, tr, evdate, her, meann, sign, stdn, meano, stdo, pub
# of which only the first five columns are used.  Rows matching the requested
# breed and population are written to the traits file as
#   trait her evdate depvar minyear type2x
parfile = args.parfile if args.parfile else \
    os.path.join(datadir, 'parameter' + args.suffix)
print('parfile=%s' % parfile)
traitfile = os.path.join(outdir, 'traits' + _POPBRD)
n = 0
# context managers make sure both files are closed (and the output flushed)
# even when a malformed row raises; the traits file is opened first, as before
with open(traitfile, 'w') as f, open(parfile) as fpar:
    for rec in fpar:
        if len(rec) < 5:
            continue            # too short to hold a row: silently ignored
        z = rec.strip().split()
        if len(z) < 5:
            print('*** skipping bad row in "parameter" file')
            print(rec)
            continue
        brd1, pop1, tr, evdate, her = z[:5]
        if brd1 != brd:
            continue
        # rows coded 'dnk' are treated as population 'dfs'
        if pop1 == 'dnk':
            pop1 = 'dfs'
        if pop1.upper() != pop:
            continue
        trt = old2new.get(tr, tr)
        # default minimum birth year: year of the evaluation date less 8
        minyear = args.year if args.year else str(int(evdate[:4]) - 8)
        f.write('%s %s %s %s %s %s\n' % (trt, her, evdate, args.depvar,
                                         minyear, args.type2x))
        n += 1
if n == 0:
    print('%s: error: no valid records for brd=%s and pop=%s in file %s'
          % (sys.argv[0], brd, pop, parfile))
    # non-zero status so that calling scripts can detect the failure
    sys.exit(1)
print('%8d records written to        %s' % (n, traitfile))

#------------------------------------------------------------------------------
# convert proof files
#
# For each output file (Cf, Cr, Df, Gr) every existing input file
# file<A><rec_type><b><SUFFIX> in DATADIR is read as fixed-width text.  Records
# of the requested breed and country produce one "300" output record per trait
# with a non-missing proof; the birth date of each such bull is remembered in
# bdates (keyed by animal id).
#
# NOTE(review): nd/nh/edc/rel/prf presumably stand for number of daughters,
# number of herds, effective daughter contributions, reliability and proof --
# confirm against the file format specification.

# record length for each type of file (the country code is taken from the
# last three columns of a record)
record_lengths = {'010':318, '015':585, '115':609, '016':134,
                  '017':134, '018':196, '019':226, '020':136}
bdates = {}
for (A, b) in [('C','f'), ('C','r'), ('D','f'), ('G','r')]:
    outfile = os.path.join(outdir, 'file300' + A + b + _POPBRD)
    if args.verbose:
        print('opening output file %s ...' % outfile)
    nout = 0
    # "with" closes the output file even if a malformed record raises
    with open(outfile, 'w') as f:
        for rec_type in ['010', '015', '115', '016', '017', '018', '019',
                         '020']:
            infile = os.path.join(datadir,
                                  'file' + A + rec_type + b + args.suffix)
            if not os.path.isfile(infile):
                continue
            recl = record_lengths[rec_type]
            # list of traits in the file
            traits = ibutils.get_trait_list(rec_type)
            #if rec_type == '016':
            #    # NB! update itbinfo.py when a new pop has a mastitis
            #    # evaluation
            #    pop_has_mas = pop in ibutils.has_mastitis_eval[brd]
            # ready ...
            if args.verbose:
                print('reading file %s ...' % infile)
            nin = nok = n1 = 0
            # the input file is closed as soon as it has been read
            with codecs.open(infile, encoding=args.encoding) as fin:
                for rec in fin:
                    if len(rec) < 5:
                        continue                    # skip EOF marker
                    nin += 1
                    if rec_type != rec[0:3]:
                        # NOTE(review): the mismatch is only reported; the
                        # record is still processed below
                        print('%s: error: found rec_type %s in file %s' %
                              (sys.argv[0], rec[0:3], infile))
                    brd1 = rec[3:6]
                    if brd1 != brd:
                        continue
                    cou1 = rec[recl-3:recl]
                    if cou1 != cou:
                        continue
                    # blanks inside the animal id are replaced so that the id
                    # stays a single token in the space-delimited output
                    aid = rec[6:25].replace(' ', '~')
                    nok += 1
                    if aid not in bdates:
                        # birth date position differs for production records
                        if rec_type == '010':
                            bdates[aid] = rec[101:109]
                        else:
                            bdates[aid] = rec[55:63]
                    for i, trt in enumerate(traits):
                        # skip traits in a few special cases
                        if (pop == 'DEA' and brd == 'BSW'
                                and trt in ['dsb', 'msb']):
                            continue
                        if pop == 'ISR' and brd == 'HOL' and trt == 'int':
                            continue

                        # production ------------------------------------------
                        if rec_type == '010':
                            typ_prf = rec[245:247]
                            offic = rec[248]
                            stat = rec[255:257]
                            if trt == 'pro':
                                nd = rec[225:231]
                                nh = rec[231:237]
                                edc = rec[309:315]
                                rel = rec[243:245]
                                prf = rec[271:277]
                                if prf == '999999':
                                    prf = '99999999'
                            else:
                                nd = rec[205:211]
                                nh = rec[211:217]
                                rel = rec[223:225]
                                if trt == 'mil':
                                    edc = rec[297:303]
                                    prf = rec[257:265]
                                else:
                                    edc = rec[303:309]
                                    prf = rec[265:271]
                                    if prf == '999999':
                                        prf = '99999999'
                            # proof is stored in 1/100 units
                            prf1 = '%10.0f' % (0.01 * float(prf))

                        # conformation ----------------------------------------
                        if rec_type == '015' or rec_type == '115':
                            typ_prf = rec[73:75]
                            offic = rec[75]
                            stat = rec[76:78]
                            p = 78 + i*24       # start position trait block
                            nd = rec[p:p+6]
                            nh = rec[p+6:p+11]
                            edc = rec[p+11:p+17]
                            rel = rec[p+17:p+19]
                            prf = rec[p+19:p+24]
                            if prf == '99999' or prf.strip() == '':
                                prf = '99999999'
                            else:
                                prf1 = '%10.2f' % (0.01 * float(prf))

                        # udder + long ----------------------------------------
                        if rec_type == '016' or rec_type == '017':
                            typ_prf = rec[71:73]
                            stat = rec[73:75]
                            p = 77 + i*28       # start position of each block
                            # if trt == 'mas' and not pop_has_mas:
                            #    # special code to use scs block for mas
                            #    p = 77
                            offic = rec[p]
                            nd = rec[p+1:p+7]
                            nh = rec[p+7:p+12]
                            edc = rec[p+12:p+18]
                            rel = rec[p+18:p+20]
                            prf = rec[p+20:p+26]
                            # the scale of type 017 proofs depends on the
                            # country
                            if prf == '999999':
                                prf = '99999999'
                            elif rec_type == '017' and cou1 == 'NLD':
                                prf1 = '%10.3f' % (1 * float(prf))
                            elif rec_type == '017' and cou1 == 'ISR':
                                prf1 = '%10.3f' % (0.01 * float(prf))
                            else:
                                prf1 = '%10.3f' % (0.001 * float(prf))

                        # calv + fert + work ----------------------------------
                        if rec_type >= '018' and rec_type <= '020':
                            stat = rec[71:73]
                            p = 75 + i*30       # start position of each block
                            typ_prf = rec[p:p+2]
                            offic = rec[p+2]
                            nd = rec[p+3:p+9]
                            nh = rec[p+9:p+14]
                            edc = rec[p+14:p+20]
                            rel = rec[p+20:p+22]
                            prf = rec[p+22:p+28]
                            if prf == '999999':
                                prf = '99999999'
                            prf1 = '%10.3f' % (0.001 * float(prf))

                        if prf == '99999999':
                            continue            # skip missing values
                        # replace blank fields by their default codes
                        if typ_prf == '  ' or typ_prf.strip() == '0':
                            typ_prf = '00'
                        if stat == '  ':
                            stat = '00'
                        if offic == ' ':
                            offic = 'N'
                        if nd.strip() == '':
                            nd = 0
                        if nh.strip() == '':
                            nh = 0
                        if edc.strip() == '':
                            edc = 0

                        # ok, write a record for this trait
                        f.write(' '.join(('300', brd, pop, trt, aid, typ_prf,
                                          offic, stat))
                                + '%8d%8d%8d%8.4f%s\n' %
                                (int(nd), int(nh), int(edc), float(rel), prf1))
                        n1 += 1

            print('%8d records read from         %s' % (nin, infile))
            if args.verbose:
                print('%8d records from pop+brd %s' % (nok, pop+brd))
                print('%8d trait records written' % (n1))
            nout += n1
    print('%8d total records written to  %s\n' % (nout, outfile))

#------------------------------------------------------------------------------
# write out file736: one "736 <aid> <bdate>" line per bull, ordered by aid
# - note: file736 contains bulls in any Cf/Df/Cr/Gr file for this _POPBRD
file736 = os.path.join(outdir, ''.join(('file736', _POPBRD)))
if args.verbose:
    print('writing file with birth dates: %s' % file736)
with open(file736, 'w') as f:
    # ids are unique dict keys, so sorting the items orders the lines by id
    for aid, bdate in sorted(bdates.items()):
        f.write('736 %s %s\n' % (aid, bdate))
print('%8d records written to        %s\n' % (len(bdates), file736))

#------------------------------------------------------------------------------
ibutils.dated_msg('%s: done' % sys.argv[0])
