#!/usr/bin/python3
# -*- coding: utf-8 -*-

#~ GEBVtest
#~ Copyright (c) 2013-2025 Interbull Centre
#~
#~ This program is free software: you can redistribute it and/or modify
#~ it under the terms of the GNU General Public License as published by
#~ the Free Software Foundation, either version 3 of the License, or
#~ (at your option) any later version.
#~
#~ This program is distributed in the hope that it will be useful,
#~ but WITHOUT ANY WARRANTY; without even the implied warranty of
#~ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#~ GNU General Public License for more details.
#~
#~  http://www.gnu.org/licenses/

'''
Perform the GEBV validation tests for one breed-population combination,
for all traits.
'''

# Revision history:
# 2013.01.05 GJansen - original version
# 2014.06.02 MAN - The same weight is used for DD and GM (edcd)
# 2015.02.27 MAN - The weight edcd/(edcd+Lambda) was used instead of edcd.
# 2019.05.23 HBA - (b<1.2) rule implemented
# 2019.09.11 HBA - small bug affecting inclusion of foreign bulls fixed
# 2021.11.01 PGS - Options for different validation targets, and base adjustment variables
# 2021.11.01 PGS - Scale the reduced-data evaluations to match current-data base of expression
# 2022.10.14 PGS - Estimate and use trait-specific weightings to optimize reduced-data scaling
# 2022.11.01 PGS - Derive deregressed GEBV as a validation target from the data in file300Gf versus [Gr or Cr]
# 2022.12.20 PGS   ...  Base-adjust Gr and Cr before deriving the deregressed GEBV
# 2022.12.20 PGS - Option to run the standard Legarra-Reverter tests, using unweighted regressions
# 2022.12.20 PGS - Improve command-line checks for recognized option requests only
# 2022.12.20 PGS - Only require file300's that are needed for the requested options 
# 2023.05.05 PGS - Weight by RELdiff in validation regression tests with genomic targets   (VanRaden, 2021, ITB Bulletin)
# 2023.06.15 PGS - New category "hiSE" if a practical FAIL is inconclusive due to a statistical PASS: |t|<2
# 2023.11.17 PGS - Add bootstrap tests of significance for both the validation slope and R*R improvement tests
# 2023.11.27 PGS - Add bias tests across the range of reduced-data evaluations (i.e. tests of bias for average vs. top bulls)
# 2024.04.01 PGS - Use insert-order dictionary entries (OrderedDict) for backwards compatibility before Python 3.6
# 2024.04.01 PGS - Setting default --baseadj=EBV
# 2024.04.01 PGS - Add a bootstrap test for the PA validation slope (b1-Cr), as extra information 
# 2024.05.01 PGS - Add a minimum number of bulls for --baseadj, which is double the number of validation bulls
#                  --> include more years of historically proven bulls in Cr/Gr or request exception --baseadj=NONE
# 2024.05.01 PGS - Exclude foreign bulls from validation group if they have foreign daughters (top=24) in Gr eval'n
# 2024.05.01 PGS - Add V(GEBVf-GEBVr) for validation bulls. E(GRELf-GRELr) = this V() / V(g)  (GREL working group).
# 2024.09.12 CW  - Avoid checking of aid in Df and Cr files if Df is not required (target !=DEBV)
# 2025.05.14 PGS - Stop requiring EDC from a Df file that can be missing... use the Cf file
# 2025.05.14 PGS - Add an interim WARNING range for slopes of non-production traits, until suitable FAIL ranges can be defined
# 2025.05.14 PGS - Update the file inclusion list for zipfiles sent to ITBC

import os
import sys
import argparse
import locale
import zipfile
from datetime import date
import numpy as np
import math as m
import ibutils

from os.path import basename
from collections import OrderedDict

# NOTE(review): a `global` statement at module level has no effect, and
# `debug` is not assigned anywhere in the visible part of this file.
global debug

#=====================
version = '2025.05.15'
#=====================

# run date stamp (YYYYMMDD) taken from the system clock at start-up
rundate = date.today().strftime('%Y%m%d')
#rundate = '20250513'  ############ temp

# to see help summary: python gebvtest.py --help
epilog = ('See detailed instructions at: '
          'https://interbull.org/ib/gebvtest_software')

# Command-line interface.
# Options declared without `default=` are None when not given on the command
# line; their documented defaults are filled in further down, after parsing.
# see https://docs.python.org/3.12/howto/argparse.html
parser = argparse.ArgumentParser(epilog=epilog)
# --- positional arguments ---------------------------------------------------
parser.add_argument('brd',
                    help='evaluation breed code (BSW/GUE/JER/HOL/RDC/SIM)')
parser.add_argument('pop',
                    help='population code (same as country code except for'
                    ' CHR/DEA/DFS/FRR/FRM)')
parser.add_argument('datadir',
                    help='absolute or relative path to data files')
# --- general switches -------------------------------------------------------
parser.add_argument('--version', action='version', version=("%(prog)s "+version),
                    help="show version of this program and exit")
parser.add_argument('-v', '--verbose', action='store_true',
                    help='increase output verbosity')
parser.add_argument('-Z', '--no-zip', action='store_true',
                    help='do not create a zip file (eg. for preliminary testing'
                    ' or usage at ITBC)')
parser.add_argument('-C', '--cleanup', action='store_true',
                    help='delete all files successfully added to the zip file')
# --- validation options -----------------------------------------------------
parser.add_argument('--target', choices = [ 'DGEBV', 'VFEBV', 'GEBV', 'EBV', 'DEBV' ],
                    help='validation target options are: '
                    '[ DGEBV, VFEBV, GEBV, EBV, DEBV ]  (default=DGEBV)')
parser.add_argument('--weight', choices = [ 'ITB', 'LR' ],
                    help='Options are: [ ITB or LR ], for '
                    'the Interbull weighted-regression test or '
                    'Legarra-Reverter un-weighted regression, '
                    'respectively  (default=ITB)')
# --min_byear and --fb override the per-trait values read from the traits file
parser.add_argument('--min_byear',
                    help='specify a minimum birth year to use instead of '
                    'using the value specified in the traits file')
parser.add_argument('--fb', choices = [ 'Y', 'N' ],
                    help='specify Y or N to include foreign bulls in the '
                    'validation group, instead of Y/N from the traits file')
# --- base adjustment options ------------------------------------------------
parser.add_argument('--baseadj', choices = [ 'GEBV', 'EBV', 'NONE' ],
                    help='evaluation variable to use for base adjustments, '
                    'options are: [ EBV, GEBV, NONE ]  (default=EBV)')
parser.add_argument('--power',
                    help='specify a base for the power function weighting records in '
                    'base adjustments, instead of optimizing the base from the data')
# --baseincl is a colon-separated string of up to four comma-separated lists
parser.add_argument('--baseincl',
                    help='comma-separated lists of restrictions on bulls to include '
                    'for base adjustment estimates, [ min,max byr : '
                    'proof type list : proof status list : official Y/N ]')
parser.add_argument('--traitsincl',
                    help='comma-separated list of traits to process')
# --- output options ---------------------------------------------------------
parser.add_argument('--outdir',
                    help='absolute or relative path to write output files'
                    ' (default=.)')
parser.add_argument('-m', '--mergefiles', action='store_true',
                    help='write merged data files (for independent data checks)')
parser.add_argument('-M', '--mergedir',
                    help='absolute or relative path for merged data files'
                    ' (default=OUTDIR/merged)')
# --samples and --accept_bias are not given a `type=`: a value from the
# command line arrives as a string and is converted with int()/float() later
parser.add_argument('-s', '--samples', default='1000',
                    help='number of bootstrap samples for R-squared test '
                    '(default=1000)')
parser.add_argument('--accept_bias', default=0.25,
                    help='standardized ignorable bias accepted in practice '
                    '(default=0.25)')

# parse sys.argv; argparse prints usage and exits on invalid arguments
args = parser.parse_args()

# Breed code as returned by ibutils.check_breed(); population code upper-cased.
brd = ibutils.check_breed(args.brd)
pop = args.pop.upper()
# suffix shared by all input/output file names, e.g. '_USAHOL'
_POPBRD = '_' + pop + brd
# country is same as population, with a few exceptions ...
pop2cou = {'CHR':'CHE', 'DEA':'DEU', 'DFS':'DNK', 'FRR':'FRA', 'FRM':'FRA'}
cou = pop2cou.get(pop, pop)

# DATADIR must be an existing directory: the script changes into it later on,
# so a path that merely exists (e.g. a regular file) has to be rejected here
# with a clear message rather than failing later with a traceback.
if not os.path.isdir(args.datadir):
    print('absolute DATADIR: ' + os.path.abspath(args.datadir))
    print('%s: error: DATADIR does not exist, is not a directory, or has'
          ' incorrect permissions' % sys.argv[0])
    sys.exit(1)

#------------------------------------------------------------------------------
def read_file300(brd, pop, trt, dset, args):
    '''Read and store data for one dataset (Cf/Cr/Gr/Df/Gf/Vf) and one trait.

    The file ``file300<dset>_<pop><brd>`` is opened relative to the current
    working directory.  Only records whose characters 13-15 (``rec[12:15]``)
    equal `trt` are used.  Fields may be separated by blanks and/or commas.

    Returns an OrderedDict keyed by animal id (insertion order = file order)
    whose values are the tuples
        (top, off, sta, nd, nh, edc, rel, ebv, extra_data)
    with
        top = type of proof (11, 12, 21 etc), kept as a string
        off = officially publishable proof (Y/N)
        sta = bull status, kept as a string
        nd  = n. daughters, kept as a string
        nh  = n. herds, kept as a string
        edc = int, rel = float, ebv = float
        extra_data = comma-joined string of any fields after the 13th,
                     or None when the record has exactly 13 fields.
    When no reliability in the file exceeds 1.0 the reliabilities are taken
    to be proportions and are returned multiplied by 100.

    If the file cannot be opened an empty OrderedDict is returned, unless the
    dataset is needed for the requested options (module-level `target` and
    `basevar`), in which case the program exits with status 5.  A record
    with fewer than 13 fields exits with status -1; a breed or population
    code mismatch on the first line of the file exits with status 99.
    '''
    file300 = 'file300' + dset + '_' + pop + brd
    try:
        infile = open(file300)
    except OSError:
        # The file is missing/unreadable: fatal only if the options need it.
        # (The dset tests come first so `target`/`basevar` are only looked
        # at for the datasets whose requirement depends on them.)
        required = (
            dset in ('Cr', 'Gr')
            or (dset == 'Df' and target == 'DEBV')
            or (dset == 'Cf' and (basevar == 'EBV' or target == 'EBV'))
            or (dset == 'Gf' and (basevar == 'GEBV' or target == 'GEBV'
                                  or target == 'DGEBV'))
        )
        if required:
            print( 'Required input file is missing: %s' % file300 )
            sys.exit(5)
        return OrderedDict()
    if args.verbose:
        log.write('reading %s ...\n' % file300)

    records = OrderedDict()
    rel_max = 0.0
    # `with` guarantees the handle is closed, also on the sys.exit() paths
    with infile:
        for n, rec in enumerate(infile):
            if rec[12:15] != trt:
                continue
            words = rec.replace(',', ' ').split()
            if len(words) < 13:
                msg = ('%s: error: cannot parse record with %d words from file %s\nrecord: %s\n'
                       % (sys.argv[0], len(words), file300, rec))
                log.write(msg)
                sys.stderr.write(msg)
                sys.exit(-1)
            (xxx, brd1, pop1, trt1, aid, top, off, sta,
             nd, nh, edc, rel, ebv) = words[:13]
            # anything beyond the 13 standard fields is carried along as-is
            extra_data = ','.join(words[13:]) if len(words) > 13 else None
            # NOTE(review): n counts every line of the file, so the two
            # checks below only run when the very first line of the file
            # belongs to this trait.
            if n == 0 and brd1 != brd:
                msg = ('%s: error: bad breed code in file %s\n%s instead of %s\n'
                       % (sys.argv[0], file300, brd1, brd))
                log.write(msg)
                sys.stderr.write(msg)
                sys.exit(99)
            if n == 0 and pop1 != pop:
                msg = ('%s: error: bad pop code in file %s\n%s instead of %s\n'
                       % (sys.argv[0], file300, pop1, pop))
                log.write(msg)
                sys.stderr.write(msg)
                sys.exit(99)
            rel = float(rel)
            if rel > rel_max:
                rel_max = rel
            records[aid] = (top, off, sta, nd, nh, int(edc), rel, float(ebv), extra_data)
    if args.verbose:
        log.write('stored %6d records for trait %s from file %s\n'
                  % (len(records), trt, file300))
    if not rel_max > 1.00:
        # reliabilities were supplied as proportions: express them in percent
        for aid in list(records):
            fields = records[aid]
            records[aid] = fields[:6] + (fields[6] * 100.0,) + fields[7:]
    return records

#------------------------------------------------------------------------------
def add_extra( data ):
    '''Return the placeholder record to use for animals missing from `data`.

    Only the first record of `data` is inspected.  If its extra-data field
    (element 8) contains n > 0 commas, the result is a copy of the
    module-level `empty` record whose extra-data field is a string of n
    commas; otherwise (no records, no extra data, or no comma in it) the
    `empty` record itself is returned.

    NOTE(review): extra data holding a single field contains no comma and
    therefore gets the plain `empty` placeholder -- confirm this is the
    intended layout for the merged files.
    '''
    global empty
    n_commas = 0
    for first_rec in data.values():
        extra = first_rec[8]
        if extra is not None:
            n_commas = extra.count(',')
        break   # the first record decides
    if not n_commas:
        return empty
    return empty[:8] + (',' * n_commas,)

#------------------------------------------------------------------------------
def derived_from( Full, Reduced, emptyF, emptyR ):
    '''Derive de-regressed evaluations from full- versus reduced-data records.

    For every animal present in both `Full` and `Reduced` (record layout as
    returned by read_file300) the reduced-data evaluation is first put on
    the current base with the module-level `base_b` (intercept, slope).
    Both reliabilities are capped at 99.5.  An animal is kept only when its
    reduced reliability is > 0 and the full reliability exceeds it by at
    least 1; for those, with ne = rel / (100 - rel):
        ne_2   = ne_full - ne_reduced
        value  = reduced + (full - reduced) / (ne_2 / (ne_2 + 1)),  2 decimals
        edc    = ne_2 * Lambda, rounded to an integer (module-level `Lambda`)
    Returns a dict  aid -> (top, off, sta, nd, nh, edc, rel_full, value,
    extra) where the first five fields, the (capped) reliability and the
    extra data come from the `Full` record.

    `emptyF` / `emptyR` are the fall-back records for the dictionary
    look-ups; they are not reached because only animals found in both
    dictionaries are processed.
    '''
    derived = {}
    if Full and Reduced:
        for bull in Full:
            if bull not in Reduced:
                continue
            (top_f, off_f, sta_f, nd_f, nh_f,
             edc_f, rel_f, gebv_f, extra_f) = Full.get(bull, emptyF)
            reduced_rec = Reduced.get(bull, emptyR)
            rel_r = reduced_rec[6]
            # reduced-data evaluation expressed on the current base
            gebv_r = base_b[0] + base_b[1] * reduced_rec[7]
            #if Reduced == dataCr and relg == 0: relg = 0.3 * relgf  ############ temp
            rel_f = min(rel_f, 99.5)
            rel_r = min(rel_r, 99.5)
            if not (rel_r > 0 and rel_f - rel_r >= 1):
                continue
            ne_r = rel_r / (100 - rel_r)
            ne_f = rel_f / (100 - rel_f)
            ne_2 = ne_f - ne_r
            den_ne2 = ne_2 / (ne_2 + 1.0)
            dereg = round( gebv_r + (gebv_f - gebv_r) / den_ne2, 2 )
            edc_dif = int( round( ne_2 * Lambda, 0 ) )
            derived[bull] = (top_f, off_f, sta_f, nd_f, nh_f,
                             edc_dif, rel_f, dereg, extra_f)
    if args.verbose:
        log.write('derived %6d records for trait %s for a %s file300 equivalent\n'
                  % (len(derived), trt, target ))
    return derived

#------------------------------------------------------------------------------
def simple_wls(X, y, weights):
    '''Simple weighted least squares.

    For a 1-D `X` the model  y = b0 + b1 * X  is fitted with the given
    (positive, non-zero) `weights` by scaling every row with sqrt(weight).

    Returns (b, bse, rsquared):
        b        = array resized to 4 values: [b0, b1, weighted mean of y,
                   weighted mean of X]
        bse      = standard errors of the solutions, from the inverse of
                   X'WX and the weighted residual sum of squares divided by
                   (n. records - n. columns)
        rsquared = 1 - SSE / SST, with SST taken around the weighted mean
    When the residual sum of squares is not positive, bse is all zero and
    rsquared is 0.0.

    NOTE(review): for a 2-D `X` the rows are multiplied by a 1-D vector
    (`X*w`), which only broadcasts for particular shapes; the callers seen
    in this file pass 1-D arrays -- confirm the 2-D branch is unused.
    '''
    root_w = np.sqrt(weights)  # these are positive non-zero values
    if X.ndim == 1:
        design = np.column_stack((root_w, X * root_w))
    else:
        design = X * root_w
    resp = y * root_w

    xwx_inv = np.linalg.inv(np.dot(design.transpose(), design))
    b = np.dot(xwx_inv, np.dot(design.transpose(), resp))

    # total and residual (weighted) sums of squares
    sst = np.sum(weights * (y - np.dot(weights, y) / sum(weights))**2)
    resid = resp - np.dot(design, b)
    sse = np.dot(resid, resid)

    # weighted means; root_w . root_w is the sum of the weights
    sum_w = np.dot( root_w.transpose(), root_w )
    mean_X = np.dot((X * root_w).transpose(), root_w) / sum_w
    mean_Y = np.dot(resp.transpose(), root_w) / sum_w

    # append the two means to the solution vector
    b.resize( 4 )
    b[2] = mean_Y
    b[3] = mean_X

    if sse > 0:
        resid_df = len(y) - design.shape[1]
        bse = np.sqrt(np.diag(xwx_inv) * sse / resid_df)
        rsquared = 1. - sse / sst
    else:
        bse = np.zeros( 2 )
        rsquared = 0.0

    return b, bse, rsquared

#------------------------------------------------------------------------------
def use_for_base_adjust( animal ):
    '''Decide whether the current bull is used to estimate the base adjustment.

    The `animal` argument is not used: the decision is taken from the
    module-level values describing the bull currently being processed
    (relr, rel_0, rel_1, relchg, byr, top, sta, off).

    Returns False when base adjustment is switched off (basevar == 'NONE')
    or unless relr > 50, rel_0 > 50, rel_1 <= 100 and relchg >= 0.
    With --baseincl the bull must in addition satisfy every non-empty
    restriction: birth year within [require_byr[0], require_byr[1]]
    (string comparison), proof type in require_type, status in
    require_status and official flag equal to require_official.
    '''
    if basevar == 'NONE':
        return False
    reliable = relr > 50.0 and rel_0 > 50.0 and rel_1 <= 100 and relchg >= 0
    if not reliable:
        return False
    if not args.baseincl:
        return True
    # check restrictions on bulls to include
    if require_byr[0] != '' and byr < require_byr[0]:
        return False
    if require_byr[1] != '' and byr > require_byr[1]:
        return False
    if require_type[0] != '' and require_type.count( top ) < 1:
        return False
    if require_status[0] != '' and require_status.count( sta ) < 1:
        return False
    if require_official != '' and require_official != off:
        return False
    return True

#------------------------------------------------------------------------------
def OBSprop_with( pbase ):
    '''Return mean(weight) / mean(rel_0) over the bulls used for base adjustment.

    Every bull in dataCf accepted by use_for_base_adjust() gets the weight
        wt = rel_0 * pbase ** relchg
    where rel_0 is its reduced-data reliability (from Gr when
    basevar == 'GEBV', otherwise from Cr) and relchg the change to the
    full-data reliability (Gf or Cf respectively).

    Side effects: all names in the `global` statement below are module-level
    variables.  use_for_base_adjust() reads the per-bull ones from module
    scope, and on return rel_n, rel_u and wt_u hold the number of accepted
    bulls and the running means of rel_0 and wt.  When no bull is accepted
    the result is 0.0 and rel_n is set to 1.
    '''
    global aid, byr, top, sta, off, relr, rel_0, rel_1, relchg, rel_n, wt_u, rel_u
    wt_u = 0.0    # running mean of the record weights
    rel_u = 0.0   # running mean of rel_0
    rel_n = 0     # number of bulls accepted so far
    for aid in dataCf:
        byr = byear.get(aid, '0000')   # '0000' when the birth year is unknown
        top, off, sta, nd, nh, edc, rel, ebv, xxx = dataCf[aid]
        topr, offr, star, ndr, nhr, edcr, relr, ebvr, xxx = dataCr.get(aid, empty )
        if basevar == 'GEBV':
            # reliability change between the two genomic evaluations
            topgf, offgf, stagf, ndgf, nhgf, edcgf, relgf, gebvf, xxx = dataGf.get(aid, empty)
            topg, offg, stag, ndg, nhg, edcg, relg, gebv, xxx = dataGr.get(aid, empty)
            rel_1 = relgf
            rel_0 = relg
        else:
            # reliability change between the two conventional evaluations
            rel_1 = rel
            rel_0 = relr
        relchg = rel_1 - rel_0
        if use_for_base_adjust( aid ):
            rel_n += 1
            # incremental update of the means: mean += (x - mean) / n
            rel_u += (rel_0 - rel_u) / rel_n
            wt = rel_0 * pow( pbase, relchg )
            wt_u += (wt - wt_u) / rel_n
    if rel_u:
        OBSpc = wt_u / rel_u
    else:
        OBSpc = 0.0
        # keep rel_n non-zero: the caller divides by it (min_Ne / rel_n)
        if rel_n == 0: rel_n = 1
    return OBSpc


#------------------------------------------------------------------------------
#------------------------------------------------------------------------------
#------------------------------------------------------------------------------

# Output directory: --outdir is taken relative to DATADIR (an absolute
# --outdir replaces it, as per os.path.join); default is DATADIR itself.
outdir = os.path.join( args.datadir, args.outdir ) if args.outdir else args.datadir
outdir = os.path.abspath( outdir )
# os.makedirs instead of a shell 'mkdir -p': works for paths containing
# blanks or shell metacharacters, is portable, and raises OSError on failure
# instead of silently carrying on.
os.makedirs(outdir, exist_ok=True)
if args.mergefiles:
    # merged data files go to --mergedir, or OUTDIR/merged by default;
    # base-corrected versions go to a 'BaseCorrected' sub-directory of it
    mergedir = os.path.join(outdir, 'merged')
    mergedir = os.path.abspath(args.mergedir if args.mergedir else mergedir)
    os.makedirs(mergedir, exist_ok=True)
    mergedir2 = os.path.join(mergedir, 'BaseCorrected')
    os.makedirs(mergedir2, exist_ok=True)

# Fill in the defaults of the options that were not given on the command line.
power = locale.atof(args.power) if args.power else 0.25
target = args.target or 'DGEBV'
# 'Y' when the validation target is a genomic evaluation
genomic_target = 'Y' if target in ('GEBV', 'DGEBV') else 'N'
basevar = args.baseadj or 'EBV'
val_weight = args.weight or 'ITB'
vwt = '_LR' if val_weight == 'LR' else ''

# --baseincl = "min,max byr : proof types : proof statuses : official Y/N"
# Trailing parts may be omitted; an omitted list becomes ['', ''], whose empty
# first element means "no restriction".  The require_* names are only defined
# when --baseincl was given.
if args.baseincl:
    restrictions = args.baseincl.split(':')
    nrest = len( restrictions )
    # the extra ',' guarantees a second (max) element for the birth years
    require_byr = (restrictions[0] + ',').split(',')
    require_type = restrictions[1].split(',') if nrest > 1 else ['', '']
    require_status = restrictions[2].split(',') if nrest > 2 else ['', '']
    require_official = restrictions[3] if nrest > 3 else ''

#============================================================
os.chdir(args.datadir)                  # NB! move to DATADIR

# input files, read from DATADIR (now the current directory)
file736 = 'file736' + _POPBRD
filetrt = 'traits' + _POPBRD

# tag describing the validation target and the overriding options:
#   trgA = first letter of the target ('DG' for DGEBV)
#   trgB = --min_byear and '_fb' + --fb, when given
trgA = 'DG' if target == 'DGEBV' else target[0:1]
trgB = ''
if args.min_byear:
    trgB += args.min_byear
if args.fb:
    trgB += '_fb' + args.fb
trg = trgA + trgB + vwt

# script name with its last three characters (the '.py') removed
_script_stem = os.path.basename( sys.argv[0][:-3] )
if args.baseadj or args.target or args.weight:
    # non-default run: prefix the output names with the base variable and tag
    _name_tag = 'b' + basevar[0:1] + 't' + trg
    filelog = os.path.join( outdir, '%s_log_b%s_t%s%s' % (_script_stem, basevar[0:1], trg, _POPBRD ))
else:
    _name_tag = ''
    filelog = os.path.join( outdir, '%s_log' % (_script_stem + _POPBRD) )
fileout = os.path.join( outdir, _name_tag + 'file735' + _POPBRD )
fileadj = os.path.join( outdir, _name_tag + 'file735adj' + _POPBRD )
filecsv = os.path.join( outdir, _name_tag + 'file735' + _POPBRD + '.csv' )
log = open(filelog, 'w')

# Start of the log: time stamp, program version and the command-line options.
ibutils.dated_msg(sys.argv[0]+': start', log=log)

log.write( '\nVersion: ' + version)
log.write( '\nOptions: ' )
for arg in sys.argv[1:]:
    log.write( ' %s' % arg )
log.write( '\n\n' )

if args.verbose:
    # The script has already changed into DATADIR, so the current directory
    # IS the absolute DATADIR.  (os.path.abspath(args.datadir) would resolve
    # a relative DATADIR against itself here and log a wrong path.)
    log.write('absolute DATADIR: %s\n' % os.getcwd())


log.write('Processing brd=%s pop=%s cou=%s datadir=%s\n'
      % (brd, pop, cou, args.datadir))

#------------------------------------------------------------------------------
# read and store birth years from file736
# Birth year (4 characters, kept as a string) by animal id, taken from fixed
# character positions of file736: id = rec[4:23], birth year = rec[24:28].
byear = {}
with open(file736) as f736_in:   # closed as soon as the table is built
    for rec in f736_in:
        aid = rec[4:23]
        byear[aid] = rec[24:28]
if args.verbose:
    log.write('stored %6d records from file %s\n' % (len(byear), file736))

#------------------------------------------------------------------------------
# process files trait by trait, in the order listed in the trait info file
#debug = False

# open the two result files and write their header lines
if args.verbose:
    log.write('opening output file %s ...\n' % fileout)
f735 = open(fileout, 'w')
fcsv = open(filecsv, 'w')
f735.write('735 brd pop trt evaldate m ntest    mean_y     std_y dv    '
           'mean_x     std_x        b0     se_b0      b1   se_b1 '
           'ncand   i_est  Exp_b1   R2 fb year  tests  pass\n')
# the csv header is assembled from its column groups and written in one go;
# the s.d. column is named after the kind of validation target
_csv_header = [
    'brd,pop,trt,evaldate,rundate,badj,Yval,min_byr,fb,nbadj,ncand,ntest',
    ',b_base,b1-Cr,b1-Gr,E(b1-Cr),E(b1-Gr),se(b1-Cr),se(b1-Gr),t-Cr,t-Gr',
    ',P(b=Eb)Cr,P(b=Eb)Gr,b-stat,b-pract,b-bio,b1-test',
    ',R-sqG,R-sq+,P(R-sq+),R2-test,tests,pass',
    ',GEBV-bias,EBV-bias,P(unb_Gave),P(unb_G2sd),P(unb_Eave),P(unb_E2sd)',
    ',GEBVf_sd' if genomic_target == 'Y' else ',EBVf_sd',
    ',V(Gf-Gr)',
    ',s(Y-Ga),s(Y-G2),s(Y-Ea),s(Y-E2)',
    ',Y-Gave,Y-G1sd,Y-G2sd,mean-Y,mean-Gr,mean-Cr,sd-Y,sd-Gr,sd-Cr',
    ',rel-Y,rel-Gr,rel-Cr,R2/rel-Gr,R2/rel-Cr,b0-Gr,se-b0-Gr,b0-Cr,se-b0-Cr,se-b1-Cr',
    #',P(H_Gave),P(L_Gave),P(H_G2sd),P(L_G2sd)',
    '\n',
]
fcsv.write(''.join(_csv_header))
if args.verbose:
    log.write('opening output file %s ...\n' % fileadj)

base_adjusted = False
# record used for animals that are missing from a dataset
empty = ('??', '?', '??', 0, 0, 0., 0., 0., None)
min_Ne = 20.0  # minimum Ne when optimizing the power function for record weights in Base Adjustment
min_adj = 10   # minimum bulls to estimate a Base Adjustment equation

# number of bootstrap samples (never fewer than min_samples)
min_samples = 1
n_samples = max( int( args.samples ), min_samples )
tolerated_bias_stdized = float( args.accept_bias )

hiSE_footnote = 'N'
xxxx_footnote = 'N'
for rec in open(filetrt):
    log.flush()
    trt, h2, evdate, depvar, min_byear, type2x = rec.replace(',',' ').split()
    if args.traitsincl and args.traitsincl.count( trt ) < 1: continue
    if args.min_byear: min_byear = args.min_byear
    trgB = min_byear
    if args.fb:
        type2x = args.fb
    if val_weight == 'LR': trgB = trgB + '_LR'

    #--------------------------------------------------------------------------
    # read and store all data for this trait
    dataCr = read_file300(brd, pop, trt, 'Cr', args)
    if not dataCr: # skip traits present in file736 but not in file300Cr
        continue
    dataGr = read_file300(brd, pop, trt, 'Gr', args)
    dataCf = read_file300(brd, pop, trt, 'Cf', args)
    dataDf = read_file300(brd, pop, trt, 'Df', args)
    dataGf = read_file300(brd, pop, trt, 'Gf', args)
    dataVf = read_file300(brd, pop, trt, 'Vf', args)

    if args.verbose:
        log.write('\n' + '-'*79 + '\n')
        log.write('processing trt=%s h2=%s evdate=%s depvar=%s byear=%s '
                  'type2x=%s\n' % (trt, h2, evdate, depvar, min_byear, type2x))
        log.write('-'*79 + '\n')
    Lambda = 4./float(h2) - 1.

    if args.power:
        OBSprop = OBSprop_with( power )
    elif basevar != "NONE":
        #==========================================================================
        # Optimize Base of the Power Function used to weight records for base scaling

        # Use N-R iteration to get as close as possible to the desired 4% of data being used for base correction
        # Constrain each step to an updated allowable range after each previous step

        power_min = .25
        power_max = .75

        vx=[0.0]*3; vy=[0.0]*2
        vx[1] = power_min
        vx[0] = power_max
        for i in range(2): vy[i] = OBSprop_with( vx[i] )
        # TARGET Ne/N = .04 unless that is fewer than min_Ne bulls ... max() below
        #   Do not go above .75 with the TARGET ... min() below
        TARGETprop = min( max( min_Ne / rel_n, .04 ), 0.75 )
        nr_converged = False
        for i in range(10):   # maximum number of NR-steps to take
            nr_moveY = TARGETprop - vy[1]
            dy = vy[1] - vy[0]
            dx = vx[1] - vx[0]
            #// N-R step
            if dy == 0: break   # ave(min,max) will be used
            nr_stepX = nr_moveY * dx / dy;
            vx[2] = vx[1] + nr_stepX
            if vx[2] < power_min:
                vx[2] = power_min
            elif vx[2] > power_max: vx[2] = power_max
            nr_stepX = vx[2] - vx[1];
            if abs( nr_stepX ) < .001:
                nr_converged = True
                break
            #  update the allowable range for next NR-step: guarantees no divergence
            #  if oscillation between power_min and power_max occurs, then the final solution is mid-point between them
            #  almost, if not always, the N-R steps will converge... and very quickly
            if nr_stepX < 0:
                power_max = vx[1];
            else: power_min = vx[1];
            vx[0] = vx[1]
            vx[1] = vx[2]
            vy[0] = vy[1]
            vy[1] = OBSprop_with( vx[1] )
            if args.verbose:
                log.write( '%s  NR %2d :  X  %8.3f%8.3f  Y  %8.3f%8.3f   Target  %8.3f  New Window  [ %.3f, %.3f ] \n' %
                           (trt, i+1, vx[0], vx[1], vy[0], vy[1], TARGETprop, power_min, power_max) )
        if nr_converged:
            power = vx[1]
            OBSprop = vy[1]
        else:
            power = 0.5 * (power_min + power_max)
            OBSprop = OBSprop_with( power )


    #==========================================================================
    # CREATE DATA FILES WITH MERGED RESULTS FROM THE FILE300's
    if args.mergefiles:
        fn_merge = os.path.join(mergedir, trt + '_b' + basevar[0:1] + 't' + trg + '_test.csv')
        fn_merge2 = os.path.join(mergedir2, trt + '_b' + basevar[0:1] + 't' + trg + '_test.csv')
        fn_merge_base = os.path.join(mergedir, trt + '_b' + basevar[0:1] + 't' + trg + 'base.csv')
        fn_merge_base2 = os.path.join(mergedir2, trt + '_b' + basevar[0:1] + 't' + trg + 'base.csv')
        fmerge = open( fn_merge, 'w' )
        if basevar != "NONE": fmerge_base = open( fn_merge_base, 'w' )

    nc=0; nt=0; sumc=0.; sumt=0.; ssc=0.; sst=0.
    x1=[]; x2=[]; x3=[]; x4=[]; y=[]; w=[]; ebvc=[]; ebvt=[]; ghatf=[]
    basef=[]; baser=[]; basew=[]
    r0=[]; rchg=[]; rch0=[]; rch1=[]; rch2=[]; rch3=[]; rch4=[]; rch5=[]
    y_rel=[]; x1_rel=[]; x2_rel=[]

    # ESTIMATE A BASE ADJUSTMENT EQUATION, IF NECESSARY
    nadj = 0
    wt_u = 0.0
    rel_u = 0.0
    base_b = (0.0, 1.0); base_R2 = 1.0   # default = no base adjustment
    if basevar != "NONE":
        for aid in dataCf:
            byr = byear.get(aid, '0000')
            top, off, sta, nd, nh, edc, rel, ebv, extra_Cf = dataCf[aid]
            topd, offd, stad, ndd, nhd, edcd, reld, dpgm, extra_Df = dataDf.get(aid, empty)
            topgf, offgf, stagf, ndgf, nhgf, edcgf, relgf, gebvf, extra_Gf = dataGf.get(aid, empty)
            topvf, offvf, stavf, ndvf, nhvf, edcvf, relvf, dgebvf, extra_DGf = dataVf.get(aid, empty)
            topr, offr, star, ndr, nhr, edcr, relr, ebvr, extra_Cr = dataCr.get(aid, empty)
            topg, offg, stag, ndg, nhg, edcg, relg, gebv, extra_Gr = dataGr.get(aid, empty)

            if basevar == 'GEBV':
                rel_1 = relgf
                rel_0 = relg
            else:
                rel_1 = rel
                rel_0 = relr
            relchg = rel_1 - rel_0
            if use_for_base_adjust( aid ):
                wt = rel_0 * pow( power, relchg )  # N.B.: wt is always > 0 "if use_for_base_adjust()"
                nadj += 1
                wt_u += (wt - wt_u) / nadj
                rel_u += (rel_0 - rel_u) / nadj
                baser.append(ebvr)
                basef.append(ebv)
                basew.append(wt)  #  N.B.:  these are all positive values in basew
                r0.append(relr)
                rchg.append(relchg)
                if relchg <= 0.5:
                    rch0.append(wt)
                elif relchg < 1.5:
                    rch1.append(wt)
                elif relchg < 2.5:
                    rch2.append(wt)
                elif relchg < 3.5:
                    rch3.append(wt)
                elif relchg < 4.5:
                    rch4.append(wt)
                else:
                    rch5.append(wt)
                str_wt = "%.5f" % wt
                if args.mergefiles:
                    fmerge_base.write(','.join((aid, byr, top, off, sta, nd, nh,
                                                'Cf', str(edc), str(rel), str(ebv),
                                                'Df', str(edcd), str(reld), str(dpgm),
                                                'Gf', str(edcgf), str(relgf), str(gebvf),
                                                'DGf', str(edcvf), str(relvf), str(dgebvf),
                                                'Cr', str(edcr), str(relr), str(ebvr),
                                                'Gr', str(edcg), str(relg), str(gebv), str_wt)))
                    fmerge_base.write('\n')
        if nadj < min_adj:
            log.write('%s: WARNING: %d bulls is not enough for base correction.  The base will not be adjusted\n' % ( trt, nadj ) );
        else:
            zf = np.array(basef);
            zr = np.array(baser)
            zw = np.array(basew)
            nwt = zw.sum()
            base_b, base_se, base_R2 = simple_wls(zr, zf, zw)

    # Placeholder records for bulls that are missing from a dataset; each one
    # is shaped after the extra data found in that same dataset, so the Cr
    # placeholder has to be built from dataCr (it was built from dataGr).
    emptyCr = add_extra( dataCr )
    emptyGr = add_extra( dataGr )
    emptyCf = add_extra( dataCf )
    emptyDf = add_extra( dataDf )
    emptyGf = add_extra( dataGf )
    emptyVf = emptyGf

    # Derive any required de-regressed GEBV
    DG_TYPE = trgA + 'f'
    if target == "DGEBV":
        dataVf = derived_from( dataGf, dataGr, emptyGf, emptyGr )
    else:
        DG_TYPE = 'Vf'

    #==========================================================================
    # CREATE DATA FOR CANDIDATE AND TEST BULLS
    # process bulls on Cf file and look for stored records from other datasets
    # - edc/rel/"ebv" are zero if no record is found for other datasets
    for aid in dataCf:
        byr = byear.get(aid, '0000')
        top, off, sta, nd, nh, edc, rel, ebv, extra_Cf = dataCf[aid]
        topd, offd, stad, ndd, nhd, edcd, reld, dpgm, extra_Df = dataDf.get(aid, emptyDf)
        topgf, offgf, stagf, ndgf, nhgf, edcgf, relgf, gebvf, extra_Gf = dataGf.get(aid, emptyGf)
        topvf, offvf, stavf, ndvf, nhvf, edcvf, relvf, dgebvf, extra_DGf = dataVf.get(aid, emptyVf)
        topr, offr, star, ndr, nhr, edcr, relr, ebvr, extra_Cr = dataCr.get(aid, emptyCr)
        topg, offg, stag, ndg, nhg, edcg, relg, gebv, extra_Gr = dataGr.get(aid, emptyGr)

        # skip any bulls born before min_byear
        if byr < min_byear:
            continue

        # - candidates are young proven bulls from the full dataset (Cf) that
        #   have no daughters in the reduced dataset (Cr)
        # - candidates are usually only domestic bulls, but if they are
        #   fewer than 50 or so the user may optionally include foreign bulls
        candidate = 'Y'
        if type2x == 'N' and (top >= '20' or sta >= '20'):
            candidate = 'N'
        elif edc < 20:
            candidate = 'N'
        elif (edcr > 0 or topg == '24') and val_weight == 'ITB':
            candidate = 'N'

        # - test bulls are the subset of candidate bulls that also have
        #   a GEBV record (Gr) with non-zero reliability and
        #   a DD/DPGM record (Df) and a parent average record (Cr)
        # flag     : 'Y' when a candidate has a Gr record with reliability > 0
        # testbull : 'Y' when the bull is finally accepted as a test bull
        flag = testbull = 'N'
        # NOTE(review): these counters are reset to zero here, i.e. for every
        # record processed at this indentation level (this appears to be the
        # per-bull loop), yet they are reported further down, after that loop,
        # as the number of "potential TEST bulls with no Df/Cr record".  As
        # written they can only reflect the last record handled -- confirm
        # whether they should be initialised once per trait instead.
        no_Df = no_Cr = 0
        if candidate == 'Y':
            # every candidate contributes its EBV to the candidate-bull summary
            ebvc.append(ebv)
            if relg > 0.:
                # bull has gebv record (file300Gr) with reliability > 0
                flag = 'Y'
                # flag2 stays 'Y' only if all additionally required records exist
                flag2 = 'Y'
                if (target == "DEBV" and not aid in dataDf):
                    # bull does not have the required DD/D_PGM record (file300Df)
                    no_Df += 1
                    flag2 = 'N'
                if aid not in dataCr:
                    # bull has no EBVr record (file300Cr)
                    no_Cr += 1
                    flag2 ='N'
                    
                if flag2 == 'Y':
                    # - additionally (since 2024 version) requiring (rel_Gf > rel_Gr)
                    #   whenever a genomic validation target is used
                    if genomic_target != 'Y' or relgf > relg:
                        testbull = 'Y'
                        # The lists below are filled in lock-step: element i of
                        # each list belongs to the same test bull.
                        ebvt.append(ebv)
                        x1.append(gebv)      # predictor 1: reduced-data GEBV
                        x2.append(ebvr)      # predictor 2: reduced-data EBV
                        x1_rel.append(relg)
                        x2_rel.append(relr)
                        # ghatf: full-data evaluation used later to derive the
                        # s.d. that scales the tolerated bias
                        if genomic_target == 'Y':
                            ghatf.append(gebvf)
                        else:
                            ghatf.append(ebv)
                        # y / y_rel: validation target and its reliability,
                        # chosen by the requested target (dpgm is the fall-back)
                        if target == 'GEBV':
                            y.append(gebvf)
                            y_rel.append(relgf)
                        elif target == 'EBV':
                            y.append(ebv)
                            y_rel.append(rel)
                        elif target == "DGEBV" :
                            y.append(dgebvf)
                            y_rel.append(relvf)
                        else:
                            y.append(dpgm)
                            y_rel.append(reld)
                        # w: regression weight; ITB weighting uses
                        # edc/(edc+Lambda) with the EDC that matches the
                        # target, any other weighting is unweighted (1.0)
                        if val_weight == 'ITB':
                            if target == "DGEBV" :
                                w.append(edcvf/(edcvf+Lambda))
                            elif target == "DEBV" :
                                w.append(edcd/(edcd+Lambda))
                            else:
                                w.append(edc/(edc+Lambda))
                        else:
                            w.append(1.0)

        if args.mergefiles:
            # create a merged dataset for additional checks with SAS, R, etc.
            # One comma-separated row per bull: identification and status
            # columns first, then one (label, EDC, REL, value[,extras]) group
            # per source file.  Blanks in the animal ID are replaced by '~'.
            aid = aid.replace(' ', '~')
            fmerge.write(','.join(
                [aid, byr, flag, candidate, testbull, top, off, sta, nd, nh]
                + [column
                   for label, src_edc, src_rel, src_val, src_extra in (
                       ('Cf', edc, rel, ebv, extra_Cf),
                       ('Df', edcd, reld, dpgm, extra_Df),
                       ('Gf', edcgf, relgf, gebvf, extra_Gf),
                       (DG_TYPE, edcvf, relvf, dgebvf, extra_DGf),
                       ('Cr', edcr, relr, ebvr, extra_Cr),
                       ('Gr', edcg, relg, gebv, extra_Gr),
                   )
                   for column in (
                       label,
                       str(src_edc),
                       str(src_rel),
                       # optional extra columns are appended to the value
                       (str(src_val) + ',' + src_extra) if src_extra
                       else str(src_val),
                   )]
            ) + '\n')

    if rel_u:
        OBSprop = wt_u / rel_u
    else: OBSprop = 0.0

    if no_Df > 0:
        log.write('%s: WARNING: there were %d potential TEST bulls with no'
                  ' Df record!\n' % (trt, no_Df))
    if no_Cr > 0:
        log.write('%s: WARNING: there were %d potential TEST bulls with no'
                  ' Cr record!\n' % (trt, no_Cr))
    if len(ebvc) == 0:
        log.write('%s: no candidate bulls found for this trait\n' % trt)
        continue
    if len(ebvt) == 0:
        log.write('%s: no test bulls found for this trait\n' % trt)
        continue

    # ---- table header -------------------------------------------------------
    log.write('\nSummary statistics on candidate bulls (CB) and test bulls'
              ' (TB)\n')
    log.write('-'*62 + '\n')
    log.write('Trait Variable         N      Mean       Std      Min     '
              ' Max\n')
    log.write('-'*62 + '\n')

    # ---- row formats --------------------------------------------------------
    fmt = '%s   %s %6d %9.3f %9.3f %8.2f %8.2f\n'
    fmtB = '%s   %s %4s%s %6d %9.3f %9.3f %8.2f %8.2f\n'
    # fmtF / fmtR label rows as being on the current / old base; without a
    # base adjustment (--baseadj=NONE) the plain format is used for both.
    fmtF = fmtR = fmt
    if basevar != "NONE":
        fmtF = fmt[:-1] + ' Base=Current\n'
        fmtR = fmt[:-1] + ' Base=Old\n'

    # ---- candidate bulls ----------------------------------------------------
    zc = np.array(ebvc)
    ncb = len(zc)
    zc_mean = zc.mean()
    zc_sd = zc.std(ddof=1)
    log.write(fmt % (trt, 'CB EBV     ', ncb, zc_mean, zc_sd,
                     zc.min(), zc.max()))

    # ---- test bulls ---------------------------------------------------------
    zt = np.array(ebvt)
    ntb = len(zt)
    zt_mean = zt.mean()
    log.write(fmt % (trt, 'TB EBV     ', ntb, zt_mean, zt.std(ddof=1),
                     zt.min(), zt.max()))

    # s.d. of the full-data evaluations: scales the tolerated bias and is
    # inverted to standardise reported changes (0 when there is no variation)
    ghatf = np.array(ghatf)
    ghatf_sd = ghatf.std(ddof=1)
    stdize = 1.0 / ghatf_sd if ghatf_sd > 0 else 0.0
    tolerated_bias = tolerated_bias_stdized * ghatf_sd

    # regression weights
    w = np.array(w)

    # ---- validation target (y) and the two predictors (x1, x2) --------------
    y = np.array(y)
    n = len(y)
    y_mean = y.mean()
    y_sd = y.std(ddof=1)
    y_var = y.var(ddof=1)
    log.write(fmtF % (trt, 'TB  VAL(y) ', n, y_mean, y_sd,
                      y.min(), y.max()))

    x1 = np.array(x1)
    x1_mean = x1.mean()
    x1_sd = x1.std(ddof=1)
    log.write(fmtR % (trt, 'TB GEBV(x1)', n, x1_mean, x1_sd,
                      x1.min(), x1.max()))

    x2 = np.array(x2)
    x2_mean = x2.mean()
    x2_sd = x2.std(ddof=1)
    log.write(fmtR % (trt, 'TB EBVr(x2)', n, x2_mean, x2_sd,
                      x2.min(), x2.max()))

    # ---- mean reliabilities -------------------------------------------------
    y_rel = np.array(y_rel)
    x3_rel = np.array(x1_rel)
    x4_rel = np.array(x2_rel)
    y_rel_mean = y_rel.mean()
    x3_rel_mean = x3_rel.mean()
    x4_rel_mean = x4_rel.mean()

    #---------------------------------------------------------------------------
    # calculate test stats
    #
    # y is the validation target selected for each test bull, x1 the
    # reduced-data GEBV and x2 the reduced-data EBV.  Each model is fitted
    # twice: on the predictor as read, and on the predictor after the linear
    # base adjustment base_b[0] + base_b[1] * x.

    # model 1: D_PGM = b0 + b1*GEBV + e
    b1, bse1, R2_1 = simple_wls(x1, y, w)  #  Before Base Adjustment
    x3 = base_b[0] + base_b[1] * x1
    b3, bse3, R2_3 = simple_wls(x3, y, w)  #   After Base Adjustment

    # model 2: D_PGM = b0 + b1*EBVr + e
    b2, bse2, R2_2 = simple_wls(x2, y, w)  #  Before Base Adjustment
    x4 = base_b[0] + base_b[1] * x2
    b4, bse4, R2_4 = simple_wls(x4, y, w)  #   After Base Adjustment

    # spread of the change from the base-adjusted reduced GEBV (x3) to the
    # full-data evaluation; only computed for genomic targets
    if genomic_target == 'Y':
        gchg_sd = ( ghatf - x3 ).std(ddof=1)
        gchg_var = ( ghatf - x3 ).var(ddof=1)
    else:
        gchg_sd = 0
        gchg_var = 0

    # summary statistics of the base-adjusted predictors
    x3_mean = x3.mean()
    x3_sd = x3.std(ddof=1)
    x3_var = x3.var(ddof=1)
    x4_mean = x4.mean()
    x4_sd = x4.std(ddof=1)
    x4_var = x4.var(ddof=1)

    # With enough base-adjustment bulls, also list the adjusted predictors and
    # the base-bull (BB) variables zf, zr and zw, then close the table.
    # NOTE(review): this test is ">=" whereas later tests on nadj in this loop
    # body use "> min_adj" -- confirm the intended boundary.
    if nadj >= min_adj:
        log.write(fmtF % (trt, 'TB GEBV(x1)', n, x3_mean, x3_sd,
                         x3.min(), x3.max()))
        log.write(fmtF % (trt, 'TB EBVr(x2)', n, x4_mean, x4_sd,
                         x4.min(), x4.max()))
        fmtB = '%s   %s %4s%s %6d %9.3f %9.3f %8.2f %8.2f\n'
        log.write(fmtB % (trt, 'BB', basevar, 'f(y)', nadj, zf.mean(), zf.std(ddof=1),
                         zf.min(), zf.max()))
        log.write(fmtB % (trt, 'BB', basevar, 'r(x)', nadj, zr.mean(), zr.std(ddof=1),
                         zr.min(), zr.max()))
        log.write(fmtB % (trt, 'BB', basevar, '(wt)', nadj, zw.mean(), zw.std(ddof=1),
                         zw.min(), zw.max()))
        log.write('-'*62 + '\n')


    # Levels, in s.d. units around the mean, at which the prediction bias of
    # the validation regressions is evaluated: -2, -1, 0, +1, +2.
    dev = np.arange( -2.0, 2.1, 1.0 )
    n_biasgrps = len( dev )
    if n_samples > 0:
        # Apply a bootstrap test that R-squared was increased due to genotyping.
        # (The builtin float is used as dtype: the np.float alias no longer
        # exists in current NumPy releases.)
        R2_strap = np.zeros( n_samples, float )        # R2(GEBV) - R2(EBV) per resample
        b_strap = np.zeros( (2, n_samples), float )    # slopes: row 0 = GEBV, row 1 = EBV
        bse_strap = np.zeros( n_samples, float )       # SE of the GEBV slope per resample

        # bias_strap[model][reference][resample][level]
        #   model:     0 = GEBV, 1 = EBV
        #   reference: 0 = levels from the resample's own mean/s.d.,
        #              1 = levels from the full set of test bulls
        bias_strap = np.zeros( (2, 2, n_samples, n_biasgrps ), float )

        # bias (predictor minus fitted target) at each level, full test set
        dev3 = x3_mean + ( dev * x3_sd )
        bias3 = dev3 - ( b3[0] + b3[1] * dev3 )
        dev4 = x4_mean + ( dev * x4_sd )
        bias4 = dev4 - ( b4[0] + b4[1] * dev4 )

        i = 0
        np.random.seed( int(evdate) )   # reproducible resampling for a given evaluation date
        while i < n_samples:
            # resample the n test bulls with replacement
            sample = np.random.randint(n, size=n)
            try:
                b_GEBV, se_GEBV, R2_GEBV = simple_wls( x3[sample,], y[sample], w[sample] )
                b_EBV,  se_EBV,  R2_EBV  = simple_wls( x4[sample,], y[sample], w[sample] )
                b_strap[0][i]  = b_GEBV[1]
                b_strap[1][i]  = b_EBV[1]
                bse_strap[i]  = se_GEBV[1]
                R2_strap[i] = R2_GEBV - R2_EBV

                GEBV_mean = x3[sample,].mean()
                GEBV_sd = x3[sample,].std(ddof=1)
                EBV_mean = x4[sample,].mean()
                EBV_sd = x4[sample,].std(ddof=1)
                GEBV_dev = GEBV_mean + ( dev * GEBV_sd )
                EBV_dev = EBV_mean + ( dev * EBV_sd )
                bias_strap[0][0][i] = GEBV_dev - ( b_GEBV[0] + b_GEBV[1] * GEBV_dev )
                bias_strap[0][1][i] =     dev3 - ( b_GEBV[0] + b_GEBV[1] *     dev3 )
                bias_strap[1][0][i] =  EBV_dev - (  b_EBV[0] +  b_EBV[1] *  EBV_dev )
                bias_strap[1][1][i] =     dev4 - (  b_EBV[0] +  b_EBV[1] *     dev4 )

                i += 1
            except np.linalg.LinAlgError:
                # skip samples with singular LHS (e.g. if repeat sampling only 1 observation when n is very small)
                # The counter is not advanced, so the sample is simply redrawn.
                pass

        # percentages of resamples, per model / reference / level
        prob_higher_R2 = 100.0 * (R2_strap > 0).sum() / n_samples
        high_bullbias = 100.0 * ( bias_strap > tolerated_bias ).sum( axis=2 ) / n_samples
        low_bullbias = 100.0 * ( bias_strap < -tolerated_bias ).sum( axis=2 ) / n_samples
        prob_unbiased = np.minimum( 99.9, 2 * (100.0 - np.maximum( high_bullbias, low_bullbias )) )  #  2-sided test:  P(H0 is true)

        # condensed results for an average bull (column 0) and a top bull
        # (column 1); rows are GEBV (0) and EBV (1)
        punbiased = np.empty( shape=(2,2), dtype=float )
        pbiasH = np.empty( shape=(2,2), dtype=float )
        pbiasL = np.empty( shape=(2,2), dtype=float )
        top_bgrp = n_biasgrps - 1
        for i in range(2):  # GEBV and EBV
            # average bull (j=0)
            if (top_bgrp % 2) == 0:
                # odd number of levels: the middle level is the mean
                k = int( top_bgrp/2 )
                punbiased[i][0] = prob_unbiased[i][0][k]
                pbiasH[i][0] = high_bullbias[i][0][k]
                pbiasL[i][0] = low_bullbias[i][0][k]
            else:
                # even number of levels: average the two central levels
                k = int( (top_bgrp-1) / 2 )
                punbiased[i][0] = 0.5 * ( prob_unbiased[i][0][k] + prob_unbiased[i][0][k+1] )
                pbiasH[i][0] = 0.5 * ( high_bullbias[i][0][k] + high_bullbias[i][0][k+1] )
                pbiasL[i][0] = 0.5 * ( low_bullbias[i][0][k] + low_bullbias[i][0][k+1] )
            # top bull (j=1) with a (G)EBV 2 s.d. above the mean
            k = top_bgrp
            punbiased[i][1] = prob_unbiased[i][0][k]
            pbiasH[i][1] = high_bullbias[i][0][k]
            pbiasL[i][1] = low_bullbias[i][0][k]

        # One flag string per model with one character per level:
        #   'H' / '+' : bias above the tolerance in > 97.5% / > 95% of resamples
        #   'L' / '-' : bias below minus the tolerance in > 97.5% / > 95%
        #   '.'       : neither
        # The central level is enclosed in parentheses.
        biases=[]
        j = 0
        m = int((n_biasgrps-1)/2)
        for i in range(2):
            bflags = ''
            for k in range(n_biasgrps):
                xH = high_bullbias[i][j][k]
                # the low-side share must come from low_bullbias; reading
                # high_bullbias here made the 'L' and '-' flags unreachable
                xL = low_bullbias[i][j][k]
                if xH > 97.5:
                    bflag = 'H'
                elif xH > 95.0:
                    bflag = '+' #'p'
                elif xL > 97.5:
                    bflag = 'L'
                elif xL > 95.0:
                    bflag = '-' #'n'
                else:
                    bflag = '.'
                if k == m:
                    bflags = bflags + '('
                elif k == (n_biasgrps - m):
                    bflags = bflags + ')'
                bflags = bflags + bflag
            biases.append( bflags )
        # bootstrap standard errors of the two slopes
        bse_boot = b_strap.std(1)
    else:
        # Use the Fisher-transform approximation for sampling variance of correlations:
        #   Let R=r*r=R-squared, g=GEBV(reduced) and pa=EBV(reduced)
        #   V(Ri) = (1-Xi*Xi)/(n-2), for i=g,pa
        #   V(Rg-Rpa) = V(Rg) + V(Rpa) - 2C(g,pa), and we approximate C(g,pa) = r(g,pa)*(V(Rg)+V(Rpa))/2
        #   which simplifies to:  V(Rg-Rpa) = (1-r(g,pa)) * (V(Rg)+V(Rpa)
        # NOTE(review): only prob_higher_R2 and bse_boot are set on this path;
        # b_strap, biases and punbiased are not -- verify that later code that
        # reads them is not reached when n_samples is 0.
        prob_higher_R2 = 100.0 * (R2_3 > R2_4)
        bse_boot = [ bse3[1], bse4[1] ]

    # Warn about every regression that explained nothing at all.
    if base_R2 == 0.0:
        log.write( '%s: WARNING: rsquared( BaseAdjust ) = 0 !!\n' % trt )
    if R2_1 == 0.0:
        log.write( '%s: WARNING: rsquared( GEBVr ) = 0 !!\n' % trt )
    if R2_2 == 0.0:
        log.write( '%s: WARNING: rsquared( EBVr ) = 0 !!\n' % trt )
    if R2_3 == 0.0:
        log.write( '%s: WARNING: rsquared( adj_GEBVr ) = 0 !!\n' % trt )
    if R2_4 == 0.0:
        log.write( '%s: WARNING: rsquared( adj_EBVr) = 0 !!\n' % trt )

    u1 = np.array( r0 )
    u2 = np.array( rchg )

    if nadj > min_adj:
        d0 = np.array( rch0 )
        d1 = np.array( rch1 )
        d2 = np.array( rch2 )
        # percentage scaling factors: by total weight (x) and by count (z)
        x = 100.0 / zw.sum()
        z = 100.0 / nadj
        if args.verbose:
            # one line: base-adjustment regression and its effect on both slopes
            log.write( '%s : %s : Base %4.2f: Full = %.2f + %.3f * Red  b1 %.3f ~> %.3f  b2 %.3f ~> %.3f'
                       % (trt, target, power, base_b[0], base_b[1], b1[1], b3[1], b2[1], b4[1])
                       + '\n' )

    # estimate selection differential and search for corresponding p and x
    # i_est: (mean EBV of test bulls - mean EBV of candidates) in candidate s.d.
    i_est = (zt_mean - zc_mean) / zc_sd
    if args.verbose:
        log.write('\nDetails of GEBVtest calculations\n')
        log.write('%s i_est = (%0.3f - %0.3f) / %0.3f = %0.3f\n'
                  % (trt, zt_mean, zc_mean, zc_sd, i_est))
    i_est = abs(i_est)
    if i_est < 0.0001:
        # effectively no selection
        i = x = 0.
        p = 1.
    else:
        # Tabulate the standard normal on a grid of truncation points from
        # +5 down to -4.999 in steps of 0.001:
        #   d = density, p = proportion above x, i = mean of the part above x
        x = 0.001 * (np.arange(5000, -5000, -1))
        d = np.exp(-x**2/2) / np.sqrt(2*np.pi)
        upper_mass = np.cumsum(d)
        p = 0.001 * upper_mass
        i = np.cumsum(x * d) / upper_mass
        # take the first grid point whose intensity does not exceed i_est;
        # p and x are left as the full tables when no such point exists
        hits = np.flatnonzero(i <= i_est)
        if hits.size:
            k = hits[0]
            p = p[k]
            x = x[k]

    #  BEFORE --baseadj
    # calculate E(b1) given selection
    k = i_est * (i_est - x)
    R2b = R2_1 / (1. - k + k * R2_1)
    E_b1 = (1. - k) / (1. - k * R2b)
    # t statistic of the unadjusted GEBV slope against its expectation;
    # 9.9 is used when no positive standard error is available
    t_val = (b1[1] - E_b1) / bse1[1] if bse1[1] > 0 else 9.9

    # Combine different tests and assign an overall test result (test_final)
    slope = b1[1]
    pass_stat = 'Y' if abs(t_val) < 2. else 'N'
    pass_bio = 'Y' if E_b1 - 0.1 < slope < 1.2 else 'N'
    pass_pract = 'Y' if 0.8 < slope < 1.2 else 'N'  # symmetric thresholds for hiSE
    warn_bio = 'Y' if 0.8 <= slope <= 1.25 else 'N'  # symmetric thresholds for a WARN
    if trt in ('mil', 'fat', 'pro'):
        warn_bio = 'N'  # WARN is not offered for these traits

    if pass_stat == 'Y' or pass_bio == 'Y':
        if pass_pract == 'Y':
            b1_test = 'Pass'
        else:
            # b1 is far from E(b1), but power of the test is too weak to conclude a Fail
            b1_test = 'NS-' if t_val < 1 else 'NS+'
    elif warn_bio == 'Y':
        pass_bio = 'W'
        b1_test = 'Warn'
    else:
        b1_test = 'Fail'

    # R2 test: bootstrap probability that the GEBV model has the higher R2
    if prob_higher_R2 > 95:
        pass_R2 = 'Y'
    elif prob_higher_R2 > 2.5:
        pass_R2 = '-'
    else:
        pass_R2 = 'N'

    # overall result without base adjustment
    if b1_test == 'Fail' or pass_R2 == 'N':
        pass_unadj = 'FAIL'
    else:
        pass_unadj = {'Pass': 'PASS', 'Warn': 'WARN'}.get(b1_test, 'hiSE')

    #  AFTER --baseadj
    # calculate E(b1) given selection
    k = i_est * (i_est - x)
    R2b = R2_3 / (1. - k + k * R2_3)
    E_b1 = (1. - k) / (1. - k * R2b)
    # expected slopes for the two models: element 0 = GEBV, element 1 = EBVr
    R2_Eb = np.array( [R2_3, R2_4] )
    E_b  = 1. - (1. - R2_Eb) * k
    # z: raw deviation of each fitted slope from its expectation
    z = [ b3[1], b4[1] ] - E_b
    # t_val must be an independent copy: it is rescaled in place below, and z
    # is still needed afterwards for the sign of the raw deviation (with an
    # alias, the 9.9 fall-back overwrote z and forced the "higher" branch)
    t_val = np.array( z, dtype=float )
    for i in range(2):
        if bse_boot[i] > 0:
            t_val[i] /= bse_boot[i]
        else: t_val[i] = 9.9
    # (builtin float as dtype: the np.float alias no longer exists in current
    # NumPy releases)
    prob_bdif = np.zeros( 2, float )
    if n_samples > 0:
        for i in range(2):
            # bootstrap test of b1-E_b1
            if z[i] > 0:   # slope is higher than the expected value
                prob_bdif[i] = 100.0 * (1 - (2.0 * (b_strap[i] < E_b[i]).sum()) / n_samples )
            else:   # slope is lower than the expected value
                prob_bdif[i] = 100.0 * (1 - (2.0 * ( b_strap[i] > E_b[i]).sum()) / n_samples )
    prob_Eb = 100.0 - prob_bdif

    # 95% bootstrap interval of (slope - expected slope) for the GEBV model;
    # the statistical test passes when that interval contains zero.
    # NOTE(review): b_strap is only created when n_samples > 0, so this part
    # cannot run with n_samples == 0 -- confirm that case is excluded upstream.
    j = 0   # Only need final test result for Gr
    delta = np.sort(b_strap[j] - E_b[j])
    i = int( 0.025 * n_samples + 0.5 )
    if i<1: i=1
    CI_025=delta[i-1]
    i = int( 0.975 * n_samples + 0.5 )
    CI_975=delta[i-1]
    pass1_boot = 'Y' if CI_025 < 0 and CI_975 > 0 else 'N'

    # Combine different tests and assign an overall test result (test_final)
    # All slope tests in this section refer to b3[1], the slope of the target
    # on the base-adjusted reduced GEBV.
    pass_stat = pass1_boot   # statistical test: bootstrap interval contains 0
    pass_bio = 'Y' if b3[1] > E_b1 - 0.1 and b3[1] < 1.2 else 'N'
    pass_pract = 'Y' if b3[1] > 0.8        and b3[1] < 1.2 else 'N'  # symmetric thresholds for hiSE
    # The WARN range is checked on the base-adjusted slope as well; testing
    # the unadjusted b1[1] here was inconsistent with the tests above.
    warn_bio = 'Y' if b3[1] >= 0.8       and b3[1] <= 1.25 else 'N'  # symmetric thresholds for a WARN
    if trt in ('mil', 'fat', 'pro'): warn_bio = 'N'  # WARN is not offered for these traits
    if (pass_stat == 'Y' or pass_bio == 'Y'):
        if pass_pract == 'Y':
            b1_test = 'Pass'
        else:   # b1 is far from E(b1), but power of the test is too weak to conclude a Fail
            # NOTE(review): the NS-/NS+ split is made at t = 1, not at 0 --
            # confirm this threshold is intended.
            if t_val[0] < 1:
                b1_test = 'NS-'
            else:
                b1_test = 'NS+'
    elif warn_bio == 'Y':
        pass_bio = 'W'
        b1_test = 'Warn'
    else:
        b1_test = 'Fail'

    # R2 test: bootstrap probability that the GEBV model has the higher R2
    if prob_higher_R2 > 95:
        pass_R2 = 'Y'
        R2_test = 'Pass'
    elif prob_higher_R2 > 2.5:
        pass_R2 = '-'
        R2_test = 'NS'
    else:
        pass_R2 = 'N'
        R2_test = 'Fail'

    # Overall result: a failed slope or R2 test fails the trait; otherwise the
    # slope test decides between PASS, WARN and the inconclusive hiSE.
    if b1_test == 'Fail' or R2_test == 'Fail':
        pass_final = 'FAIL'
        pass_all = 'N'
    elif b1_test == 'Pass':
        pass_final = 'PASS'
        pass_all = 'Y'
    elif b1_test == 'Warn':
        pass_final = 'WARN'
        pass_all = 'W'
    else:
        pass_final = 'hiSE'
        pass_all = '-'
    # Text labels for the individual slope tests: 'Pass', or the direction
    # ('Low' / 'High') in which the slope deviates.
    b_stat = 'Pass' if pass_stat == 'Y' else ('Low' if t_val[0] < 0 else 'High')
    b_bio = ('Pass' if (pass_bio == 'Y' and warn_bio == "N")
             else ('Low' if b3[1] < 1 else 'High'))
    b_pract = 'Pass' if pass_pract == 'Y' else ('Low' if b3[1] < 1 else 'High')

    # Label for the R2 test; an inconclusive result carries the sign of the
    # observed R2 difference between the GEBV and EBV models.
    R2_test = {
        'Y': 'Pass',
        '-': 'NS-' if R2_3 < R2_4 else 'NS+',
    }.get(pass_R2, 'Fail')

    # With a base adjustment requested but fewer base-adjustment bulls than
    # twice the number of test bulls, the slope results are blanked out.
    if basevar != "NONE" and nadj < 2 * ntb:
        pass_stat = pass_pract = pass_bio = 'x'
        pass_final = b_stat = b_pract = b_bio = b1_test = 'xxxx'

    # ---- log: details of the tests after base adjustment --------------------
    log.write('\nAfter Base adjustments:\n')
    log.write('%s p=%0.3f x=%6.3f i=%0.3f k=%0.3f R2b=%0.3f E(b1)=%0.3f b1=%0.3f\n'
              % (trt, p, x, i_est, k, R2b, E_b1, b3[1] ))
    log.write('%s b1-E(b1) =%7.3f  t =%6.2f  bootstrap P(b=Eb) =%5.1f  b1_test = %s\n'
              % (trt, b3[1]-E_b[0], t_val[0], prob_Eb[0], b1_test ))
    log.write('%s R2_1 =%5.1f  R2_2 =%5.1f   bootstrap P(R2_gain) =%5.1f  R2_test = %s\n'
              % (trt, 100.*R2_3, 100.*R2_4, prob_higher_R2, R2_test ))
    log.write('%s   stat_test=%s   pract_test=%s   bio_test=%s   R2_test=%s   overall = %s\n\n'
              % (trt, pass_stat, pass_pract, pass_bio, pass_R2, pass_final))

    # remember which footnotes have to be appended to the 735 file at the end
    if pass_final == 'hiSE': hiSE_footnote='Y'
    if pass_final == 'xxxx': xxxx_footnote='Y'

    # ---- 735 file: two records per trait ------------------------------------
    # record 1 = model 1 (base-adjusted GEBV) with the test results,
    # record 2 = model 2 (base-adjusted EBVr) with the result columns dashed out
    fmt = ('735 %s %s %s %s %d%6d%10.4f%10.4f %s%10.4f%10.4f%10.4f%10.4f'
           '%8.4f%8.4f%6d%8.4f%8.4f%5.1f  %s %s   %s%s%s%s  %s\n')
    f735.write(fmt % (brd, pop, trt, rundate, 1, ntb, y_mean, y_sd,
                      depvar, x3_mean, x3_sd, b3[0], bse3[0],
                      b3[1], bse_boot[0], ncb, i_est, E_b1, 100.*R2_3,
                      type2x, min_byear, pass_stat, pass_pract, pass_bio, pass_R2, pass_final))
    f735.write(fmt % (brd, pop, trt, rundate, 2, ntb, y_mean, y_sd,
                      depvar, x4_mean, x4_sd, b4[0], bse4[0],
                      b4[1], bse4[1], ncb, 0.0, 1.0, 100.*R2_4,
                      type2x, min_byear, '-', '-', '-', '-', '----'))

    # ---- CSV file: first part of this trait's row ---------------------------
    # The row is written piecewise; the terminating newline is written later.
    # bXtY is not used in the remainder of this loop body.
    bXtY='b' + basevar[0:1] + 't' + trg
    # identification and counts
    fmt = '%s,%s,%s,%s,%s,%s,%s,%s,%s,%d,%d,%d'
    fcsv.write(fmt % ( brd, pop, trt, evdate, rundate, basevar[0:1], trgA, trgB, type2x, nadj, ncb, ntb ))
    # slopes, expected slopes, bootstrap SEs and t values (EBV first, then GEBV)
    fmt = ',%.3f,%.3f,%.3f,%.3f,%.3f,%.3f,%.3f,%.1f,%.1f'
    fcsv.write(fmt % ( base_b[1], b4[1], b3[1], E_b[1], E_b[0], bse_boot[1], bse_boot[0], t_val[1], t_val[0] ) )
    # bootstrap P(b=Eb) and the slope test labels
    fmt=',%.2f,%.2f,%s,%s,%s,%s'
    fcsv.write(fmt % ( prob_Eb[1], prob_Eb[0], b_stat, b_pract, b_bio, b1_test ))
    # R2 results, combined pass flags, bias flag strings and P(unbiased)
    fmt=',%.1f,%.1f,%.2f,%s,%s,%s,%s,%s,%.1f,%.1f,%.1f,%.1f'
    fcsv.write(fmt % ( 100.*R2_3, 100.*(R2_3 - R2_4), prob_higher_R2, R2_test,
                       pass_stat + pass_pract + pass_bio + pass_R2, pass_final,
                       biases[0], biases[1], punbiased[0][0], punbiased[0][1], punbiased[1][0], punbiased[1][1] ))

    # Expected change (fitted target minus predictor) for a bull at the mean,
    # and at 1 and 2 s.d. above the mean, of the base-adjusted predictor.
    #   G: model 1, reduced GEBV (x3) with its own regression (b3)
    chg_Gave = b3[0] + (b3[1]-1) * x3_mean
    chg_G1sd = b3[0] + (b3[1]-1) * (x3_mean + 1 * x3_sd)
    chg_G2sd = b3[0] + (b3[1]-1) * (x3_mean + 2 * x3_sd)
    #   E: model 2, reduced EBV (x4) with its own regression (b4); combining
    #   the model-1 coefficients with the x4 statistics mixed the two models
    chg_Eave = b4[0] + (b4[1]-1) * x4_mean
    chg_E1sd = b4[0] + (b4[1]-1) * (x4_mean + 1 * x4_sd)
    chg_E2sd = b4[0] + (b4[1]-1) * (x4_mean + 2 * x4_sd)

    # print the variance of the GEBV change with fewer decimals the larger it is
    rounded_var = round( gchg_var )
    if rounded_var < 1:
        v_gchg = format( gchg_var, ".4f" )
    elif rounded_var < 10:
        v_gchg = format( gchg_var, ".3f" )
    elif rounded_var < 100:
        v_gchg = format( gchg_var, ".2f" )
    elif rounded_var < 1000:
        v_gchg = format( gchg_var, ".1f" )
    else:
        v_gchg = format( gchg_var, ".0f" )

    # CSV: s.d. of the full-data evaluation, variance of the change, and the
    # expected changes expressed in units of that s.d.
    fmt=',%.2f,%s,%.2f,%.2f,%.2f,%.2f'
    fcsv.write(fmt % ( ghatf_sd, v_gchg, chg_Gave*stdize, chg_G2sd*stdize, chg_Eave*stdize, chg_E2sd*stdize ))
    # CSV: expected GEBV changes on the trait scale, means and s.d.
    fmt=',%.3f,%.3f,%.3f,%.3f,%.3f,%.3f,%.3f,%.3f,%.3f'
    fcsv.write(fmt % ( chg_Gave, chg_G1sd, chg_G2sd,
                       y_mean, x3_mean, x4_mean, y_sd, x3_sd, x4_sd ))
    # CSV: mean reliabilities, R2 relative to mean reliability (0 when the
    # mean reliability is 0), intercepts with SEs and SE of the EBV slope
    fmt=',%.1f,%.1f,%.1f,%.0f,%.0f,%.3f,%.3f,%.3f,%.3f,%.3f'
    if x3_rel_mean == 0: xR3 = 0
    else: xR3 = 10000.0 * R2_3 / x3_rel_mean
    if x4_rel_mean == 0: xR4 = 0
    else: xR4 = 10000.0 * R2_4 / x4_rel_mean
    fcsv.write(fmt % ( y_rel_mean, x3_rel_mean, x4_rel_mean, xR3, xR4,
                       b3[0], bse3[0], b4[0], bse4[0], bse4[1]
                   ))
    # end of this trait's CSV row
    fcsv.write('\n')

    # allowing LR and ITB weights to run simultaneously with same BaseAdjust output files... only create these when --weight=ITB (default)
    if nadj > min_adj and val_weight != "LR":
        if not base_adjusted:
            # first trait that reaches this point: create the base-adjustment
            # report and write its two-part header line
            f735adj = open(fileadj, 'w')
            f735adj.write('brd pop trt evaldate p_base   Ne/N       N      Ne    a_base b_base   bval  bvadj  pass  padj')
            f735adj.write('  R2adj  R2val  WdR0  WdR1  WdR2  WdR3  WdR4 WdR5+  NdR0  NdR1  NdR2  NdR3  NdR4 NdR5+\n')
            base_adjusted = True
        # first half of the row: base-adjustment regression, slopes and
        # overall results before (pass_unadj) and after (pass_final) adjustment
        f735adj.write('%s %s %s %s %6.3f %6.3f %7.0f %7.0f %9.3f %6.3f %6.3f %6.3f %5s %5s %6.1f %6.1f' %
                      ( brd, pop, trt, rundate, power, OBSprop, nadj, OBSprop * nadj,
                        base_b[0], base_b[1], b1[1], b3[1], pass_unadj, pass_final, 100.0*base_R2, 100.0*R2_1 ))
        # second half: for each of the six classes rch0..rch5 the summed
        # values as a percentage of total weight (x) and the number of entries
        # as a percentage of nadj (z)
        d0 = np.array( rch0 )
        d1 = np.array( rch1 )
        d2 = np.array( rch2 )
        d3 = np.array( rch3 )
        d4 = np.array( rch4 )
        d5 = np.array( rch5 )
        x = 100.0 / zw.sum()
        z = 100.0 / nadj
        f735adj.write(' %5.1f %5.1f %5.1f %5.1f %5.1f %5.1f %5.1f %5.1f %5.1f %5.1f %5.1f %5.1f\n' %
                      ( x*d0.sum(), x*d1.sum(), x*d2.sum(), x*d3.sum(), x*d4.sum(), x*d5.sum(),
                        z*len(d0),  z*len(d1),  z*len(d2),  z*len(d3),  z*len(d4),  z*len(d5) ))
        f735adj.flush()

        # NOTE(review): the "finished" message sits inside this if-block, so
        # it is only logged for traits that produce a base-adjustment row --
        # confirm that is intended.
        ibutils.dated_msg(sys.argv[0]+': finished '+trt, log=log)
        log.write( '\n' )

    # make the results of this trait visible on disk before the next trait
    log.flush()
    f735.flush()
    fcsv.flush()

    # With --mergefiles and a base adjustment in effect, write base-adjusted
    # copies of both merge files.  In every comma-separated row, a field whose
    # second character is 'r' (the 'Cr' / 'Gr' group labels) marks a
    # reduced-data group; the field three positions further on is rewritten as
    # base_b[0] + base_b[1] * value with one decimal.
    if args.mergefiles and basevar != "NONE":
        fmerge.close()
        fmerge_base.close()
        fmerge = open( fn_merge, 'r' )
        fmerge2 = open( fn_merge2, 'w' )
        fmerge_base = open( fn_merge_base, 'r' )
        fmerge_base2 = open( fn_merge_base2, 'w' )
        for merge_in, merge_out in ( (fmerge, fmerge2),
                                     (fmerge_base, fmerge_base2) ):
            for line in merge_in:
                x = line.strip('\n').split(',')
                n = len(x)
                i = 0
                while i < n:
                    if len(x[i]) > 1 and x[i][1] == 'r':
                        # jump from the group label to its value field
                        i += 3
                        if i < n:
                            x[i] = "%.1f" % ( base_b[0] + base_b[1] * float(x[i]) )
                    i += 1
                merge_out.write( ','.join( x ) + '\n' )
            merge_in.close()
            merge_out.close()

# Append explanatory footnotes for the special result codes that occurred.
if hiSE_footnote == 'Y':
    f735.write( '\nNOTE: pass=hiSE indicates an inconclusive statistical PASS due to' )
    f735.write( ' high SE(b1) while FAILING the practical test: b1<0.8 or b1>1.2\n' )
if xxxx_footnote == 'Y':
    f735.write( '\nNOTE: pass=xxxx indicates an insufficient number of progeny-proven bulls' )
    f735.write( ' were included in the reduced data for --baseadj\n' )
f735.close()
# The CSV report was only flushed inside the trait loop; close it here so that
# nothing stays buffered and no handle stays open once the output files are
# archived (and possibly removed) further down.
fcsv.close()

ibutils.dated_msg(sys.argv[0]+': end', log=log)
log.close()

# end here if no zip file is to be created (-Z option)
if args.no_zip:
    sys.exit(0)

# prepare the zip file: the input files that were used (file300Df only when
# the target is DEBV) followed by the result, log and CSV files
names = ['file300Df'] if target == 'DEBV' else []
names += ['file300Cf', 'file300Gf', 'file300Cr', 'file300Gr', 'file736', 'traits']
files = [name + _POPBRD for name in names] + [fileout, filelog, filecsv]
filezip = os.path.join( outdir, 'gt%s%s.zip' % (rundate[2:6], _POPBRD) )

# archive members are stored under their base name, without directories
with zipfile.ZipFile(filezip, 'w', zipfile.ZIP_DEFLATED) as zf:
    for fname in files:
        zf.write(fname, basename(fname) )
    # the base-adjustment report is added whenever it exists on disk
    if os.path.isfile( fileadj ):
        zf.write( fileadj, basename(fileadj) )

if args.verbose:
    print('%s: files zipped to %s' %
          (sys.argv[0], os.path.join(args.datadir, filezip)))

if args.cleanup:
    # delete all files added to the zip file
    # (the base-adjustment report is not in this list and is kept)
    for fname in files:
        os.unlink(fname)

sys.exit(0)
