# Compare two files, each containing a list of normalized distributions, by computing the average and standard deviation of the delivered-ratio difference over all times before full delivery.

import sys
import math
import gzip

def _read_point(stream):
    """Read the next line of *stream* and return its (time, ratio) pair.

    The first whitespace-separated field is parsed as an integer time and the
    second as a float delivered ratio; any further fields are ignored.
    Raises IndexError when the line holds fewer than two fields (for example
    when the stream is exhausted).
    """
    fields = stream.readline().split()
    return int(fields[0]), float(fields[1])


def _compare_distributions(f_ref, f_cmp):
    """Compare the next distribution of each stream.

    Both distributions are treated as step functions starting at ratio 0 at
    time 0; a ratio >= 1 marks full delivery and is counted as 1 afterwards.
    The absolute difference between the two step functions is integrated
    (time-weighted) from time 0 up to the later of the two full-delivery
    times.

    Returns a (mean, standard deviation) pair of that difference, or (0, 0)
    when the covered time span is empty.
    """
    ref_time, ref_ratio = _read_point(f_ref)
    cmp_time, cmp_ratio = _read_point(f_cmp)

    last_time = 0
    # Floats on purpose: with integer accumulators the final division would
    # truncate under Python 2 whenever no float sample was ever accumulated.
    old_ref = 0.0
    old_cmp = 0.0
    total = 0.0
    total_sq = 0.0

    # Walk both distributions in time order until the earliest full delivery.
    while (ref_ratio < 1 or ref_time > cmp_time) and \
            (cmp_ratio < 1 or cmp_time > ref_time):
        now = min(ref_time, cmp_time)
        dif = abs(old_ref - old_cmp)
        delta = now - last_time
        total += dif * delta
        total_sq += dif * dif * delta
        last_time = now
        # Advance whichever stream(s) own the sample at the current time.
        if ref_time == now:
            old_ref = ref_ratio
            ref_time, ref_ratio = _read_point(f_ref)
        if cmp_time == now:
            old_cmp = cmp_ratio
            cmp_time, cmp_ratio = _read_point(f_cmp)

    # Interval ending at the first full delivery.
    now = min(ref_time, cmp_time)
    dif = abs(old_ref - old_cmp)
    delta = now - last_time
    total += dif * delta
    total_sq += dif * dif * delta
    last_time = now

    # Select the distribution that reaches full delivery last.  The reference
    # trails either when it is not full yet, or when its pending full-delivery
    # sample lies later in time than the compared one.  The second condition
    # keeps the result independent of which file is given as the reference.
    if ref_ratio < 1 or ref_time > cmp_time:
        tail_stream, tail_time, tail_ratio, tail_old = \
            f_ref, ref_time, ref_ratio, old_ref
    else:
        tail_stream, tail_time, tail_ratio, tail_old = \
            f_cmp, cmp_time, cmp_ratio, old_cmp

    # The other distribution is now constant at 1; finish the trailing one.
    # Skipped when both reach full delivery at the same time.
    while tail_ratio < 1:
        dif = 1 - tail_old
        delta = tail_time - last_time
        total += dif * delta
        total_sq += dif * dif * delta
        last_time = tail_time
        tail_old = tail_ratio
        tail_time, tail_ratio = _read_point(tail_stream)

    dif = 1 - tail_old
    delta = tail_time - last_time
    last_time = tail_time
    total += dif * delta
    total_sq += dif * dif * delta

    if last_time == 0:
        return 0, 0
    mean = total / last_time
    # abs() guards against a slightly negative variance due to rounding.
    std = math.sqrt(abs(total_sq * last_time - total * total)) / last_time
    return mean, std


def main(argv=None):
    """Compare every distribution of two gzip files and append the results.

    argv layout (defaults to sys.argv):
        argv[1]  reference file path (gzip)
        argv[2]  compared file path (gzip)
        argv[3]  number of distributions to process in each file
        argv[4]  removed node id, copied as-is into every output record
        argv[5]  output path prefix; "DiffusionTousComparaison" is appended

    One record "t<TAB>node<TAB>mean<TAB>std" is appended per distribution,
    with t counting down from argv[3] to 1.  After each distribution one
    extra line is read and discarded from each input file (presumably a
    separator line between distributions).

    Returns the process exit status: 0 on success, 1 on a usage error.
    """
    if argv is None:
        argv = sys.argv

    if len(argv) != 6:
        sys.stderr.write("Invalid count of arguments, should be used as follow :\npython %s Ref_file_Path Compared_file_path Max_contact_time Removed_node_corresponding_to_compared_file Output_file_path"%argv[0])
        return 1

    count = int(argv[3])
    removed_node = int(argv[4])
    out_path = argv[5]+"DiffusionTousComparaison"

    # Context managers make sure the appended gzip member is finalised.
    with gzip.open(argv[1], 'r') as f_ref, \
            gzip.open(argv[2], 'r') as f_cmp, \
            gzip.open(out_path, 'a') as out:
        for m in range(count):
            t = count - m
            mean, std = _compare_distributions(f_ref, f_cmp)
            record = "%d\t%d\t%f\t%f\n" % (t, removed_node, mean, std)
            # The gzip stream is binary: encode so this also runs on Python 3.
            out.write(record.encode("ascii"))
            # Skip the line that follows each distribution in both files.
            f_ref.readline()
            f_cmp.readline()
    return 0


if __name__ == "__main__":
    sys.exit(main())
    
