#!/bin/env python
#The script removes randomly chosen contacts from the input binary contact matrix,
#so that only <percent_of_contacts_to_be_retained> percent of the contacts remain.
#Removed contacts (ones) are changed into non-contacts (zeros).
#The reduced map is written to <input_name>_red_<percent>_<random_number>.cmap
#USAGE:
#cmap_reduce_cmap.py <percent_of_contacts_to_be_retained> <matrix_input_file>

#import __main__

def no_rep_randint(low,high,size):
    """Return distinct random integers drawn uniformly from [low, high].

    Parameters:
        low, high -- inclusive integer bounds of the range to sample from.
        size      -- how many values to draw, given as a one-element tuple
                     such as (n,); a plain number is accepted as well.

    Returns:
        A sorted one-dimensional numpy integer array holding exactly the
        requested number of values, none of them repeated.

    Raises:
        ValueError -- if the requested count is negative or larger than the
                      number of integers available in [low, high].
    """
    #The count comes from the argument only, never from a module global,
    #so the function can be called from anywhere.
    count=int(numpy.ravel(size)[0])
    if count<0:
        raise ValueError("cannot draw a negative number of values: %d" % count)
    if count==0:
        return numpy.zeros(0,dtype=int)

    low=int(low)
    high=int(high)
    available=high-low+1
    if count>available:
        raise ValueError("cannot draw %d distinct integers from [%d, %d]" % (count,low,high))

    #A prefix of a shuffled range gives every integer (endpoints included)
    #the same probability and always finishes; rounding uniform floats and
    #redrawing duplicates does neither.
    chosen=numpy.random.permutation(available)[:count]+low
    chosen.sort()
    return chosen




import os
import sys

import numpy
import scipy.sparse



#Script body: read a square contact map, zero out a random subset of its
#off-diagonal contacts and write the reduced map to a new file.
#The map is treated as symmetric: contacts are taken from the upper triangle
#and mirrored, diagonal entries are copied through unchanged.

if len(sys.argv)!=3:
    sys.exit("USAGE: cmap_reduce_cmap.py <percent_of_contacts_to_be_retained> <matrix_input_file>")

retain_percent=float(sys.argv[1]) #the sys.argv holds the input command line arguments
if not 0<=retain_percent<=100:
    sys.exit("Percent of contacts to be retained must be between 0 and 100")
matrix_file_path=sys.argv[2]
#splitext strips only the final extension, so dots elsewhere in the path
#(e.g. "./maps/1abc.cmap") do not truncate the output name.
out_matrix_file_path=os.path.splitext(matrix_file_path)[0]
random=numpy.random.randint(10000) #random tag keeps repeated runs from overwriting each other
out_matrix_file_path="%s_red_%d_%d.cmap" % (out_matrix_file_path,retain_percent,random)

print("Reading cmap:  %s" % matrix_file_path)
with open(matrix_file_path,'r') as matrixFILE:
    #Blank lines (such as a trailing newline at the end of the file) are not rows
    lines=[line.split() for line in matrixFILE if line.strip()]
res_num=len(lines)
print("Protein length:  %d" % res_num)
if res_num==0:
    sys.exit("Contact map file is empty: %s" % matrix_file_path)

cmap_matrix=numpy.zeros(shape=(res_num,res_num))
for i,row in enumerate(lines):
    if len(row)!=res_num:
        sys.exit("Contact map is not square: row %d has %d columns, expected %d" % (i+1,len(row),res_num))
    cmap_matrix[i]=[float(value) for value in row]

print("Contact map size:  %s" % (cmap_matrix.shape,))

#One (row, column) pair per non-zero entry; the upper triangle lists every
#contact pair exactly once.
contacts=numpy.transpose(numpy.nonzero(cmap_matrix))
single_half=contacts[contacts[:,0]<contacts[:,1],:]

#Counting pairs from the upper triangle (rather than halving the matrix sum)
#keeps the counts integral and unaffected by diagonal entries.
contacts_total=len(single_half)
contacts_retained=int(round(contacts_total*retain_percent/100))
contacts_removed=contacts_total-contacts_retained

print("Amino acid pairs in contact : %d" %(contacts_total))
print("Retained contacts: %d" %(contacts_retained))
print("Removed contacts: %d" %(contacts_removed))

print("Diagonal sum: %s" % cmap_matrix.diagonal().sum())
print("Contacts: %s" % (contacts.shape,))
print("Single half: %s" % (single_half.shape,))
#Removing random contacts
print("Chosing random contacts for removal")

removed_indices=no_rep_randint(0,contacts_total-1,(contacts_removed,))

print("\nRemoving %d contacts"% (len(removed_indices)))

#data holds one value per entry of full_matrix: first the upper-triangle
#pairs, then their mirrored copies, so each removed pair is zeroed twice.
removed_rows=removed_indices.astype('intc')
data=numpy.ones(2*contacts_total)
data[removed_rows]=0
data[contacts_total+removed_rows]=0
full_matrix=numpy.concatenate((single_half, single_half[:,::-1]))
print("Full cmap:  %s" % (full_matrix.shape,))
print("Modified positions:  %s" % (data.shape,))
rebuilt=scipy.sparse.coo_matrix((data,(full_matrix[:,0],full_matrix[:,1])),shape=(res_num,res_num)).toarray()
#Diagonal entries are not contact pairs, carry them over as they were
diagonal=numpy.arange(res_num)
rebuilt[diagonal,diagonal]=cmap_matrix.diagonal()


print("Writing reduced cmap to: %s" %(out_matrix_file_path))
with open(out_matrix_file_path,'w') as out_matrixFILE:
    for row in rebuilt:
        out_matrixFILE.write("\t".join("%d" % value for value in row))
        out_matrixFILE.write('\n')


