# -*- coding: utf-8 -*-:
"""
Copyright © Bonsai - LIFL (Université Lille 1, CNRS UMR 8022) and Inria-Lille Nord Europe
contact: aida.ouangraoua@inria.fr, amandine.perrin@inria.fr
This software is a computer program whose purpose is to progressively reconstruct ancestral
gene orders.
This software is governed by the CeCILL-B license under French law and
abiding by the rules of distribution of free software. You can use,
modify and/or redistribute the software under the terms of the CeCILL-B
license as circulated by CEA, CNRS and Inria at the following URL
http://www.cecill.info, or in the LICENCE file at the root directory of this program.
The fact that you are presently reading this means that you have had
knowledge of the CeCILL-B license and that you accept its terms.
---------------------------------------------------
``compute_dcj_adjacencies`` **module description**:
This module is used to find DCJ-reliable adjacencies.
It is called when, at the previous step, both the sets of compatible adjacencies and
conflicting adjacencies are empty. Hence, we try to add new adjacencies by adding DCJ-reliable
ones, if possible.
.. moduleauthor:: Aïda Ouangraoua, Amandine PERRIN
February 2014
"""
from procars.utils import util_adjacency_functions as adj_func
from procars.utils import utils_IO
from procars.utils.util_classes import SparseMatrix
[docs]def compute_neighbor(spe, block, side, left, right):
""" Finds the *side* (left (0) or right (1)) neighbor of the given block (*block*)
Parameters
----------
spe : int
Species id for which we want to find a neighbor
block : int
Block number of the species for which we want to find a neighbor
side : int
``0`` for left extremity of the given block of the given species, ``1`` for right extremity
left : list
Array such that left[spe_id][block_id] is an array containing the left
neighbor of block block_id in spe_id
right : list
Array such that right[spe_id][block_id] is an array containing the right
neighbor of block_id in spe_id
Returns
-------
int
*neighbor:* signed integer, neighboring block of the given block
"""
neighbor = 0
if (block > 0):
if side == 0:
neighbor = left[spe][block][1]
else:
neighbor = right[spe][block][1]
else:
if side == 0:
neighbor = -right[spe][-block][1]
else:
neighbor = -left[spe][-block][1]
return neighbor
[docs]def is_adjacency_in_car(cars, block_adjacency):
""" Checks if a block adjacency belongs to the CARs
Parameters
----------
cars : list
List of all CARs of the current PQtree
block_adjacency : tuple
One given block adjacency to search among the CARs
Returns
-------
boolean
True if the adjacency is found in the CARs, False otherwise
"""
answer = False
[block1, block2] = block_adjacency
found = False
i = 0
while(not found and i < len(cars)):
one_car = cars[i]
j = 0
while(not found and j < len(one_car)):
if(abs(one_car[j]) == abs(block1)):
car_id = i
pos_id = j
found = True
j += 1
i += 1
if not found:
return False
if((pos_id < len(cars[car_id]) - 1 and cars[car_id][pos_id] == block1
and cars[car_id][pos_id+1] == block2) or
(pos_id > 0 and cars[car_id][pos_id] == -block1 and
cars[car_id][pos_id-1] == -block2)):
answer = True
return answer
[docs]def is_dcj_reliable_conserved(partition, car_adjacency, block_adjacency, cars,
left, right, car_left, car_right):
""" Finds signal from a given car_adjacency in a given partition of the tree species.
A car adjacency is DCJ-reliable if the signal of this DCJ adjacency is conserved
on a path of the species tree that goes through the ancestor (from a group of the partition
defined by the ancestor to another group of this same partition)
Parameters
----------
partition : list
Ordered list of lists of species IDs corresponding to a given
combination of the 3 groups partition defined by the ancestor
car_adjacency : tuple
(car1, car2) car adjacency
block_adjacency : tuple
(bloc1, bloc2) block adjacency corresponding to the car adjacency
cars : list
List of lists of ordered signed block numbers in each car
left : list
Array such that left[spe_id][block_id] is an array containing the left
neighbor of block block_id in spe_id
right : list
Array such that right[spe_id][block_id] is an array containing the right
neighbor of block_id in spe_id
car_left : list
Array such that car_left[species_id][car_id] = list of potential left car
neighbors of car_id in species 'species_id'
car_right : list
Array such that car_right[species_id][car_id] = list of potential right car
neighbors of car_id in species 'species_id'
Returns
-------
boolean
True if the DCJ-reliable adjacency is conserved, False otherwise
"""
answer = False
block1, block2 = block_adjacency
# partition[0] = species_ids in the first group of the given partition
for spe in partition[0]:
# find left neighbor of block2 in species spe
block3 = compute_neighbor(spe, block2, 0, left, right)
# find right neighbor of block1 in species spe
block4 = compute_neighbor(spe, block1, 1, left, right)
# if block3 and block4 exist and are an adjacency in the current PQtree, find signal in
# species in other groups
if (block3 != 0 and block4 != 0 and is_adjacency_in_car(cars, [block3, block4])):
conserved = False
for spe2 in partition[1] + partition[2]:
# if (car1 car2) is a car adjacency in another species, it is a dcj-reliable adj.
if is_car_adjacency(spe2, car_adjacency, car_left, car_right):
conserved = True
answer = conserved
return answer
[docs]def is_car_adjacency(spe, car_adjacency, car_left, car_right):
""" Checks if a given CAR adjacency is a possible car adjacency in the given species
Parameters
----------
spe : int
Species ID in which searching the car adjacency
car_adjacency : tuple
CAR adjacency to find
car_left : list
Array such that car_left[species_id][car_id] = list of potential left car
neighbors of car_id in species 'species_id'
car_right : list
Array such that car_right[species_id][car_id] = list of potential right car
neighbors of car_id in species 'species_id'
Returns
-------
boolean
True if car_adjacency belongs to spe, False otherwise
"""
answer = False
[car1, car2] = car_adjacency
if ((car1 > 0 and car2 in car_right[spe][car1]) or
(car1 < 0 and -car2 in car_left[spe][-car1])):
answer = True
return answer
[docs]def is_dcj_reliable(possible_partitions, car_adjacency, cars, left, right, car_left, car_right):
""" Finds signal from a given car_adjacency in all possible partitions of the species tree.
Parameters
----------
possible_partitions : list
List of partitions, which are ordered lists of species IDs lists corresponding to different
combinations of the 3 groups partition defined by the ancestor
car_adjacency : tuple
(car1, car2) car adjacency to test
cars : list
List of lists of ordered signed block numbers in each car
left : list
Array such that left[spe_id][block_id] is an array containing the left
neighbor of block block_id in spe_id
right : list
Array such that right[spe_id][block_id] is an array containing the right
neighbor of block_id in spe_id
car_left : list
Array such that car_left[species_id][car_id] = list of potential left car
neighbors of car_id in species 'species_id'
car_right : list
Array such that car_right[species_id][car_id] = list of potential right car
neighbors of car_id in species 'species_id'
Returns
-------
boolean
True if the adjacency is dcj-reliable and conserved, False otherwise
"""
answer = False
[car1, car2] = car_adjacency
# find block adjacency involved in the given car adjacency:
if(car1 > 0):
block1 = cars[car1][-1]
else:
block1 = -cars[-car1][0]
if(car2 > 0):
block2 = cars[car2][0]
else:
block2 = -cars[-car2][-1]
block_adjacency = [block1, block2]
for partition in possible_partitions:
if is_dcj_reliable_conserved(partition, car_adjacency, block_adjacency,
cars, left, right, car_left, car_right):
answer = True
return answer
return answer
[docs]def compute_adjacencymatrix_from_adjacency_array(adjacencies, nb_blocks):
""" Function computing an adjacency matrix from an adjacency array
Parameters
----------
adjacencies : list
Array containing pairs of signed blocks, with abs(block1) < abs(block2)
nb_blocks : int
Total number of blocks
Returns
-------
SparseMatrix
*adjacencymatrix:* 0-1 Matrix which rows and columns correspond to block extremities:
- right extremity of bloc_id : extremity_id = block_id
- left extremity of bloc_id : extremity_id = block_id + nb_blocks
"""
adjacencymatrix = SparseMatrix(2 * nb_blocks + 1, 2 * nb_blocks + 1)
for adjacency in adjacencies:
[block_1, block_2] = adjacency
if(block_1 > 0):
extremity_id1 = block_1
else:
extremity_id1 = -block_1 + nb_blocks
if(block_2 < 0):
extremity_id2 = -block_2
else:
extremity_id2 = block_2 + nb_blocks
adjacencymatrix[extremity_id1, extremity_id2] = 1
adjacencymatrix[extremity_id2, extremity_id1] = 1
return adjacencymatrix
[docs]def find_possible_dcj_adjs(nb_cars, possible_partitions, cars, left, right, car_left, car_right):
""" From a set of CARs in the current ancestral PQtree, and the list of CAR and block
neighbors in all species, finds which CAR adjacencies are DCJ-reliable. A next step will
check if adjacencies in this step are not in conflict.
Parameters
----------
nb_cars : int
Total number of cars in the current PQtree
possible_partitions : list
List of partitions, which are ordered lists of lists species ids corresponding to different
combinations of the 3 groups partition defined by the ancestor
cars : list
List of lists of ordered signed block numbers in each car
left : list
Array such that left[spe_id][block_id] is an array containing the left
neighbor of block block_id in spe_id
right : list
Array such that right[spe_id][block_id] is an array containing the right
neighbor of block_id in spe_id
car_left : list
Array such that car_left[species_id][car_id] = list of potential left car
neighbors of car_id in species 'species_id'
car_right : list
Array such that car_right[species_id][car_id] = list of potential right car
neighbors of car_id in species 'species_id'
Returns
-------
list
*car_adjacencies:* list of lists (possible DCJ-reliable CAR adjacencies)
"""
car_adjacencies = []
for car1 in xrange(1, nb_cars + 1):
for car2 in xrange(car1 + 1, nb_cars + 1):
if is_dcj_reliable(possible_partitions, [car1, car2], cars, left, right,
car_left, car_right):
car_adjacencies.append([car1, car2])
if is_dcj_reliable(possible_partitions, [car1, -car2], cars, left, right,
car_left, car_right):
car_adjacencies.append([car1, -car2])
if is_dcj_reliable(possible_partitions, [-car1, car2], cars, left, right,
car_left, car_right):
car_adjacencies.append([-car1, car2])
if is_dcj_reliable(possible_partitions, [-car1, -car2], cars, left, right,
car_left, car_right):
car_adjacencies.append([-car1, -car2])
return car_adjacencies
[docs]def write_retained_adjs(adj_filename_prefix, cars, dcj_NC_Adj, step,
car_left, car_right, species1_id, species2_id, species3_id):
""" Write all retained DCJ-reliable adjacencies into a txt file
Parameters
----------
adj_filename_prefix : string
Prefix of the file where retained adjacencies are stored
cars : list
List of ordered lists of signed blocks corresponding to the different cars, such that
cars[car_id] = ordered list of blocks in car_id
dcj_NC_Adj : list
List of retained compatible set of dcj-reliable adjacencies
step : int
Current step in the whole method
car_left : list
Array such that car_left[species_id][car_id] = list of potential left car
neighbors of car_id in species 'species_id'
car_right : list
Array such that car_right[species_id][car_id] = list of potential right car
neighbors of car_id in species 'species_id'
species1_id : list
List of IDs of species in group1 of partition defined by the ancestor node
species2_id : list
List of IDs of species in group2 of partition defined by the ancestor node
species3_id : list
List of IDs of species in group3 of partition defined by the ancestor node
"""
last_adjs = adj_filename_prefix + "_" + str(step - 1) + ".txt"
new_adjs = adj_filename_prefix + "_" + str(step) + ".txt"
with open(last_adjs, "r") as last:
all_last_adjs = last.readlines()
with open(new_adjs, "w") as file_handler:
for car_adjacency in dcj_NC_Adj:
labels = adj_func.find_adj_info(car_adjacency, car_left, car_right,
species1_id, species2_id, species3_id)
utils_IO.write_adjacency(file_handler, cars, car_adjacency, 2, step, labels)
file_handler.writelines(all_last_adjs)
[docs]def main(adjacency_file_name_prefix, step_nb, car_bin, neigh_bin, tree_bin, blocks_bin):
"""
Main method, to find DCJ-reliable adjacencies
Parameters
----------
adjacency_file_name_prefix : string
Prefix of the adjacncy file name
step_nb : int
Current step of the ProCars method
car_bin : string
Name of the binary file where all information about CARs is saved
neigh_bin : string
Name of the binary file where all information about the neighbors of each CAR is saved
tree_bin : string
Name of the binary file where all information about the species tree is saved
blocks_bin : string
Name of the binary file where all information about the blocks is saved
Returns
-------
tuple
*step_nb:* current step of the ProCars method
*dcj_NC_Adj:* list of all non-conflicting DCJ-reliable adjacencies
*dcj_NC_Pair:* list of matrix indexes of non-conflicting DCJ-adjacencies.
"""
# READ PARAMETERS
[_, _, _, _, _, _, species1_id,
species2_id, species3_id, _] = utils_IO.read_binary_file(tree_bin)
[_, _, left, right] = utils_IO.read_binary_file(blocks_bin)
[cars, nb_cars, _, _] = utils_IO.read_binary_file(car_bin)
[car_left, car_right] = utils_IO.read_binary_file(neigh_bin)
# Find a list of DCJ-reliable adjacencies (possibly in conflict)
possible_partitions = [[species1_id, species2_id, species3_id],
[species2_id, species1_id, species3_id],
[species3_id, species1_id, species2_id]]
car_adjacencies = find_possible_dcj_adjs(nb_cars, possible_partitions, cars,
left, right, car_left, car_right)
dcj_Matrix = compute_adjacencymatrix_from_adjacency_array(car_adjacencies, nb_cars)
[dcj_NC_Matrix, _] = adj_func.split_adjacencymatrix(dcj_Matrix, nb_cars)
dcj_NC_Pair = adj_func.compute_extremity_pair(dcj_NC_Matrix)
dcj_NC_Adj = adj_func.compute_adjacencies(dcj_NC_Pair, nb_cars)
write_retained_adjs(adjacency_file_name_prefix, cars, dcj_NC_Adj, step_nb, car_left, car_right,
species1_id, species2_id, species3_id)
print("... " + str(len(dcj_NC_Adj)) + " dcj reliable non-conflicting adjacencies")
return dcj_NC_Adj, step_nb, dcj_NC_Pair