Source code for procars.step_modules.compute_dcj_adjacencies

# -*- coding: utf-8 -*-:
"""
Copyright © Bonsai - LIFL (Université Lille 1, CNRS UMR 8022) and Inria-Lille Nord Europe

contact: aida.ouangraoua@inria.fr, amandine.perrin@inria.fr

This software is a computer program whose purpose is to progressively reconstruct ancestral
gene orders.

This software is governed by the CeCILL-B license under French law and
abiding by the rules of distribution of free software. You can use,
modify and/or redistribute the software under the terms of the CeCILL-B
license as circulated by CEA, CNRS and Inria at the following URL
http://www.cecill.info, or in the LICENCE file at the root directory of this program.

The fact that you are presently reading this means that you have had
knowledge of the CeCILL-B license and that you accept its terms.

---------------------------------------------------

``compute_dcj_adjacencies`` **module description**:

This module is used to find DCJ-reliable adjacencies.
It is called when, at the previous step, both the sets of compatible adjacencies and
conflicting adjacencies are empty. Hence, we try to add new adjacencies by adding DCJ-reliable
ones, if possible.

.. moduleauthor:: Aïda Ouangraoua, Amandine PERRIN

February 2014

"""


from procars.utils import util_adjacency_functions as adj_func
from procars.utils import utils_IO
from procars.utils.util_classes import SparseMatrix


[docs]def compute_neighbor(spe, block, side, left, right): """ Finds the *side* (left (0) or right (1)) neighbor of the given block (*block*) Parameters ---------- spe : int Species id for which we want to find a neighbor block : int Block number of the species for which we want to find a neighbor side : int ``0`` for left extremity of the given block of the given species, ``1`` for right extremity left : list Array such that left[spe_id][block_id] is an array containing the left neighbor of block block_id in spe_id right : list Array such that right[spe_id][block_id] is an array containing the right neighbor of block_id in spe_id Returns ------- int *neighbor:* signed integer, neighboring block of the given block """ neighbor = 0 if (block > 0): if side == 0: neighbor = left[spe][block][1] else: neighbor = right[spe][block][1] else: if side == 0: neighbor = -right[spe][-block][1] else: neighbor = -left[spe][-block][1] return neighbor
[docs]def is_adjacency_in_car(cars, block_adjacency): """ Checks if a block adjacency belongs to the CARs Parameters ---------- cars : list List of all CARs of the current PQtree block_adjacency : tuple One given block adjacency to search among the CARs Returns ------- boolean True if the adjacency is found in the CARs, False otherwise """ answer = False [block1, block2] = block_adjacency found = False i = 0 while(not found and i < len(cars)): one_car = cars[i] j = 0 while(not found and j < len(one_car)): if(abs(one_car[j]) == abs(block1)): car_id = i pos_id = j found = True j += 1 i += 1 if not found: return False if((pos_id < len(cars[car_id]) - 1 and cars[car_id][pos_id] == block1 and cars[car_id][pos_id+1] == block2) or (pos_id > 0 and cars[car_id][pos_id] == -block1 and cars[car_id][pos_id-1] == -block2)): answer = True return answer
[docs]def is_dcj_reliable_conserved(partition, car_adjacency, block_adjacency, cars, left, right, car_left, car_right): """ Finds signal from a given car_adjacency in a given partition of the tree species. A car adjacency is DCJ-reliable if the signal of this DCJ adjacency is conserved on a path of the species tree that goes through the ancestor (from a group of the partition defined by the ancestor to another group of this same partition) Parameters ---------- partition : list Ordered list of lists of species IDs corresponding to a given combination of the 3 groups partition defined by the ancestor car_adjacency : tuple (car1, car2) car adjacency block_adjacency : tuple (bloc1, bloc2) block adjacency corresponding to the car adjacency cars : list List of lists of ordered signed block numbers in each car left : list Array such that left[spe_id][block_id] is an array containing the left neighbor of block block_id in spe_id right : list Array such that right[spe_id][block_id] is an array containing the right neighbor of block_id in spe_id car_left : list Array such that car_left[species_id][car_id] = list of potential left car neighbors of car_id in species 'species_id' car_right : list Array such that car_right[species_id][car_id] = list of potential right car neighbors of car_id in species 'species_id' Returns ------- boolean True if the DCJ-reliable adjacency is conserved, False otherwise """ answer = False block1, block2 = block_adjacency # partition[0] = species_ids in the first group of the given partition for spe in partition[0]: # find left neighbor of block2 in species spe block3 = compute_neighbor(spe, block2, 0, left, right) # find right neighbor of block1 in species spe block4 = compute_neighbor(spe, block1, 1, left, right) # if block3 and block4 exist and are an adjacency in the current PQtree, find signal in # species in other groups if (block3 != 0 and block4 != 0 and is_adjacency_in_car(cars, [block3, block4])): conserved = False for spe2 in partition[1] + partition[2]: # if (car1 car2) is a car adjacency in another species, it is a dcj-reliable adj. if is_car_adjacency(spe2, car_adjacency, car_left, car_right): conserved = True answer = conserved return answer
[docs]def is_car_adjacency(spe, car_adjacency, car_left, car_right): """ Checks if a given CAR adjacency is a possible car adjacency in the given species Parameters ---------- spe : int Species ID in which searching the car adjacency car_adjacency : tuple CAR adjacency to find car_left : list Array such that car_left[species_id][car_id] = list of potential left car neighbors of car_id in species 'species_id' car_right : list Array such that car_right[species_id][car_id] = list of potential right car neighbors of car_id in species 'species_id' Returns ------- boolean True if car_adjacency belongs to spe, False otherwise """ answer = False [car1, car2] = car_adjacency if ((car1 > 0 and car2 in car_right[spe][car1]) or (car1 < 0 and -car2 in car_left[spe][-car1])): answer = True return answer
[docs]def is_dcj_reliable(possible_partitions, car_adjacency, cars, left, right, car_left, car_right): """ Finds signal from a given car_adjacency in all possible partitions of the species tree. Parameters ---------- possible_partitions : list List of partitions, which are ordered lists of species IDs lists corresponding to different combinations of the 3 groups partition defined by the ancestor car_adjacency : tuple (car1, car2) car adjacency to test cars : list List of lists of ordered signed block numbers in each car left : list Array such that left[spe_id][block_id] is an array containing the left neighbor of block block_id in spe_id right : list Array such that right[spe_id][block_id] is an array containing the right neighbor of block_id in spe_id car_left : list Array such that car_left[species_id][car_id] = list of potential left car neighbors of car_id in species 'species_id' car_right : list Array such that car_right[species_id][car_id] = list of potential right car neighbors of car_id in species 'species_id' Returns ------- boolean True if the adjacency is dcj-reliable and conserved, False otherwise """ answer = False [car1, car2] = car_adjacency # find block adjacency involved in the given car adjacency: if(car1 > 0): block1 = cars[car1][-1] else: block1 = -cars[-car1][0] if(car2 > 0): block2 = cars[car2][0] else: block2 = -cars[-car2][-1] block_adjacency = [block1, block2] for partition in possible_partitions: if is_dcj_reliable_conserved(partition, car_adjacency, block_adjacency, cars, left, right, car_left, car_right): answer = True return answer return answer
[docs]def compute_adjacencymatrix_from_adjacency_array(adjacencies, nb_blocks): """ Function computing an adjacency matrix from an adjacency array Parameters ---------- adjacencies : list Array containing pairs of signed blocks, with abs(block1) < abs(block2) nb_blocks : int Total number of blocks Returns ------- SparseMatrix *adjacencymatrix:* 0-1 Matrix which rows and columns correspond to block extremities: - right extremity of bloc_id : extremity_id = block_id - left extremity of bloc_id : extremity_id = block_id + nb_blocks """ adjacencymatrix = SparseMatrix(2 * nb_blocks + 1, 2 * nb_blocks + 1) for adjacency in adjacencies: [block_1, block_2] = adjacency if(block_1 > 0): extremity_id1 = block_1 else: extremity_id1 = -block_1 + nb_blocks if(block_2 < 0): extremity_id2 = -block_2 else: extremity_id2 = block_2 + nb_blocks adjacencymatrix[extremity_id1, extremity_id2] = 1 adjacencymatrix[extremity_id2, extremity_id1] = 1 return adjacencymatrix
[docs]def find_possible_dcj_adjs(nb_cars, possible_partitions, cars, left, right, car_left, car_right): """ From a set of CARs in the current ancestral PQtree, and the list of CAR and block neighbors in all species, finds which CAR adjacencies are DCJ-reliable. A next step will check if adjacencies in this step are not in conflict. Parameters ---------- nb_cars : int Total number of cars in the current PQtree possible_partitions : list List of partitions, which are ordered lists of lists species ids corresponding to different combinations of the 3 groups partition defined by the ancestor cars : list List of lists of ordered signed block numbers in each car left : list Array such that left[spe_id][block_id] is an array containing the left neighbor of block block_id in spe_id right : list Array such that right[spe_id][block_id] is an array containing the right neighbor of block_id in spe_id car_left : list Array such that car_left[species_id][car_id] = list of potential left car neighbors of car_id in species 'species_id' car_right : list Array such that car_right[species_id][car_id] = list of potential right car neighbors of car_id in species 'species_id' Returns ------- list *car_adjacencies:* list of lists (possible DCJ-reliable CAR adjacencies) """ car_adjacencies = [] for car1 in xrange(1, nb_cars + 1): for car2 in xrange(car1 + 1, nb_cars + 1): if is_dcj_reliable(possible_partitions, [car1, car2], cars, left, right, car_left, car_right): car_adjacencies.append([car1, car2]) if is_dcj_reliable(possible_partitions, [car1, -car2], cars, left, right, car_left, car_right): car_adjacencies.append([car1, -car2]) if is_dcj_reliable(possible_partitions, [-car1, car2], cars, left, right, car_left, car_right): car_adjacencies.append([-car1, car2]) if is_dcj_reliable(possible_partitions, [-car1, -car2], cars, left, right, car_left, car_right): car_adjacencies.append([-car1, -car2]) return car_adjacencies
[docs]def write_retained_adjs(adj_filename_prefix, cars, dcj_NC_Adj, step, car_left, car_right, species1_id, species2_id, species3_id): """ Write all retained DCJ-reliable adjacencies into a txt file Parameters ---------- adj_filename_prefix : string Prefix of the file where retained adjacencies are stored cars : list List of ordered lists of signed blocks corresponding to the different cars, such that cars[car_id] = ordered list of blocks in car_id dcj_NC_Adj : list List of retained compatible set of dcj-reliable adjacencies step : int Current step in the whole method car_left : list Array such that car_left[species_id][car_id] = list of potential left car neighbors of car_id in species 'species_id' car_right : list Array such that car_right[species_id][car_id] = list of potential right car neighbors of car_id in species 'species_id' species1_id : list List of IDs of species in group1 of partition defined by the ancestor node species2_id : list List of IDs of species in group2 of partition defined by the ancestor node species3_id : list List of IDs of species in group3 of partition defined by the ancestor node """ last_adjs = adj_filename_prefix + "_" + str(step - 1) + ".txt" new_adjs = adj_filename_prefix + "_" + str(step) + ".txt" with open(last_adjs, "r") as last: all_last_adjs = last.readlines() with open(new_adjs, "w") as file_handler: for car_adjacency in dcj_NC_Adj: labels = adj_func.find_adj_info(car_adjacency, car_left, car_right, species1_id, species2_id, species3_id) utils_IO.write_adjacency(file_handler, cars, car_adjacency, 2, step, labels) file_handler.writelines(all_last_adjs)
[docs]def main(adjacency_file_name_prefix, step_nb, car_bin, neigh_bin, tree_bin, blocks_bin): """ Main method, to find DCJ-reliable adjacencies Parameters ---------- adjacency_file_name_prefix : string Prefix of the adjacncy file name step_nb : int Current step of the ProCars method car_bin : string Name of the binary file where all information about CARs is saved neigh_bin : string Name of the binary file where all information about the neighbors of each CAR is saved tree_bin : string Name of the binary file where all information about the species tree is saved blocks_bin : string Name of the binary file where all information about the blocks is saved Returns ------- tuple *step_nb:* current step of the ProCars method *dcj_NC_Adj:* list of all non-conflicting DCJ-reliable adjacencies *dcj_NC_Pair:* list of matrix indexes of non-conflicting DCJ-adjacencies. """ # READ PARAMETERS [_, _, _, _, _, _, species1_id, species2_id, species3_id, _] = utils_IO.read_binary_file(tree_bin) [_, _, left, right] = utils_IO.read_binary_file(blocks_bin) [cars, nb_cars, _, _] = utils_IO.read_binary_file(car_bin) [car_left, car_right] = utils_IO.read_binary_file(neigh_bin) # Find a list of DCJ-reliable adjacencies (possibly in conflict) possible_partitions = [[species1_id, species2_id, species3_id], [species2_id, species1_id, species3_id], [species3_id, species1_id, species2_id]] car_adjacencies = find_possible_dcj_adjs(nb_cars, possible_partitions, cars, left, right, car_left, car_right) dcj_Matrix = compute_adjacencymatrix_from_adjacency_array(car_adjacencies, nb_cars) [dcj_NC_Matrix, _] = adj_func.split_adjacencymatrix(dcj_Matrix, nb_cars) dcj_NC_Pair = adj_func.compute_extremity_pair(dcj_NC_Matrix) dcj_NC_Adj = adj_func.compute_adjacencies(dcj_NC_Pair, nb_cars) write_retained_adjs(adjacency_file_name_prefix, cars, dcj_NC_Adj, step_nb, car_left, car_right, species1_id, species2_id, species3_id) print("... " + str(len(dcj_NC_Adj)) + " dcj reliable non-conflicting adjacencies") return dcj_NC_Adj, step_nb, dcj_NC_Pair