Source code for procars.step_modules.compute_conserved_adjacencies

#!/usr/bin/env python
# -*- coding: utf-8 -*-:

"""
Copyright © Bonsai - LIFL (Université Lille 1, CNRS UMR 8022) and Inria-Lille Nord Europe

contact: aida.ouangraoua@inria.fr, amandine.perrin@inria.fr

This software is a computer program whose purpose is to progressively reconstruct ancestral
gene orders.

This software is governed by the CeCILL-B license under French law and
abiding by the rules of distribution of free software. You can use,
modify and/or redistribute the software under the terms of the CeCILL-B
license as circulated by CEA, CNRS and Inria at the following URL
http://www.cecill.info, or in the LICENCE file at the root directory of this program.

The fact that you are presently reading this means that you have had
knowledge of the CeCILL-B license and that you accept its terms.

---------------------------------------------------

``compute_conserved_adjacencies`` **module description**:

From a CAR file, this module computes a set of conserved  adjacencies between cars
for the ancestor node, and it splits this set into :

    * a set of fully conserved adjacencies and a set of partly conserved adjacencies first
    * among the two sets, a set of non-conflicting adjacencies and a set of conflicting adjacencies

.. moduleauthor:: Aïda Ouangraoua, Amandine PERRIN

February 2014

"""


from procars.utils.util_classes import SparseMatrix
from procars.utils import util_adjacency_functions as adj_func
from procars.utils import utils_IO


[docs]def compute_blocks_cars(nb_species, blocks, block_to_car): """ Computes the list of blocks' cars for each species Parameters ---------- nb_species : int Total number of species blocks : list array of arrays (blocks for each species) blocks = [[*list of blocks for spe_id=0*], [*list of blocks for spe_id=1*], ...] with *list of blocks for spe_id=1* = [[*list of blocks for spe_id=1, chromosome 1*], [*list of blocks for spe_id=1, chromosome 2*], ...] with *list of blocks for spe_id=1, chromosome 1* = [[start, end, num_bloc, orient], [start, end, num_bloc, orient], ... for the n blocks] block_to_car : list Integer array such that block_2_car[block_id] = car_id to which block_id belongs Returns ------- list array of arrays (blocks' cars) blocks_car = [[*list of block_cars for spe_id=0*], [*list of block_cars for spe_id=1*], ...] with *list of block_cars for spe_id=1* = [[*list of blocks' cars for spe_id=1, chromosome 1*], [*list of blocks' cars for spe_id=1, chromosome 2*], ...] """ # array containing arrays (one for each species) same as array blocks where blocks are replaced # by corresponding cars blocks_car = [] # sort blocks by ascending position in each chr of each species for spe_id in range(nb_species): # for each chr for chr_id in range(len(blocks[spe_id])): # sort blocks by start position blocks[spe_id][chr_id].sort() # for each species for spe_id in range(nb_species): blocks_car.append([]) # for each chromosomes in species for chr_id in range(len(blocks[spe_id])): blocks_car[spe_id].append([]) # for each block in chromosome for signed_block in blocks[spe_id][chr_id]: block_id = signed_block[2] blocks_car[spe_id][chr_id].append(block_to_car[block_id]) return blocks_car
[docs]def commons_read_cars(nb_blocks, blocks, nb_species, car_file_name, step_nb, binary_file_name): """ Function reading blocks from a block file, reading car file, and computing block neighbors Parameters ---------- nb_blocks : int Number of blocks blocks : list array of arrays (blocks for each species) blocks = [[*list of blocks for spe_id=0*], [*list of blocks for spe_id=1*], ...] with *list of blocks for spe_id=1* = [[*list of blocks for spe_id=1, chromosome 1*], [*list of blocks for spe_id=1, chromosome 2*], ...] with *list of blocks for spe_id=1, chromosome 1* = [[start, end, num_bloc, orient], [start, end, num_bloc, orient], ... for the n blocks] nb_species : int Total number of species car_file_name : String Name of the PQtree file where current CARs are stored step_nb : int Current step of the ProCars method binary_file_name : String Name of the binary file where information about current cars must be saved Returns ------- tuple *cars:* array of arrays (cars) *nb_cars:* integer, total number of cars *block_position_in_car:* integer array *blocks_car:* array of arrays (blocks' cars): see return of **compute_blocks_cars** """ car_file_name += "_" + str(int(step_nb) - 1) + ".txt" cars, block_2_car, block_position_in_car = utils_IO.read_car_file(car_file_name, nb_blocks) nb_cars = len(cars) - 1 # Compute same as array blocks where blocks are replaced by corresponding cars blocks_car = compute_blocks_cars(nb_species, blocks, block_2_car) values_saved = [cars, nb_cars, block_position_in_car, blocks_car] # save computed values in a binary file utils_IO.save_binary_information(binary_file_name, values_saved) return cars, nb_cars, block_position_in_car, blocks_car
[docs]def compute_potential_cars_neighbors(spe_id, blocks, blocks_car): """ Compute potential pairs of neighboring cars Parameters ---------- spe_id : int ID of current species blocks : list Positions of blocks for each species (see **commons_read_cars**) blocks_car : list Blocks' CARs for each species. See return of **compute_blocks_cars** Returns ------- tuple *neighbors:* pair of cars having consecutive segments in species *segments_neighbors:* composition of corresponding segments (in terms of signed blocks) """ neighbors = [] # pair of cars having consecutive segments in species segments_neighbors = [] # composition of corresponding segments (in terms of signed blocks) # for each chromosome in species 'spe_id', find segments of consecutive identical cars for chr_id in range(len(blocks_car[spe_id])): neighbors.append([]) # add empty pair of cars to begin segments_neighbors.append([]) # add empty pair of segments to begin last_car = 0 # last car processed at the previous loop last_segment = 0 # segment corresponding to last car i = 0 while(i < len(blocks_car[spe_id][chr_id])): # current car car_id = blocks_car[spe_id][chr_id][i] # corresponding segment segment = [] # recover car segment # while we are in the same current cars while(i < len(blocks_car[spe_id][chr_id]) and blocks_car[spe_id][chr_id][i] == car_id): block_id = blocks[spe_id][chr_id][i][2] sign = blocks[spe_id][chr_id][i][3] segment.append(sign * block_id) i += 1 # add new pair of potential neighboring cars # and, add the pair of corresponding segments # complete an uncomplete pair of neighboring cars if(len(neighbors[-1]) < 2): neighbors[-1].append(car_id) segments_neighbors[-1].append(segment) # add a new pair of neighboring cars else: neighbors.append([last_car, car_id]) segments_neighbors.append([last_segment, segment]) last_car = car_id last_segment = segment return neighbors, segments_neighbors
[docs]def reverse(segment): """ Function producing the reverse of a segment Examples -------- For example:: reverse([1 -2 3]) = [-3 2 -1] Parameters ---------- segment : list Segment of CAR IDs Returns ------- list *segment_rev:* reverse of given segment """ segment_rev = [] # array : reverse of segment for signed_block in segment: segment_rev = [-signed_block] + segment_rev return segment_rev
[docs]def segment_car_common_extremity(segment, car, block_position_in_car, side_segment, side_car): """ Function checking if a segment and a car share a same extremity region in terms of synteny. Parameters ---------- segment : list Segment of CAR IDs car : list Ordered list of signed blocks in the given CAR block_position_in_car : list Integer array such that block_position_in_car[block_id] = position of block in car to which it belongs side_segment : int ``1`` if we look at right end of the segment, ``0`` otherwise side_car : int ``1`` if we look at right end of the CAR, ``0`` otherwise Returns ------- boolean True if it shares an extremity, False otherwise Examples -------- * segment_car_common_extremity([1 -2 4 -3], [1 2 3 4], 1, 1) = True, because they share a end region composed of blocks 3 and 4. * segment_car_common_extremity([1 -2 4 -3], [1 2 3 4], 0, 0) = True, because they share a begin region composed of blocks 1 and 2. * segment_car_common_extremity([1 -2 4 -3], [-3 4 2 -1], 0, 1) = True, because they do share a same region that is the beginning of segment and the end of car """ answer = False segment_rev = segment if(side_segment == 1): segment_rev = reverse(segment) extremity_position = 0 far_position = 0 if(side_car == 1): extremity_position = len(car) - 1 far_position = len(car) - 1 i = 0 while((i < len(segment_rev)) and (i < 2 or i != abs(far_position - extremity_position) + 1) and (i < len(car) - 1)): signed_block = segment_rev[i] block_position = block_position_in_car[abs(signed_block)] if(side_car == 0 and block_position > far_position): far_position = block_position if(side_car == 1 and block_position < far_position): far_position = block_position i += 1 if (i >= 2 and i == abs(far_position - extremity_position) + 1): answer = True return answer
[docs]def compute_extremity(segment, car_id, cars, block_position_in_car, orientation): """ Function computing the orientation of a CAR in a CAR adjacency. Parameters ---------- segment : list List of block IDs car_id : int Given CAR id cars : list List of all current cars block_position_in_car : list Integer array such that block_position_in_car[block_id] = position of block in car to which it belongs orientation : int ``1`` if positive orientation, ``-1`` otherwise Returns ------- int *extremity:* oriented car, or 0 if segment is not an extremity of car """ extremity = 0 # segment with conserved block extremity if(segment[-1] == cars[car_id][-1]): extremity = car_id elif(segment[-1] == -cars[car_id][0]): extremity = -car_id # segment with conserved (unsigned) extremity elif(len(segment) >= 2): # segment with conserved (unsigned) block extremity if(abs(segment[-1]) == abs(cars[car_id][-1])): extremity = car_id elif(abs(segment[-1]) == abs(cars[car_id][0])): extremity = -car_id # segment with conserved (unsigned) segment extremity else: if(segment_car_common_extremity(segment, cars[car_id], block_position_in_car, 1, 1) == 1): extremity = car_id elif(segment_car_common_extremity(segment, cars[car_id], block_position_in_car, 1, 0) == 1): extremity = -car_id # if orientation is reversed, then reverse extremity if(orientation == -1): extremity = -extremity return extremity
[docs]def check_potential_cars_neighbors(spe_id, neighbors, segments_neighbors, cars, block_position_in_car, left, right): """ For each pair of potential neighboring CARs, check if the pair of segments really supports the adjacency Parameters ---------- spe_id : int ID of species to check neighbors : list List of potential neighbors (with CAR ids). **Ex:** [[], [1, 2], [2, 3], ...] segments_neighbors : list List of potential segment neighbors. **Ex.** [[], [[1], [2]], [[2], [3]], ...] cars : list List of ordered cars block_position_in_car : list Integer array such that block_position_in_car[block_id] = position of block in car to which it belongs left : list Left neighbors of species for all blocks right : list Left neighbors of species for all blocks Returns ------- tuple *left* and *right* valid neighbors """ # for each valid pair of cars in array neighbors for num, neighs in enumerate(neighbors): if(len(neighs) > 1): [car1, car2] = neighs [segment1, segment2] = segments_neighbors[num] extremity1 = compute_extremity(segment1, car1, cars, block_position_in_car, 1) extremity2 = compute_extremity(reverse(segment2), car2, cars, block_position_in_car, -1) if(extremity1 != 0 and extremity2 != 0): if (extremity1 == car1): right[spe_id][car1].append(extremity2) else: left[spe_id][car1].append(-extremity2) if (extremity2 == car2): left[spe_id][car2].append(extremity1) else: right[spe_id][car2].append(-extremity1) return left, right
[docs]def compute_cars_neighbors(nb_cars, nb_species, blocks, cars, blocks_car, block_position_in_car, binary_file_name): """ Finds all possible neighbors for each CAR Parameters ---------- nb_cars : int Total number of CARs nb_species : int Total number of species blocks : list All blocks, for each species: array of arrays (blocks for each species) blocks = [[*list of blocks for spe_id=0*], [*list of blocks for spe_id=1*], ...] with *list of blocks for spe_id=1* = [[*list of blocks for spe_id=1, chromosome 1*], [*list of blocks for spe_id=1, chromosome 2*], ...] with *list of blocks for spe_id=1, chromosome 1* = [[start, end, num_bloc, orient], [start, end, num_bloc, orient], ... for the n blocks] cars : list List of cars with signed blocks in each one blocks_cars : list blocks_car = [[*list of block_cars for spe_id=0*], [*list of block_cars for spe_id=1*], ...] with *list of block_cars for spe_id=1* = [[*list of blocks' cars for spe_id=1, chromosome 1*], [*list of blocks' cars for spe_id=1, chromosome 2*], ...] block_position_in_car : list Integer array such that block_position_in_car[block_id] = position of block in car to which it belongs Returns ------- list *left* car neighbors and *right* car neighbors such that left[spe_id][car_id] = [list of left neighbors of car_id in species spe_id] """ left = [] # array: integer(species_id) -> array(storing left neighbors) right = [] # array: integer(species_id) -> array(storing right neighbors) # add cells to arrays left, and right for spe_id in range(nb_species): left.append([]) right.append([]) for _ in range(nb_cars+1): left[spe_id].append([]) right[spe_id].append([]) # for each species for spe_id in range(nb_species): # First, compute potential pairs of neighboring cars neighbors, segments_neighbors = compute_potential_cars_neighbors(spe_id, blocks, blocks_car) # Second, for each pair of potential neighboring cars, check if the pair of # segments supports an adjacency check_potential_cars_neighbors(spe_id, neighbors, segments_neighbors, cars, block_position_in_car, left, right) values_saved = [left, right] utils_IO.save_binary_information(binary_file_name, values_saved) return left, right
[docs]def fill_conserved_arrays(conserved_left_D, conserved_right_D, conserved_left, conserved_right, left, right, species1_id, species2_id, species3_id, block1, block2): """ Method filling the conservation status arrays for a given adjacency *block1, block2* Parameters ---------- conserved_left_D : list Array of left fully conserved adjacencies to be filled conserved_right_D : list Array of right fully conserved adjacencies to be filled conserved_left : list Array of left conserved adjacencies to be filled conserved_right : list Array of right conserved adjacencies to be filled left : list Array such that left[block_id] is an array containing all left neighbors of block block_id right : list Array such that right[block_id] is an array containing all right neighbors of block_id species1_id: list IDs of group1 species species2_id: list IDs of group2 species species3_id: list IDs of group3 species block1: int First block id of the given adjacency block2: int Second block id of the given adjacency """ potential_adjacencies = [[block1, block2], [block1, -block2], [-block1, block2], [-block1, -block2]] for [signed_block1, signed_block2] in potential_adjacencies: support = adj_func.conservation_status([signed_block1, signed_block2], left, right, species1_id, species2_id, species3_id)[0] # if adjacency found in the 3 groups: fully conserved adjacency if(support == 3): if(signed_block1 > 0): conserved_right_D[signed_block1].append(signed_block2) else: conserved_left_D[-signed_block1].append(-signed_block2) if(signed_block2 > 0): conserved_left_D[signed_block2].append(signed_block1) else: conserved_right_D[-signed_block2].append(-signed_block1) # if not fully conserved adjacency, but present in at least one group: # partly conserved adjacency elif(support > 1): if(signed_block1 > 0): conserved_right[signed_block1].append(signed_block2) else: conserved_left[-signed_block1].append(-signed_block2) if(signed_block2 > 0): conserved_left[signed_block2].append(signed_block1) else: conserved_right[-signed_block2].append(-signed_block1)
[docs]def compute_conserved_adjacencies(left, right, nb_blocks, species1_id, species2_id, species3_id): """ Procedure computing the conservation status of all potential adjacencies Fills arrays for fully conserved adjacencies (conserved_left/right_D) and partly conserved adjacencies (conserved_left/right) Parameters ---------- left : list Array such that left[block_id] is an array containing all left neighbors of block block_id right : list Array such that right[block_id] is an array containing all right neighbors of block_id nb_blocks : int Total number of blocks species1_id: list IDs of group1 species species2_id: list IDs of group2 species species3_id: list IDs of group3 species Returns ------- list *conserved_left_D:* array of left fully conserved adjacencies *conserved_right_D:* array of right fully conserved adjacencies *conserved_left:* array of left conserved adjacencies *conserved_right:* array of right conserved adjacencies """ # initialize arrays storing doubly conserved adjacencies conserved_left_D = [[] for _ in xrange(nb_blocks + 1)] conserved_right_D = [[] for _ in xrange(nb_blocks + 1)] # initialize arrays storing simply conserved adjacencies conserved_left = [[] for _ in xrange(nb_blocks + 1)] conserved_right = [[] for _ in xrange(nb_blocks + 1)] # Find status of all possible adjacencies for block1 in xrange(1, nb_blocks + 1): for block2 in xrange(block1 + 1, nb_blocks + 1): # fill arrays with this adjacency fill_conserved_arrays(conserved_left_D, conserved_right_D, conserved_left, conserved_right, left, right, species1_id, species2_id, species3_id, block1, block2) return conserved_left_D, conserved_right_D, conserved_left, conserved_right
[docs]def compute_adjacencymatrix_from_neighbor_arrays(left, right, nb_blocks): """ Function computing an adjacency matrix from left and right neighbors Parameters ---------- left : list Array such that left[block_id] is an array containing all left neighbors of block block_id right : list Array such that right[block_id] is an array containing all right neighbors of block_id nb_blocks : int Total number of blocks Returns ------- SparseMatrix *adjacencymatrix:* 0-1 Matrix which rows and columns correspond to block extremities with: - right extremity of bloc_id : extremity_id = block_id - left extremity of bloc_id : extremity_id = block_id + nb_blocks """ adjacencymatrix = SparseMatrix(2 * nb_blocks + 1, 2 * nb_blocks + 1) for block_id in xrange(1, nb_blocks + 1): extremity_id1 = block_id extremity_id2 = 0 for right_block in right[block_id]: if(right_block < 0): extremity_id2 = abs(right_block) else: extremity_id2 = right_block + nb_blocks adjacencymatrix[extremity_id1, extremity_id2] = 1 extremity_id1 = block_id + nb_blocks extremity_id2 = 0 for left_block in left[block_id]: if(left_block < 0): extremity_id2 = abs(left_block) + nb_blocks else: extremity_id2 = left_block adjacencymatrix[extremity_id1, extremity_id2] = 1 return adjacencymatrix
[docs]def diff_nc_matrix_m(matrix1, matrix2, nb_blocks): """ Function splitting an adjacency matrix *matrix2* into two matrices : the non-conflicting and the conflicting adjacencies with an other matrix *matrix1* Parameters ---------- matrix1 : SparseMatrix 0-1 Matrix, first adjacency matrix matrix2 : SparseMatrix 0-1 Matrix, second adjacency matrix nb_blocks : int Total number of blocks Returns ------- tuple *nc_adjacencymatrix:* SparseMatrix containing non-conflicting adj. *c_adjacencymatrix:* SparseMatrix containing conflicting adj. """ # 0-1 adjacency matrix containing adjacencies of matrix2 non-conflicting with matrix1 nc_adjacencymatrix = SparseMatrix(2 * nb_blocks + 1, 2 * nb_blocks + 1) # 0-1 adjacency matrix containing adjacencies of matrix2 that are in conflict with matrix1 c_adjacencymatrix = SparseMatrix(2 * nb_blocks + 1, 2 * nb_blocks + 1) for extremity_id1, y_ids in matrix2.sparse_matrix.iteritems(): for extremity_id2 in y_ids: if(sum(matrix1[extremity_id1]) + sum(matrix1[extremity_id2]) == 0): nc_adjacencymatrix[extremity_id1, extremity_id2] = 1 nc_adjacencymatrix[extremity_id2, extremity_id1] = 1 else: c_adjacencymatrix[extremity_id1, extremity_id2] = 1 c_adjacencymatrix[extremity_id2, extremity_id1] = 1 return nc_adjacencymatrix, c_adjacencymatrix
[docs]def write_retained_adjs(last_adjs, new_adjs, group1_adjs, group2_adjs, cars, step_nb, car_left, car_right, species1_id, species2_id, species3_id): """ Write adjacencies found for ancestor into the output file Parameters ---------- last_adjs : String Name of the file where previously added adjacencies are stored new_adjs : String Name of the file where new adjacencies must be written group1_adjs : list List of fully conserved non-conflicting adjacencies group2_adjs : list List of partly conserved non-conflicting adjacencies cars : list List of all cars step_nb : int Step of the ProCARs method car_left : list For each car, left neighbor car_right : list For each car, right neighbor species1_id : list List of IDs of species in group1 of partition defined by the ancestor node species2_id : list List of IDs of species in group2 of partition defined by the ancestor node species3_id : list List of IDs of species in group3 of partition defined by the ancestor node """ with open(last_adjs, "r") as last: all_last_adjs = last.readlines() with open(new_adjs, "w") as file_handler: find_info_and_write_adjs(group1_adjs, group2_adjs, car_left, car_right, cars, step_nb, species1_id, species2_id, species3_id, file_handler) file_handler.writelines(all_last_adjs)
[docs]def write_discarded_adjs(filename, group1_adjs, group2_adjs, car_left, car_right, species1_id, species2_id, species3_id, cars, step_nb): """ Write adjacencies found for ancestor into the output file Parameters ---------- filename : String Name of the output file in which writting all adjacencies group1_adjs : list List of car_adjacencies fully conserved but conflicting group2_adjs : list List of car_adjacencies partly conserved and in conflict with fully or partly conserved adjs car_left : list For each car, left neighbor car_right : list For each car, right neighbor species1_id : list List of IDs of species in group1 of partition defined by the ancestor node species2_id : list List of IDs of species in group2 of partition defined by the ancestor node species3_id : list List of IDs of species in group3 of partition defined by the ancestor node cars : list List of all cars step_nb : int Current step of the ProCARs method """ with open(filename, "w") as file_handler: find_info_and_write_adjs(group1_adjs, group2_adjs, car_left, car_right, cars, step_nb, species1_id, species2_id, species3_id, file_handler)
[docs]def find_info_and_write_adjs(group1_adjs, group2_adjs, car_left, car_right, cars, step_nb, species1_id, species2_id, species3_id, file_handler): """ Find the conservation status of all retained adjacencies, and write them into the output file Parameters ---------- group1_adjs : list List of car_adjacencies fully conserved group2_adjs : list List of car_adjacencies partly conserved car_left : list For each car, left neighbor car_right : list For each car, right neighbor cars : list For each car id, ordered list of signed blocks in the car step_nb : int Current step of the ProCars method species1_id : list IDs of group1 species species2_id : list IDs of group2 species species3_id : list IDs of group3 species file_handler : fileObject Open file in which writing the given adjacencies and their information """ for car_adjacency in group1_adjs: labels = adj_func.find_adj_info(car_adjacency, car_left, car_right, species1_id, species2_id, species3_id) utils_IO.write_adjacency(file_handler, cars, car_adjacency, 0, step_nb, labels) for car_adjacency in group2_adjs: labels = adj_func.find_adj_info(car_adjacency, car_left, car_right, species1_id, species2_id, species3_id) utils_IO.write_adjacency(file_handler, cars, car_adjacency, 1, step_nb, labels)
[docs]def find_adjacency_sets(nb_cars, nb_species, blocks, cars, blocks_car, block_position_in_car, car_neigh_bin, species1_id, species2_id, species3_id): """ Finds all sets of adjacencies, according to their status : (non)conflicting, fully/partly conserved, etc. Parameters ---------- nb_cars : int Total number of cars nb_species : int Total number of species blocks : list All blocks, for each species: array of arrays (blocks for each species) blocks = [[*list of blocks for spe_id=0*], [*list of blocks for spe_id=1*], ...] with *list of blocks for spe_id=1* = [[*list of blocks for spe_id=1, chromosome 1*], [*list of blocks for spe_id=1, chromosome 2*], ...] with *list of blocks for spe_id=1, chromosome 1* = [[start, end, num_bloc, orient], [start, end, num_bloc, orient], ... for the n blocks] cars : list For each car id, ordered list of signed blocks in the car blocks_car : list [[*list of block_cars for spe_id = 0*], [*list of block_cars for spe_id = 1*], ...] with *list of block_cars for spe_id = 1* = [[*list of blocks' cars for spe_id=1, chromosome 1*], [*list of blocks' cars for spe_id=1, chromosome 2*], ...] block_position_in_car : list Integer array such that block_position_in_car[block_id] = position of block in car to which it belongs car_neigh_bin : string Name of the binary file where car neighbors information is stored species1_id : list IDs of group1 species species2_id : list IDs of group2 species species3_id : list IDs of group3 species Returns ------- tuple *car_left:* left neighbors of each car, such that left[spe_id][car_id] = [list of left neighbors of car_id in species spe_id] *car_right:* same as car_left, for right neighbors *fs_nc_adj:* fully conserved non-conflicting adjacencies *ps_nc2_adj:* partly conserved non-conflicting adjacencies *fs_c_adj:* fully conserved conflicting adjacencies *ps_nc_c_adj:* partly conserved adjacencies in conflict with fully conserved ones *ps_c_adj:* cnflicting partly conserved adjacencies """ # COMPUTING CARS NEIGHBORS IN SPECIES [car_left, car_right] = compute_cars_neighbors(nb_cars, nb_species, blocks, cars, blocks_car, block_position_in_car, car_neigh_bin) # COMPUTING CONSERVED CARS ADJACENCIES IN ANCESTOR res_adjs = compute_conserved_adjacencies(car_left, car_right, nb_cars, species1_id, species2_id, species3_id) conserved_left_D, conserved_right_D, conserved_left, conserved_right = res_adjs # Adjacency Matrices # ## Abbreviation: # fs : Fully conserved # ps : Partly Conserved # nc : Non-conflicting # c : Conflicting # Adj : Adjacences fs_matrix = compute_adjacencymatrix_from_neighbor_arrays(conserved_left_D, conserved_right_D, nb_cars) [fs_nc_matrix, fs_c_matrix] = adj_func.split_adjacencymatrix(fs_matrix, nb_cars) ps_matrix = compute_adjacencymatrix_from_neighbor_arrays(conserved_left, conserved_right, nb_cars) [ps_r_matrix, _] = diff_nc_matrix_m(fs_nc_matrix, ps_matrix, nb_cars) [ps_nc_matrix, ps_c_matrix] = adj_func.split_adjacencymatrix(ps_r_matrix, nb_cars) # COMPUTING CARS ADJACENCIES IN ANCESTOR [ps_nc2_matrix, ps_nc_c_matrix] = diff_nc_matrix_m(fs_c_matrix, ps_nc_matrix, nb_cars) fs_nc_pair = adj_func.compute_extremity_pair(fs_nc_matrix) fs_nc_adj = adj_func.compute_adjacencies(fs_nc_pair, nb_cars) nb_fs_nc_adj = len(fs_nc_adj) ps_nc2_pair = adj_func.compute_extremity_pair(ps_nc2_matrix) ps_nc2_adj = adj_func.compute_adjacencies(ps_nc2_pair, nb_cars) nb_ps_nc2_adj = len(ps_nc2_adj) fs_c_pair = adj_func.compute_extremity_pair(fs_c_matrix) fs_c_adj = adj_func.compute_adjacencies(fs_c_pair, nb_cars) ps_nc_c_pair = adj_func.compute_extremity_pair(ps_nc_c_matrix) ps_nc_c_adj = adj_func.compute_adjacencies(ps_nc_c_pair, nb_cars) ps_c_pair = adj_func.compute_extremity_pair(ps_c_matrix) ps_c_adj = adj_func.compute_adjacencies(ps_c_pair, nb_cars) print("... " + str(nb_fs_nc_adj) + " fully conserved adjacencies") print("... " + str(nb_ps_nc2_adj) + " partly conserved adjacencies") return car_left, car_right, fs_nc_adj, ps_nc2_adj, fs_c_adj, ps_nc_c_adj, ps_c_adj
[docs]def main(car_file_name, adjacency_file_name_prefix, step_nb, tree_bin, blocks_bin, car_bin, car_neigh_bin): """ Main method From a CAR file, it computes a set of conserved adjacencies between CARs for the ancestor node, and it splits this set into a set of non-conflicting adjacencies and a set of conflicting adjacencies. Parameters ---------- car_file_name : string Prefix of the PQtree files adjacency_file_name_prefix : string Prefix of the Adjacencies files step_nb : int Current step of the ProCARs method tree_bin : string Name of the binary file where information about the species tree is stored blocks_bin : string Name of the binary file where information about blocks is stored car_bin : string Name of the binary file where information about current cars is stored car_neigh_bin : string Name of the binary file where information about car neighbors is stored Returns ------- tuple *nb_final_adj:* total number of adjacencies added *nb_discarded:* total number of discarded adjacencies *fs_nc_adj:* fully conserved non-conflicting adjacencies *ps_nc2_adj:* partly conserved non-conflicting adjacencies *fs_c_adj:* conflicting fully conserved adjacencies *ps_nc_c_adj:* partly conserved adjacencies in conflict with fully conserved ones *ps_c_adj:* conflicting partly conserved adjacencies """ # Read information from blocks and tree (_, _, _, _, _, nb_species, species1_id, species2_id, species3_id, _) = utils_IO.read_binary_file(tree_bin) (blocks, nb_blocks, _, _) = utils_IO.read_binary_file(blocks_bin) # Get CARs information res_car = commons_read_cars(nb_blocks, blocks, nb_species, car_file_name, step_nb, car_bin) [cars, nb_cars, block_position_in_car, blocks_car] = res_car # Split adjacencies into groups according to conservation and conflicts res_adjs = find_adjacency_sets(nb_cars, nb_species, blocks, cars, blocks_car, block_position_in_car, car_neigh_bin, species1_id, species2_id, species3_id) car_left, car_right, fs_nc_adj, ps_nc2_adj, fs_c_adj, ps_nc_c_adj, ps_c_adj = res_adjs # Count adjacencies nb_final_adj = len(fs_nc_adj) + len(ps_nc2_adj) nb_discarded = len(fs_c_adj) + len(ps_nc_c_adj) + len(ps_c_adj) # Writting found car adjacencies for ancestor last_adjs = adjacency_file_name_prefix + "_" + str(step_nb - 1) + ".txt" new_adjs = adjacency_file_name_prefix + "_" + str(step_nb) + ".txt" write_retained_adjs(last_adjs, new_adjs, fs_nc_adj, ps_nc2_adj, cars, step_nb, car_left, car_right, species1_id, species2_id, species3_id) # Write discarded adjacencies filename = new_adjs + "_discarded.txt" write_discarded_adjs(filename, fs_c_adj, ps_nc_c_adj + ps_c_adj, car_left, car_right, species1_id, species2_id, species3_id, cars, step_nb) # print some information print("... " + str(nb_discarded) + " discarded adjacencies\n") return nb_final_adj, nb_discarded, fs_nc_adj, ps_nc2_adj, fs_c_adj, ps_nc_c_adj, ps_c_adj