#!/usr/bin/env python
# -*- coding: utf-8 -*-:
"""
Copyright © Bonsai - LIFL (Université Lille 1, CNRS UMR 8022) and Inria-Lille Nord Europe
contact: aida.ouangraoua@inria.fr, amandine.perrin@inria.fr
This software is a computer program whose purpose is to progressively reconstruct ancestral
gene orders.
This software is governed by the CeCILL-B license under French law and
abiding by the rules of distribution of free software. You can use,
modify and/or redistribute the software under the terms of the CeCILL-B
license as circulated by CEA, CNRS and Inria at the following URL
http://www.cecill.info, or in the LICENCE file at the root directory of this program.
The fact that you are presently reading this means that you have had
knowledge of the CeCILL-B license and that you accept its terms.
---------------------------------------------------
``compute_conserved_adjacencies`` **module description**:
From a CAR file, this module computes a set of conserved adjacencies between cars
for the ancestor node, and it splits this set into :
* a set of fully conserved adjacencies and a set of partly conserved adjacencies first
* among the two sets, a set of non-conflicting adjacencies and a set of conflicting adjacencies
.. moduleauthor:: Aïda Ouangraoua, Amandine PERRIN
February 2014
"""
from procars.utils.util_classes import SparseMatrix
from procars.utils import util_adjacency_functions as adj_func
from procars.utils import utils_IO
[docs]def compute_blocks_cars(nb_species, blocks, block_to_car):
""" Computes the list of blocks' cars for each species
Parameters
----------
nb_species : int
Total number of species
blocks : list
array of arrays (blocks for each species)
blocks = [[*list of blocks for spe_id=0*], [*list of blocks for spe_id=1*], ...]
with *list of blocks for spe_id=1* =
[[*list of blocks for spe_id=1, chromosome 1*],
[*list of blocks for spe_id=1, chromosome 2*],
...]
with *list of blocks for spe_id=1, chromosome 1* =
[[start, end, num_bloc, orient],
[start, end, num_bloc, orient],
... for the n blocks]
block_to_car : list
Integer array such that block_2_car[block_id] = car_id to which block_id belongs
Returns
-------
list
array of arrays (blocks' cars)
blocks_car = [[*list of block_cars for spe_id=0*], [*list of block_cars for spe_id=1*], ...]
with *list of block_cars for spe_id=1* =
[[*list of blocks' cars for spe_id=1, chromosome 1*],
[*list of blocks' cars for spe_id=1, chromosome 2*],
...]
"""
# array containing arrays (one for each species) same as array blocks where blocks are replaced
# by corresponding cars
blocks_car = []
# sort blocks by ascending position in each chr of each species
for spe_id in range(nb_species):
# for each chr
for chr_id in range(len(blocks[spe_id])):
# sort blocks by start position
blocks[spe_id][chr_id].sort()
# for each species
for spe_id in range(nb_species):
blocks_car.append([])
# for each chromosomes in species
for chr_id in range(len(blocks[spe_id])):
blocks_car[spe_id].append([])
# for each block in chromosome
for signed_block in blocks[spe_id][chr_id]:
block_id = signed_block[2]
blocks_car[spe_id][chr_id].append(block_to_car[block_id])
return blocks_car
[docs]def commons_read_cars(nb_blocks, blocks, nb_species, car_file_name, step_nb, binary_file_name):
"""
Function reading blocks from a block file, reading car file, and computing block neighbors
Parameters
----------
nb_blocks : int
Number of blocks
blocks : list
array of arrays (blocks for each species)
blocks = [[*list of blocks for spe_id=0*], [*list of blocks for spe_id=1*], ...]
with *list of blocks for spe_id=1* =
[[*list of blocks for spe_id=1, chromosome 1*],
[*list of blocks for spe_id=1, chromosome 2*],
...]
with *list of blocks for spe_id=1, chromosome 1* =
[[start, end, num_bloc, orient],
[start, end, num_bloc, orient],
... for the n blocks]
nb_species : int
Total number of species
car_file_name : String
Name of the PQtree file where current CARs are stored
step_nb : int
Current step of the ProCars method
binary_file_name : String
Name of the binary file where information about current cars must be saved
Returns
-------
tuple
*cars:* array of arrays (cars)
*nb_cars:* integer, total number of cars
*block_position_in_car:* integer array
*blocks_car:* array of arrays (blocks' cars): see return of **compute_blocks_cars**
"""
car_file_name += "_" + str(int(step_nb) - 1) + ".txt"
cars, block_2_car, block_position_in_car = utils_IO.read_car_file(car_file_name, nb_blocks)
nb_cars = len(cars) - 1
# Compute same as array blocks where blocks are replaced by corresponding cars
blocks_car = compute_blocks_cars(nb_species, blocks, block_2_car)
values_saved = [cars, nb_cars, block_position_in_car, blocks_car]
# save computed values in a binary file
utils_IO.save_binary_information(binary_file_name, values_saved)
return cars, nb_cars, block_position_in_car, blocks_car
[docs]def compute_potential_cars_neighbors(spe_id, blocks, blocks_car):
""" Compute potential pairs of neighboring cars
Parameters
----------
spe_id : int
ID of current species
blocks : list
Positions of blocks for each species (see **commons_read_cars**)
blocks_car : list
Blocks' CARs for each species. See return of **compute_blocks_cars**
Returns
-------
tuple
*neighbors:* pair of cars having consecutive segments in species
*segments_neighbors:* composition of corresponding segments (in terms of signed blocks)
"""
neighbors = [] # pair of cars having consecutive segments in species
segments_neighbors = [] # composition of corresponding segments (in terms of signed blocks)
# for each chromosome in species 'spe_id', find segments of consecutive identical cars
for chr_id in range(len(blocks_car[spe_id])):
neighbors.append([]) # add empty pair of cars to begin
segments_neighbors.append([]) # add empty pair of segments to begin
last_car = 0 # last car processed at the previous loop
last_segment = 0 # segment corresponding to last car
i = 0
while(i < len(blocks_car[spe_id][chr_id])):
# current car
car_id = blocks_car[spe_id][chr_id][i]
# corresponding segment
segment = []
# recover car segment
# while we are in the same current cars
while(i < len(blocks_car[spe_id][chr_id]) and blocks_car[spe_id][chr_id][i] == car_id):
block_id = blocks[spe_id][chr_id][i][2]
sign = blocks[spe_id][chr_id][i][3]
segment.append(sign * block_id)
i += 1
# add new pair of potential neighboring cars
# and, add the pair of corresponding segments
# complete an uncomplete pair of neighboring cars
if(len(neighbors[-1]) < 2):
neighbors[-1].append(car_id)
segments_neighbors[-1].append(segment)
# add a new pair of neighboring cars
else:
neighbors.append([last_car, car_id])
segments_neighbors.append([last_segment, segment])
last_car = car_id
last_segment = segment
return neighbors, segments_neighbors
[docs]def reverse(segment):
""" Function producing the reverse of a segment
Examples
--------
For example::
reverse([1 -2 3]) = [-3 2 -1]
Parameters
----------
segment : list
Segment of CAR IDs
Returns
-------
list
*segment_rev:* reverse of given segment
"""
segment_rev = [] # array : reverse of segment
for signed_block in segment:
segment_rev = [-signed_block] + segment_rev
return segment_rev
[docs]def segment_car_common_extremity(segment, car, block_position_in_car, side_segment, side_car):
""" Function checking if a segment and a car share a same extremity region in terms of synteny.
Parameters
----------
segment : list
Segment of CAR IDs
car : list
Ordered list of signed blocks in the given CAR
block_position_in_car : list
Integer array such that block_position_in_car[block_id] = position of block
in car to which it belongs
side_segment : int
``1`` if we look at right end of the segment, ``0`` otherwise
side_car : int
``1`` if we look at right end of the CAR, ``0`` otherwise
Returns
-------
boolean
True if it shares an extremity, False otherwise
Examples
--------
* segment_car_common_extremity([1 -2 4 -3], [1 2 3 4], 1, 1) = True, because they share
a end region composed of blocks 3 and 4.
* segment_car_common_extremity([1 -2 4 -3], [1 2 3 4], 0, 0) = True, because they share
a begin region composed of blocks 1 and 2.
* segment_car_common_extremity([1 -2 4 -3], [-3 4 2 -1], 0, 1) = True, because they do
share a same region that is the beginning of segment and the end of car
"""
answer = False
segment_rev = segment
if(side_segment == 1):
segment_rev = reverse(segment)
extremity_position = 0
far_position = 0
if(side_car == 1):
extremity_position = len(car) - 1
far_position = len(car) - 1
i = 0
while((i < len(segment_rev)) and
(i < 2 or i != abs(far_position - extremity_position) + 1) and
(i < len(car) - 1)):
signed_block = segment_rev[i]
block_position = block_position_in_car[abs(signed_block)]
if(side_car == 0 and block_position > far_position):
far_position = block_position
if(side_car == 1 and block_position < far_position):
far_position = block_position
i += 1
if (i >= 2 and i == abs(far_position - extremity_position) + 1):
answer = True
return answer
[docs]def compute_extremity(segment, car_id, cars, block_position_in_car, orientation):
""" Function computing the orientation of a CAR in a CAR adjacency.
Parameters
----------
segment : list
List of block IDs
car_id : int
Given CAR id
cars : list
List of all current cars
block_position_in_car : list
Integer array such that block_position_in_car[block_id] = position of block
in car to which it belongs
orientation : int
``1`` if positive orientation, ``-1`` otherwise
Returns
-------
int
*extremity:* oriented car, or 0 if segment is not an extremity of car
"""
extremity = 0
# segment with conserved block extremity
if(segment[-1] == cars[car_id][-1]):
extremity = car_id
elif(segment[-1] == -cars[car_id][0]):
extremity = -car_id
# segment with conserved (unsigned) extremity
elif(len(segment) >= 2):
# segment with conserved (unsigned) block extremity
if(abs(segment[-1]) == abs(cars[car_id][-1])):
extremity = car_id
elif(abs(segment[-1]) == abs(cars[car_id][0])):
extremity = -car_id
# segment with conserved (unsigned) segment extremity
else:
if(segment_car_common_extremity(segment, cars[car_id],
block_position_in_car, 1, 1) == 1):
extremity = car_id
elif(segment_car_common_extremity(segment, cars[car_id],
block_position_in_car, 1, 0) == 1):
extremity = -car_id
# if orientation is reversed, then reverse extremity
if(orientation == -1):
extremity = -extremity
return extremity
[docs]def check_potential_cars_neighbors(spe_id, neighbors, segments_neighbors, cars,
block_position_in_car, left, right):
""" For each pair of potential neighboring CARs, check if the pair of segments
really supports the adjacency
Parameters
----------
spe_id : int
ID of species to check
neighbors : list
List of potential neighbors (with CAR ids). **Ex:** [[], [1, 2], [2, 3], ...]
segments_neighbors : list
List of potential segment neighbors. **Ex.** [[], [[1], [2]], [[2], [3]], ...]
cars : list
List of ordered cars
block_position_in_car : list
Integer array such that block_position_in_car[block_id] = position of block
in car to which it belongs
left : list
Left neighbors of species for all blocks
right : list
Left neighbors of species for all blocks
Returns
-------
tuple
*left* and *right* valid neighbors
"""
# for each valid pair of cars in array neighbors
for num, neighs in enumerate(neighbors):
if(len(neighs) > 1):
[car1, car2] = neighs
[segment1, segment2] = segments_neighbors[num]
extremity1 = compute_extremity(segment1, car1, cars, block_position_in_car, 1)
extremity2 = compute_extremity(reverse(segment2), car2, cars, block_position_in_car, -1)
if(extremity1 != 0 and extremity2 != 0):
if (extremity1 == car1):
right[spe_id][car1].append(extremity2)
else:
left[spe_id][car1].append(-extremity2)
if (extremity2 == car2):
left[spe_id][car2].append(extremity1)
else:
right[spe_id][car2].append(-extremity1)
return left, right
[docs]def compute_cars_neighbors(nb_cars, nb_species, blocks, cars, blocks_car,
block_position_in_car, binary_file_name):
""" Finds all possible neighbors for each CAR
Parameters
----------
nb_cars : int
Total number of CARs
nb_species : int
Total number of species
blocks : list
All blocks, for each species: array of arrays (blocks for each species)
blocks = [[*list of blocks for spe_id=0*], [*list of blocks for spe_id=1*], ...]
with *list of blocks for spe_id=1* =
[[*list of blocks for spe_id=1, chromosome 1*],
[*list of blocks for spe_id=1, chromosome 2*],
...]
with *list of blocks for spe_id=1, chromosome 1* =
[[start, end, num_bloc, orient],
[start, end, num_bloc, orient],
... for the n blocks]
cars : list
List of cars with signed blocks in each one
blocks_cars : list
blocks_car = [[*list of block_cars for spe_id=0*], [*list of block_cars for spe_id=1*], ...]
with *list of block_cars for spe_id=1* =
[[*list of blocks' cars for spe_id=1, chromosome 1*],
[*list of blocks' cars for spe_id=1, chromosome 2*],
...]
block_position_in_car : list
Integer array such that block_position_in_car[block_id] = position of block
in car to which it belongs
Returns
-------
list
*left* car neighbors and *right* car neighbors
such that left[spe_id][car_id] = [list of left neighbors of car_id in species spe_id]
"""
left = [] # array: integer(species_id) -> array(storing left neighbors)
right = [] # array: integer(species_id) -> array(storing right neighbors)
# add cells to arrays left, and right
for spe_id in range(nb_species):
left.append([])
right.append([])
for _ in range(nb_cars+1):
left[spe_id].append([])
right[spe_id].append([])
# for each species
for spe_id in range(nb_species):
# First, compute potential pairs of neighboring cars
neighbors, segments_neighbors = compute_potential_cars_neighbors(spe_id, blocks, blocks_car)
# Second, for each pair of potential neighboring cars, check if the pair of
# segments supports an adjacency
check_potential_cars_neighbors(spe_id, neighbors, segments_neighbors, cars,
block_position_in_car, left, right)
values_saved = [left, right]
utils_IO.save_binary_information(binary_file_name, values_saved)
return left, right
[docs]def fill_conserved_arrays(conserved_left_D, conserved_right_D, conserved_left, conserved_right,
left, right, species1_id, species2_id, species3_id, block1, block2):
""" Method filling the conservation status arrays for a given adjacency *block1, block2*
Parameters
----------
conserved_left_D : list
Array of left fully conserved adjacencies to be filled
conserved_right_D : list
Array of right fully conserved adjacencies to be filled
conserved_left : list
Array of left conserved adjacencies to be filled
conserved_right : list
Array of right conserved adjacencies to be filled
left : list
Array such that left[block_id] is an array containing all left neighbors of block block_id
right : list
Array such that right[block_id] is an array containing all right neighbors of block_id
species1_id: list
IDs of group1 species
species2_id: list
IDs of group2 species
species3_id: list
IDs of group3 species
block1: int
First block id of the given adjacency
block2: int
Second block id of the given adjacency
"""
potential_adjacencies = [[block1, block2], [block1, -block2], [-block1, block2],
[-block1, -block2]]
for [signed_block1, signed_block2] in potential_adjacencies:
support = adj_func.conservation_status([signed_block1, signed_block2], left, right,
species1_id, species2_id, species3_id)[0]
# if adjacency found in the 3 groups: fully conserved adjacency
if(support == 3):
if(signed_block1 > 0):
conserved_right_D[signed_block1].append(signed_block2)
else:
conserved_left_D[-signed_block1].append(-signed_block2)
if(signed_block2 > 0):
conserved_left_D[signed_block2].append(signed_block1)
else:
conserved_right_D[-signed_block2].append(-signed_block1)
# if not fully conserved adjacency, but present in at least one group:
# partly conserved adjacency
elif(support > 1):
if(signed_block1 > 0):
conserved_right[signed_block1].append(signed_block2)
else:
conserved_left[-signed_block1].append(-signed_block2)
if(signed_block2 > 0):
conserved_left[signed_block2].append(signed_block1)
else:
conserved_right[-signed_block2].append(-signed_block1)
[docs]def compute_conserved_adjacencies(left, right, nb_blocks, species1_id, species2_id, species3_id):
""" Procedure computing the conservation status of all potential adjacencies
Fills arrays for fully conserved adjacencies (conserved_left/right_D) and partly conserved
adjacencies (conserved_left/right)
Parameters
----------
left : list
Array such that left[block_id] is an array containing all left neighbors of block block_id
right : list
Array such that right[block_id] is an array containing all right neighbors of block_id
nb_blocks : int
Total number of blocks
species1_id: list
IDs of group1 species
species2_id: list
IDs of group2 species
species3_id: list
IDs of group3 species
Returns
-------
list
*conserved_left_D:* array of left fully conserved adjacencies
*conserved_right_D:* array of right fully conserved adjacencies
*conserved_left:* array of left conserved adjacencies
*conserved_right:* array of right conserved adjacencies
"""
# initialize arrays storing doubly conserved adjacencies
conserved_left_D = [[] for _ in xrange(nb_blocks + 1)]
conserved_right_D = [[] for _ in xrange(nb_blocks + 1)]
# initialize arrays storing simply conserved adjacencies
conserved_left = [[] for _ in xrange(nb_blocks + 1)]
conserved_right = [[] for _ in xrange(nb_blocks + 1)]
# Find status of all possible adjacencies
for block1 in xrange(1, nb_blocks + 1):
for block2 in xrange(block1 + 1, nb_blocks + 1):
# fill arrays with this adjacency
fill_conserved_arrays(conserved_left_D, conserved_right_D,
conserved_left, conserved_right, left, right, species1_id,
species2_id, species3_id, block1, block2)
return conserved_left_D, conserved_right_D, conserved_left, conserved_right
[docs]def compute_adjacencymatrix_from_neighbor_arrays(left, right, nb_blocks):
""" Function computing an adjacency matrix from left and right neighbors
Parameters
----------
left : list
Array such that left[block_id] is an array containing all left neighbors of block block_id
right : list
Array such that right[block_id] is an array containing all right neighbors of block_id
nb_blocks : int
Total number of blocks
Returns
-------
SparseMatrix
*adjacencymatrix:* 0-1 Matrix which rows and columns correspond to block extremities with:
- right extremity of bloc_id : extremity_id = block_id
- left extremity of bloc_id : extremity_id = block_id + nb_blocks
"""
adjacencymatrix = SparseMatrix(2 * nb_blocks + 1, 2 * nb_blocks + 1)
for block_id in xrange(1, nb_blocks + 1):
extremity_id1 = block_id
extremity_id2 = 0
for right_block in right[block_id]:
if(right_block < 0):
extremity_id2 = abs(right_block)
else:
extremity_id2 = right_block + nb_blocks
adjacencymatrix[extremity_id1, extremity_id2] = 1
extremity_id1 = block_id + nb_blocks
extremity_id2 = 0
for left_block in left[block_id]:
if(left_block < 0):
extremity_id2 = abs(left_block) + nb_blocks
else:
extremity_id2 = left_block
adjacencymatrix[extremity_id1, extremity_id2] = 1
return adjacencymatrix
[docs]def diff_nc_matrix_m(matrix1, matrix2, nb_blocks):
""" Function splitting an adjacency matrix *matrix2* into two matrices : the
non-conflicting and the conflicting adjacencies with an other matrix *matrix1*
Parameters
----------
matrix1 : SparseMatrix
0-1 Matrix, first adjacency matrix
matrix2 : SparseMatrix
0-1 Matrix, second adjacency matrix
nb_blocks : int
Total number of blocks
Returns
-------
tuple
*nc_adjacencymatrix:* SparseMatrix containing non-conflicting adj.
*c_adjacencymatrix:* SparseMatrix containing conflicting adj.
"""
# 0-1 adjacency matrix containing adjacencies of matrix2 non-conflicting with matrix1
nc_adjacencymatrix = SparseMatrix(2 * nb_blocks + 1, 2 * nb_blocks + 1)
# 0-1 adjacency matrix containing adjacencies of matrix2 that are in conflict with matrix1
c_adjacencymatrix = SparseMatrix(2 * nb_blocks + 1, 2 * nb_blocks + 1)
for extremity_id1, y_ids in matrix2.sparse_matrix.iteritems():
for extremity_id2 in y_ids:
if(sum(matrix1[extremity_id1]) + sum(matrix1[extremity_id2]) == 0):
nc_adjacencymatrix[extremity_id1, extremity_id2] = 1
nc_adjacencymatrix[extremity_id2, extremity_id1] = 1
else:
c_adjacencymatrix[extremity_id1, extremity_id2] = 1
c_adjacencymatrix[extremity_id2, extremity_id1] = 1
return nc_adjacencymatrix, c_adjacencymatrix
[docs]def write_retained_adjs(last_adjs, new_adjs, group1_adjs, group2_adjs, cars, step_nb,
car_left, car_right, species1_id, species2_id, species3_id):
""" Write adjacencies found for ancestor into the output file
Parameters
----------
last_adjs : String
Name of the file where previously added adjacencies are stored
new_adjs : String
Name of the file where new adjacencies must be written
group1_adjs : list
List of fully conserved non-conflicting adjacencies
group2_adjs : list
List of partly conserved non-conflicting adjacencies
cars : list
List of all cars
step_nb : int
Step of the ProCARs method
car_left : list
For each car, left neighbor
car_right : list
For each car, right neighbor
species1_id : list
List of IDs of species in group1 of partition defined by the ancestor node
species2_id : list
List of IDs of species in group2 of partition defined by the ancestor node
species3_id : list
List of IDs of species in group3 of partition defined by the ancestor node
"""
with open(last_adjs, "r") as last:
all_last_adjs = last.readlines()
with open(new_adjs, "w") as file_handler:
find_info_and_write_adjs(group1_adjs, group2_adjs, car_left, car_right, cars, step_nb,
species1_id, species2_id, species3_id, file_handler)
file_handler.writelines(all_last_adjs)
[docs]def write_discarded_adjs(filename, group1_adjs, group2_adjs, car_left, car_right,
species1_id, species2_id, species3_id, cars, step_nb):
""" Write adjacencies found for ancestor into the output file
Parameters
----------
filename : String
Name of the output file in which writting all adjacencies
group1_adjs : list
List of car_adjacencies fully conserved but conflicting
group2_adjs : list
List of car_adjacencies partly conserved and in conflict with fully or partly conserved adjs
car_left : list
For each car, left neighbor
car_right : list
For each car, right neighbor
species1_id : list
List of IDs of species in group1 of partition defined by the ancestor node
species2_id : list
List of IDs of species in group2 of partition defined by the ancestor node
species3_id : list
List of IDs of species in group3 of partition defined by the ancestor node
cars : list
List of all cars
step_nb : int
Current step of the ProCARs method
"""
with open(filename, "w") as file_handler:
find_info_and_write_adjs(group1_adjs, group2_adjs, car_left, car_right, cars, step_nb,
species1_id, species2_id, species3_id, file_handler)
[docs]def find_info_and_write_adjs(group1_adjs, group2_adjs, car_left, car_right, cars, step_nb,
species1_id, species2_id, species3_id, file_handler):
""" Find the conservation status of all retained adjacencies, and write them
into the output file
Parameters
----------
group1_adjs : list
List of car_adjacencies fully conserved
group2_adjs : list
List of car_adjacencies partly conserved
car_left : list
For each car, left neighbor
car_right : list
For each car, right neighbor
cars : list
For each car id, ordered list of signed blocks in the car
step_nb : int
Current step of the ProCars method
species1_id : list
IDs of group1 species
species2_id : list
IDs of group2 species
species3_id : list
IDs of group3 species
file_handler : fileObject
Open file in which writing the given adjacencies and their information
"""
for car_adjacency in group1_adjs:
labels = adj_func.find_adj_info(car_adjacency, car_left, car_right,
species1_id, species2_id, species3_id)
utils_IO.write_adjacency(file_handler, cars, car_adjacency, 0, step_nb, labels)
for car_adjacency in group2_adjs:
labels = adj_func.find_adj_info(car_adjacency, car_left, car_right,
species1_id, species2_id, species3_id)
utils_IO.write_adjacency(file_handler, cars, car_adjacency, 1, step_nb, labels)
[docs]def find_adjacency_sets(nb_cars, nb_species, blocks, cars, blocks_car, block_position_in_car,
car_neigh_bin, species1_id, species2_id, species3_id):
""" Finds all sets of adjacencies, according to their status : (non)conflicting,
fully/partly conserved, etc.
Parameters
----------
nb_cars : int
Total number of cars
nb_species : int
Total number of species
blocks : list
All blocks, for each species: array of arrays (blocks for each species)
blocks = [[*list of blocks for spe_id=0*], [*list of blocks for spe_id=1*], ...]
with *list of blocks for spe_id=1* =
[[*list of blocks for spe_id=1, chromosome 1*],
[*list of blocks for spe_id=1, chromosome 2*],
...]
with *list of blocks for spe_id=1, chromosome 1* =
[[start, end, num_bloc, orient],
[start, end, num_bloc, orient],
... for the n blocks]
cars : list
For each car id, ordered list of signed blocks in the car
blocks_car : list
[[*list of block_cars for spe_id = 0*], [*list of block_cars for spe_id = 1*], ...]
with *list of block_cars for spe_id = 1* =
[[*list of blocks' cars for spe_id=1, chromosome 1*],
[*list of blocks' cars for spe_id=1, chromosome 2*],
...]
block_position_in_car : list
Integer array such that block_position_in_car[block_id] = position of block in car
to which it belongs
car_neigh_bin : string
Name of the binary file where car neighbors information is stored
species1_id : list
IDs of group1 species
species2_id : list
IDs of group2 species
species3_id : list
IDs of group3 species
Returns
-------
tuple
*car_left:* left neighbors of each car, such that left[spe_id][car_id] =
[list of left neighbors of car_id in species spe_id]
*car_right:* same as car_left, for right neighbors
*fs_nc_adj:* fully conserved non-conflicting adjacencies
*ps_nc2_adj:* partly conserved non-conflicting adjacencies
*fs_c_adj:* fully conserved conflicting adjacencies
*ps_nc_c_adj:* partly conserved adjacencies in conflict with fully conserved ones
*ps_c_adj:* cnflicting partly conserved adjacencies
"""
# COMPUTING CARS NEIGHBORS IN SPECIES
[car_left, car_right] = compute_cars_neighbors(nb_cars, nb_species, blocks, cars, blocks_car,
block_position_in_car, car_neigh_bin)
# COMPUTING CONSERVED CARS ADJACENCIES IN ANCESTOR
res_adjs = compute_conserved_adjacencies(car_left, car_right, nb_cars,
species1_id, species2_id, species3_id)
conserved_left_D, conserved_right_D, conserved_left, conserved_right = res_adjs
# Adjacency Matrices
# ## Abbreviation:
# fs : Fully conserved
# ps : Partly Conserved
# nc : Non-conflicting
# c : Conflicting
# Adj : Adjacences
fs_matrix = compute_adjacencymatrix_from_neighbor_arrays(conserved_left_D,
conserved_right_D, nb_cars)
[fs_nc_matrix, fs_c_matrix] = adj_func.split_adjacencymatrix(fs_matrix, nb_cars)
ps_matrix = compute_adjacencymatrix_from_neighbor_arrays(conserved_left, conserved_right,
nb_cars)
[ps_r_matrix, _] = diff_nc_matrix_m(fs_nc_matrix, ps_matrix, nb_cars)
[ps_nc_matrix, ps_c_matrix] = adj_func.split_adjacencymatrix(ps_r_matrix, nb_cars)
# COMPUTING CARS ADJACENCIES IN ANCESTOR
[ps_nc2_matrix, ps_nc_c_matrix] = diff_nc_matrix_m(fs_c_matrix, ps_nc_matrix, nb_cars)
fs_nc_pair = adj_func.compute_extremity_pair(fs_nc_matrix)
fs_nc_adj = adj_func.compute_adjacencies(fs_nc_pair, nb_cars)
nb_fs_nc_adj = len(fs_nc_adj)
ps_nc2_pair = adj_func.compute_extremity_pair(ps_nc2_matrix)
ps_nc2_adj = adj_func.compute_adjacencies(ps_nc2_pair, nb_cars)
nb_ps_nc2_adj = len(ps_nc2_adj)
fs_c_pair = adj_func.compute_extremity_pair(fs_c_matrix)
fs_c_adj = adj_func.compute_adjacencies(fs_c_pair, nb_cars)
ps_nc_c_pair = adj_func.compute_extremity_pair(ps_nc_c_matrix)
ps_nc_c_adj = adj_func.compute_adjacencies(ps_nc_c_pair, nb_cars)
ps_c_pair = adj_func.compute_extremity_pair(ps_c_matrix)
ps_c_adj = adj_func.compute_adjacencies(ps_c_pair, nb_cars)
print("... " + str(nb_fs_nc_adj) + " fully conserved adjacencies")
print("... " + str(nb_ps_nc2_adj) + " partly conserved adjacencies")
return car_left, car_right, fs_nc_adj, ps_nc2_adj, fs_c_adj, ps_nc_c_adj, ps_c_adj
[docs]def main(car_file_name, adjacency_file_name_prefix, step_nb,
tree_bin, blocks_bin, car_bin, car_neigh_bin):
""" Main method
From a CAR file, it computes a set of conserved adjacencies between CARs
for the ancestor node, and it splits this set into a set of non-conflicting
adjacencies and a set of conflicting adjacencies.
Parameters
----------
car_file_name : string
Prefix of the PQtree files
adjacency_file_name_prefix : string
Prefix of the Adjacencies files
step_nb : int
Current step of the ProCARs method
tree_bin : string
Name of the binary file where information about the species tree is stored
blocks_bin : string
Name of the binary file where information about blocks is stored
car_bin : string
Name of the binary file where information about current cars is stored
car_neigh_bin : string
Name of the binary file where information about car neighbors is stored
Returns
-------
tuple
*nb_final_adj:* total number of adjacencies added
*nb_discarded:* total number of discarded adjacencies
*fs_nc_adj:* fully conserved non-conflicting adjacencies
*ps_nc2_adj:* partly conserved non-conflicting adjacencies
*fs_c_adj:* conflicting fully conserved adjacencies
*ps_nc_c_adj:* partly conserved adjacencies in conflict with fully conserved ones
*ps_c_adj:* conflicting partly conserved adjacencies
"""
# Read information from blocks and tree
(_, _, _, _, _, nb_species, species1_id,
species2_id, species3_id, _) = utils_IO.read_binary_file(tree_bin)
(blocks, nb_blocks, _, _) = utils_IO.read_binary_file(blocks_bin)
# Get CARs information
res_car = commons_read_cars(nb_blocks, blocks, nb_species, car_file_name, step_nb, car_bin)
[cars, nb_cars, block_position_in_car, blocks_car] = res_car
# Split adjacencies into groups according to conservation and conflicts
res_adjs = find_adjacency_sets(nb_cars, nb_species, blocks, cars, blocks_car,
block_position_in_car, car_neigh_bin, species1_id,
species2_id, species3_id)
car_left, car_right, fs_nc_adj, ps_nc2_adj, fs_c_adj, ps_nc_c_adj, ps_c_adj = res_adjs
# Count adjacencies
nb_final_adj = len(fs_nc_adj) + len(ps_nc2_adj)
nb_discarded = len(fs_c_adj) + len(ps_nc_c_adj) + len(ps_c_adj)
# Writting found car adjacencies for ancestor
last_adjs = adjacency_file_name_prefix + "_" + str(step_nb - 1) + ".txt"
new_adjs = adjacency_file_name_prefix + "_" + str(step_nb) + ".txt"
write_retained_adjs(last_adjs, new_adjs, fs_nc_adj, ps_nc2_adj, cars, step_nb,
car_left, car_right, species1_id, species2_id, species3_id)
# Write discarded adjacencies
filename = new_adjs + "_discarded.txt"
write_discarded_adjs(filename, fs_c_adj, ps_nc_c_adj + ps_c_adj, car_left, car_right,
species1_id, species2_id, species3_id, cars, step_nb)
# print some information
print("... " + str(nb_discarded) + " discarded adjacencies\n")
return nb_final_adj, nb_discarded, fs_nc_adj, ps_nc2_adj, fs_c_adj, ps_nc_c_adj, ps_c_adj