Source code for procars.step_modules.compute_conserved_adjacencies

#!/usr/bin/env python
# -*- coding: utf-8 -*-:

"""
Copyright © Bonsai - LIFL (Université Lille 1, CNRS UMR 8022) and Inria-Lille Nord Europe

contact: aida.ouangraoua@inria.fr, amandine.perrin@inria.fr

This software is a computer program whose purpose is to progressively reconstruct ancestral
gene orders.

This software is governed by the CeCILL-B license under French law and
abiding by the rules of distribution of free software. You can use,
modify and/or redistribute the software under the terms of the CeCILL-B
license as circulated by CEA, CNRS and Inria at the following URL
http://www.cecill.info, or in the LICENCE file at the root directory of this program.

The fact that you are presently reading this means that you have had
knowledge of the CeCILL-B license and that you accept its terms.

---------------------------------------------------

``compute_conserved_adjacencies`` **module description**:

From a CAR file, this module computes a set of conserved  adjacencies between cars
for the ancestor node, and it splits this set into :

    * a set of fully conserved adjacencies and a set of partly conserved adjacencies first
    * among the two sets, a set of non-conflicting adjacencies and a set of conflicting adjacencies

.. moduleauthor:: Aïda Ouangraoua, Amandine PERRIN

February 2014

"""


from procars.utils.util_classes import SparseMatrix
from procars.utils import util_adjacency_functions as adj_func
from procars.utils import utils_IO


[docs]def compute_blocks_cars(nb_species, blocks, block_to_car):
    """ Computes the list of blocks' cars for each species

    Parameters
    ----------
    nb_species : int
        Total number of species
    blocks : list
        array of arrays (blocks for each species)

        blocks = [[*list of blocks for spe_id=0*], [*list of blocks for spe_id=1*], ...]

        with *list of blocks for spe_id=1* =
        [[*list of blocks for spe_id=1, chromosome 1*],
        [*list of blocks for spe_id=1, chromosome 2*],
        ...]

        with *list of blocks for spe_id=1, chromosome 1* =
        [[start, end, num_bloc, orient],
        [start, end, num_bloc, orient],
        ... for the n blocks]
    block_to_car : list
        Integer array such that block_2_car[block_id] = car_id to which block_id belongs

    Returns
    -------
    list
        array of arrays (blocks' cars)

        blocks_car = [[*list of block_cars for spe_id=0*], [*list of block_cars for spe_id=1*], ...]

        with *list of block_cars for spe_id=1* =
        [[*list of blocks' cars for spe_id=1, chromosome 1*],
        [*list of blocks' cars for spe_id=1, chromosome 2*],
        ...]
    """
    # array containing arrays (one for each species) same as array blocks where blocks are replaced
    # by corresponding cars
    blocks_car = []
    # sort blocks by ascending position in each chr of each species
    for spe_id in range(nb_species):
        # for each chr
        for chr_id in range(len(blocks[spe_id])):
            # sort blocks by start position
            blocks[spe_id][chr_id].sort()
    # for each species
    for spe_id in range(nb_species):
        blocks_car.append([])
        # for each chromosomes in species
        for chr_id in range(len(blocks[spe_id])):
            blocks_car[spe_id].append([])
            # for each block in chromosome
            for signed_block in blocks[spe_id][chr_id]:
                block_id = signed_block[2]
                blocks_car[spe_id][chr_id].append(block_to_car[block_id])
    return blocks_car


[docs]def commons_read_cars(nb_blocks, blocks, nb_species, car_file_name, step_nb, binary_file_name):
    """
    Function reading blocks from a block file, reading car file, and computing block neighbors

    Parameters
    ----------
    nb_blocks : int
        Number of blocks
    blocks : list
        array of arrays (blocks for each species)

        blocks = [[*list of blocks for spe_id=0*], [*list of blocks for spe_id=1*], ...]

        with *list of blocks for spe_id=1* =
        [[*list of blocks for spe_id=1, chromosome 1*],
        [*list of blocks for spe_id=1, chromosome 2*],
        ...]

        with *list of blocks for spe_id=1, chromosome 1* =
        [[start, end, num_bloc, orient],
        [start, end, num_bloc, orient],
        ... for the n blocks]
    nb_species : int
        Total number of species
    car_file_name : String
        Name of the PQtree file where current CARs are stored
    step_nb : int
        Current step of the ProCars method
    binary_file_name : String
        Name of the binary file where information about current cars must be saved

    Returns
    -------
    tuple
        *cars:* array of arrays (cars)

        *nb_cars:* integer, total number of cars

        *block_position_in_car:* integer array

        *blocks_car:* array of arrays (blocks' cars): see return of **compute_blocks_cars**
    """
    car_file_name += "_" + str(int(step_nb) - 1) + ".txt"
    cars, block_2_car, block_position_in_car = utils_IO.read_car_file(car_file_name, nb_blocks)
    nb_cars = len(cars) - 1
    # Compute same as array blocks where blocks are replaced by corresponding cars
    blocks_car = compute_blocks_cars(nb_species, blocks, block_2_car)
    values_saved = [cars, nb_cars, block_position_in_car, blocks_car]
    # save computed values in a binary file
    utils_IO.save_binary_information(binary_file_name, values_saved)
    return cars, nb_cars, block_position_in_car, blocks_car


[docs]def compute_potential_cars_neighbors(spe_id, blocks, blocks_car):
    """ Compute potential pairs of neighboring cars

    Parameters
    ----------
    spe_id : int
        ID of current species
    blocks : list
        Positions of blocks for each species (see **commons_read_cars**)
    blocks_car : list
        Blocks' CARs for each species. See return of **compute_blocks_cars**

    Returns
    -------
    tuple
        *neighbors:* pair of cars having consecutive segments in species

        *segments_neighbors:* composition of corresponding segments (in terms of signed blocks)
    """
    neighbors = []    # pair of cars having consecutive segments in species
    segments_neighbors = []   # composition of corresponding segments (in terms of signed blocks)
    # for each chromosome in species 'spe_id', find segments of consecutive identical cars
    for chr_id in range(len(blocks_car[spe_id])):
        neighbors.append([])  # add empty pair of cars to begin
        segments_neighbors.append([])   # add empty pair of segments to begin
        last_car = 0        # last car processed at the previous loop
        last_segment = 0    # segment corresponding to last car
        i = 0
        while(i < len(blocks_car[spe_id][chr_id])):
            # current car
            car_id = blocks_car[spe_id][chr_id][i]
            # corresponding segment
            segment = []
            # recover car segment
            # while we are in the same current cars
            while(i < len(blocks_car[spe_id][chr_id]) and blocks_car[spe_id][chr_id][i] == car_id):
                block_id = blocks[spe_id][chr_id][i][2]
                sign = blocks[spe_id][chr_id][i][3]
                segment.append(sign * block_id)
                i += 1
            # add new pair of potential neighboring cars
            # and, add the pair of corresponding segments
            # complete an uncomplete pair of neighboring cars
            if(len(neighbors[-1]) < 2):
                neighbors[-1].append(car_id)
                segments_neighbors[-1].append(segment)
            # add a new pair of neighboring cars
            else:
                neighbors.append([last_car, car_id])
                segments_neighbors.append([last_segment, segment])
            last_car = car_id
            last_segment = segment
    return neighbors, segments_neighbors


[docs]def reverse(segment):
    """ Function producing the reverse of a segment

    Examples
    --------
    For example::

        reverse([1 -2 3]) = [-3 2 -1]

    Parameters
    ----------
    segment : list
        Segment of CAR IDs

    Returns
    -------
    list
        *segment_rev:* reverse of given segment
    """
    segment_rev = []   # array : reverse of segment
    for signed_block in segment:
        segment_rev = [-signed_block] + segment_rev
    return segment_rev


[docs]def segment_car_common_extremity(segment, car, block_position_in_car, side_segment, side_car):
    """ Function checking if a segment and a car share a same extremity region in terms of synteny.

    Parameters
    ----------
    segment : list
        Segment of CAR IDs
    car : list
        Ordered list of signed blocks in the given CAR
    block_position_in_car : list
        Integer array such that block_position_in_car[block_id] = position of block
        in car to which it belongs
    side_segment : int
        ``1`` if we look at right end of the segment, ``0`` otherwise
    side_car : int
        ``1`` if we look at right end of the CAR, ``0`` otherwise

    Returns
    -------
    boolean
        True if it shares an extremity, False otherwise

    Examples
    --------

        * segment_car_common_extremity([1 -2 4 -3], [1 2 3 4], 1, 1) = True, because they share
          a end region composed of blocks 3 and 4.
        * segment_car_common_extremity([1 -2 4 -3], [1 2 3 4], 0, 0) = True, because they share
          a begin region composed of blocks 1 and 2.
        * segment_car_common_extremity([1 -2 4 -3], [-3 4 2 -1], 0, 1) = True, because they do
          share a same region that is the beginning of segment and the end of car
    """
    answer = False
    segment_rev = segment
    if(side_segment == 1):
        segment_rev = reverse(segment)
    extremity_position = 0
    far_position = 0
    if(side_car == 1):
        extremity_position = len(car) - 1
        far_position = len(car) - 1
    i = 0
    while((i < len(segment_rev)) and
          (i < 2 or i != abs(far_position - extremity_position) + 1) and
          (i < len(car) - 1)):
        signed_block = segment_rev[i]
        block_position = block_position_in_car[abs(signed_block)]
        if(side_car == 0 and block_position > far_position):
            far_position = block_position
        if(side_car == 1 and block_position < far_position):
            far_position = block_position
        i += 1
    if (i >= 2 and i == abs(far_position - extremity_position) + 1):
        answer = True
    return answer


[docs]def compute_extremity(segment, car_id, cars, block_position_in_car, orientation):
    """ Function computing the orientation of a CAR in a CAR adjacency.

    Parameters
    ----------
    segment : list
        List of block IDs
    car_id : int
        Given CAR id
    cars : list
        List of all current cars
    block_position_in_car : list
        Integer array such that block_position_in_car[block_id] = position of block
        in car to which it belongs
    orientation : int
        ``1`` if positive orientation, ``-1`` otherwise

    Returns
    -------
    int
        *extremity:* oriented car, or 0 if segment is not an extremity of car
    """
    extremity = 0
    # segment with conserved block extremity
    if(segment[-1] == cars[car_id][-1]):
        extremity = car_id
    elif(segment[-1] == -cars[car_id][0]):
        extremity = -car_id
    # segment with conserved (unsigned) extremity
    elif(len(segment) >= 2):
        # segment with conserved (unsigned) block extremity
        if(abs(segment[-1]) == abs(cars[car_id][-1])):
            extremity = car_id
        elif(abs(segment[-1]) == abs(cars[car_id][0])):
            extremity = -car_id
        # segment with conserved (unsigned) segment extremity
        else:
            if(segment_car_common_extremity(segment, cars[car_id],
                                            block_position_in_car, 1, 1) == 1):
                extremity = car_id
            elif(segment_car_common_extremity(segment, cars[car_id],
                                              block_position_in_car, 1, 0) == 1):
                extremity = -car_id
    # if orientation is reversed, then reverse extremity
    if(orientation == -1):
        extremity = -extremity
    return extremity


[docs]def check_potential_cars_neighbors(spe_id, neighbors, segments_neighbors, cars,
                                   block_position_in_car, left, right):
    """ For each pair of potential neighboring CARs, check if the pair of segments
    really supports the adjacency

    Parameters
    ----------
    spe_id : int
        ID of species to check
    neighbors : list
        List of potential neighbors (with CAR ids). **Ex:** [[], [1, 2], [2, 3], ...]
    segments_neighbors : list
        List of potential segment neighbors. **Ex.** [[], [[1], [2]], [[2], [3]], ...]
    cars : list
        List of ordered cars
    block_position_in_car : list
        Integer array such that block_position_in_car[block_id] = position of block
        in car to which it belongs
    left : list
        Left neighbors of species for all blocks
    right : list
        Left neighbors of species for all blocks

    Returns
    -------
    tuple
        *left* and *right* valid neighbors
    """
    # for each valid pair of cars in array neighbors
    for num, neighs in enumerate(neighbors):
        if(len(neighs) > 1):
            [car1, car2] = neighs
            [segment1, segment2] = segments_neighbors[num]
            extremity1 = compute_extremity(segment1, car1, cars, block_position_in_car, 1)
            extremity2 = compute_extremity(reverse(segment2), car2, cars, block_position_in_car, -1)
            if(extremity1 != 0 and extremity2 != 0):
                if (extremity1 == car1):
                    right[spe_id][car1].append(extremity2)
                else:
                    left[spe_id][car1].append(-extremity2)
                if (extremity2 == car2):
                    left[spe_id][car2].append(extremity1)
                else:
                    right[spe_id][car2].append(-extremity1)
    return left, right


[docs]def compute_cars_neighbors(nb_cars, nb_species, blocks, cars, blocks_car,
                           block_position_in_car, binary_file_name):
    """ Finds all possible neighbors for each CAR

    Parameters
    ----------
    nb_cars : int
        Total number of CARs
    nb_species : int
        Total number of species
    blocks : list
        All blocks, for each species: array of arrays (blocks for each species)

        blocks = [[*list of blocks for spe_id=0*], [*list of blocks for spe_id=1*], ...]

        with *list of blocks for spe_id=1* =
        [[*list of blocks for spe_id=1, chromosome 1*],
        [*list of blocks for spe_id=1, chromosome 2*],
        ...]

        with *list of blocks for spe_id=1, chromosome 1* =
        [[start, end, num_bloc, orient],
        [start, end, num_bloc, orient],
        ... for the n blocks]
    cars : list
        List of cars with signed blocks in each one
    blocks_cars : list
        blocks_car = [[*list of block_cars for spe_id=0*], [*list of block_cars for spe_id=1*], ...]

        with *list of block_cars for spe_id=1* =
        [[*list of blocks' cars for spe_id=1, chromosome 1*],
        [*list of blocks' cars for spe_id=1, chromosome 2*],
        ...]
    block_position_in_car : list
        Integer array such that block_position_in_car[block_id] = position of block
        in car to which it belongs

    Returns
    -------
    list
        *left* car neighbors and *right* car neighbors
        such that left[spe_id][car_id] = [list of left neighbors of car_id in species spe_id]
    """
    left = []    # array: integer(species_id) -> array(storing left neighbors)
    right = []   # array: integer(species_id) -> array(storing right neighbors)

    # add cells to arrays left, and right
    for spe_id in range(nb_species):
        left.append([])
        right.append([])
        for _ in range(nb_cars+1):
            left[spe_id].append([])
            right[spe_id].append([])
    # for each species
    for spe_id in range(nb_species):
        # First, compute potential pairs of neighboring cars
        neighbors, segments_neighbors = compute_potential_cars_neighbors(spe_id, blocks, blocks_car)
        # Second, for each pair of potential neighboring cars, check if the pair of
        # segments supports an adjacency
        check_potential_cars_neighbors(spe_id, neighbors, segments_neighbors, cars,
                                       block_position_in_car, left, right)
    values_saved = [left, right]
    utils_IO.save_binary_information(binary_file_name, values_saved)
    return left, right


[docs]def fill_conserved_arrays(conserved_left_D, conserved_right_D, conserved_left, conserved_right,
                          left, right, species1_id, species2_id, species3_id, block1, block2):
    """ Method filling the conservation status arrays for a given adjacency *block1, block2*

    Parameters
    ----------
    conserved_left_D : list
        Array of left fully conserved adjacencies to be filled
    conserved_right_D : list
        Array of right fully conserved adjacencies to be filled
    conserved_left : list
        Array of left conserved adjacencies to be filled
    conserved_right : list
        Array of right conserved adjacencies to be filled
    left : list
        Array such that left[block_id] is an array containing all left neighbors of block block_id
    right : list
        Array such that right[block_id] is an array containing all right neighbors of block_id
    species1_id: list
        IDs of group1 species
    species2_id: list
        IDs of group2 species
    species3_id: list
        IDs of group3 species
    block1: int
        First block id of the given adjacency
    block2: int
        Second block id of the given adjacency
    """
    potential_adjacencies = [[block1, block2], [block1, -block2], [-block1, block2],
                             [-block1, -block2]]
    for [signed_block1, signed_block2] in potential_adjacencies:
        support = adj_func.conservation_status([signed_block1, signed_block2], left, right,
                                               species1_id, species2_id, species3_id)[0]
        # if adjacency found in the 3 groups: fully conserved adjacency
        if(support == 3):
            if(signed_block1 > 0):
                conserved_right_D[signed_block1].append(signed_block2)
            else:
                conserved_left_D[-signed_block1].append(-signed_block2)
            if(signed_block2 > 0):
                conserved_left_D[signed_block2].append(signed_block1)
            else:
                conserved_right_D[-signed_block2].append(-signed_block1)
        # if not fully conserved adjacency, but present in at least one group:
        # partly conserved adjacency
        elif(support > 1):
            if(signed_block1 > 0):
                conserved_right[signed_block1].append(signed_block2)
            else:
                conserved_left[-signed_block1].append(-signed_block2)
            if(signed_block2 > 0):
                conserved_left[signed_block2].append(signed_block1)
            else:
                conserved_right[-signed_block2].append(-signed_block1)


[docs]def compute_conserved_adjacencies(left, right, nb_blocks, species1_id, species2_id, species3_id):
    """ Procedure computing the conservation status of all potential adjacencies

    Fills arrays for fully conserved adjacencies (conserved_left/right_D) and partly conserved
    adjacencies (conserved_left/right)

    Parameters
    ----------
    left : list
        Array such that left[block_id] is an array containing all left neighbors of block block_id
    right : list
        Array such that right[block_id] is an array containing all right neighbors of block_id
    nb_blocks : int
        Total number of blocks
    species1_id: list
        IDs of group1 species
    species2_id: list
        IDs of group2 species
    species3_id: list
        IDs of group3 species

    Returns
    -------
    list
        *conserved_left_D:* array of left fully conserved adjacencies

        *conserved_right_D:* array of right fully conserved adjacencies

        *conserved_left:* array of left conserved adjacencies

        *conserved_right:* array of right conserved adjacencies
    """
    # initialize arrays storing doubly conserved adjacencies
    conserved_left_D = [[] for _ in xrange(nb_blocks + 1)]
    conserved_right_D = [[] for _ in xrange(nb_blocks + 1)]
    # initialize arrays storing simply conserved adjacencies
    conserved_left = [[] for _ in xrange(nb_blocks + 1)]
    conserved_right = [[] for _ in xrange(nb_blocks + 1)]
    # Find status of all possible adjacencies
    for block1 in xrange(1, nb_blocks + 1):
        for block2 in xrange(block1 + 1, nb_blocks + 1):
            # fill arrays with this adjacency
            fill_conserved_arrays(conserved_left_D, conserved_right_D,
                                  conserved_left, conserved_right, left, right, species1_id,
                                  species2_id, species3_id, block1, block2)
    return conserved_left_D, conserved_right_D, conserved_left, conserved_right


[docs]def compute_adjacencymatrix_from_neighbor_arrays(left, right, nb_blocks):
    """ Function computing an adjacency matrix from left and right neighbors

    Parameters
    ----------
    left : list
        Array such that left[block_id] is an array containing all left neighbors of block block_id
    right : list
        Array such that right[block_id] is an array containing all right neighbors of block_id
    nb_blocks : int
        Total number of blocks

    Returns
    -------
    SparseMatrix
        *adjacencymatrix:* 0-1 Matrix which rows and columns correspond to block extremities with:
            - right extremity of bloc_id : extremity_id = block_id
            - left extremity of bloc_id : extremity_id = block_id + nb_blocks
    """
    adjacencymatrix = SparseMatrix(2 * nb_blocks + 1, 2 * nb_blocks + 1)
    for block_id in xrange(1, nb_blocks + 1):
        extremity_id1 = block_id
        extremity_id2 = 0
        for right_block in right[block_id]:
            if(right_block < 0):
                extremity_id2 = abs(right_block)
            else:
                extremity_id2 = right_block + nb_blocks
            adjacencymatrix[extremity_id1, extremity_id2] = 1
        extremity_id1 = block_id + nb_blocks
        extremity_id2 = 0
        for left_block in left[block_id]:
            if(left_block < 0):
                extremity_id2 = abs(left_block) + nb_blocks
            else:
                extremity_id2 = left_block
            adjacencymatrix[extremity_id1, extremity_id2] = 1
    return adjacencymatrix


[docs]def diff_nc_matrix_m(matrix1, matrix2, nb_blocks):
    """ Function splitting an adjacency matrix *matrix2* into two matrices : the
    non-conflicting and the conflicting adjacencies with an other matrix *matrix1*

    Parameters
    ----------
    matrix1 : SparseMatrix
        0-1 Matrix, first adjacency matrix
    matrix2 : SparseMatrix
        0-1 Matrix, second adjacency matrix
    nb_blocks : int
        Total number of blocks

    Returns
    -------
    tuple
        *nc_adjacencymatrix:* SparseMatrix containing non-conflicting adj.

        *c_adjacencymatrix:* SparseMatrix containing conflicting adj.
    """
    # 0-1 adjacency matrix containing adjacencies of matrix2 non-conflicting with matrix1
    nc_adjacencymatrix = SparseMatrix(2 * nb_blocks + 1, 2 * nb_blocks + 1)
    # 0-1 adjacency matrix containing adjacencies of matrix2 that are in conflict with matrix1
    c_adjacencymatrix = SparseMatrix(2 * nb_blocks + 1, 2 * nb_blocks + 1)
    for extremity_id1, y_ids in matrix2.sparse_matrix.iteritems():
        for extremity_id2 in y_ids:
            if(sum(matrix1[extremity_id1]) + sum(matrix1[extremity_id2]) == 0):
                nc_adjacencymatrix[extremity_id1, extremity_id2] = 1
                nc_adjacencymatrix[extremity_id2, extremity_id1] = 1
            else:
                c_adjacencymatrix[extremity_id1, extremity_id2] = 1
                c_adjacencymatrix[extremity_id2, extremity_id1] = 1
    return nc_adjacencymatrix, c_adjacencymatrix


[docs]def write_retained_adjs(last_adjs, new_adjs, group1_adjs, group2_adjs, cars, step_nb,
                        car_left, car_right, species1_id, species2_id, species3_id):
    """ Write adjacencies found for ancestor into the output file

    Parameters
    ----------
    last_adjs : String
        Name of the file where previously added adjacencies are stored
    new_adjs : String
        Name of the file where new adjacencies must be written
    group1_adjs : list
        List of fully conserved non-conflicting adjacencies
    group2_adjs : list
        List of partly conserved non-conflicting adjacencies
    cars : list
        List of all cars
    step_nb : int
        Step of the ProCARs method
    car_left : list
        For each car, left neighbor
    car_right : list
        For each car, right neighbor
    species1_id : list
        List of IDs of species in group1 of partition defined by the ancestor node
    species2_id : list
        List of IDs of species in group2 of partition defined by the ancestor node
    species3_id : list
        List of IDs of species in group3 of partition defined by the ancestor node
    """
    with open(last_adjs, "r") as last:
        all_last_adjs = last.readlines()
    with open(new_adjs, "w") as file_handler:
        find_info_and_write_adjs(group1_adjs, group2_adjs, car_left, car_right, cars, step_nb,
                                 species1_id, species2_id, species3_id, file_handler)
        file_handler.writelines(all_last_adjs)


[docs]def write_discarded_adjs(filename, group1_adjs, group2_adjs, car_left, car_right,
                         species1_id, species2_id, species3_id, cars, step_nb):
    """ Write adjacencies found for ancestor into the output file

    Parameters
    ----------
    filename : String
        Name of the output file in which writting all adjacencies
    group1_adjs : list
        List of car_adjacencies fully conserved but conflicting
    group2_adjs : list
        List of car_adjacencies partly conserved and in conflict with fully or partly conserved adjs
    car_left : list
        For each car, left neighbor
    car_right : list
        For each car, right neighbor
    species1_id : list
        List of IDs of species in group1 of partition defined by the ancestor node
    species2_id : list
        List of IDs of species in group2 of partition defined by the ancestor node
    species3_id : list
        List of IDs of species in group3 of partition defined by the ancestor node
    cars : list
        List of all cars
    step_nb : int
        Current step of the ProCARs method
    """
    with open(filename, "w") as file_handler:
        find_info_and_write_adjs(group1_adjs, group2_adjs, car_left, car_right, cars, step_nb,
                                 species1_id, species2_id, species3_id, file_handler)


[docs]def find_info_and_write_adjs(group1_adjs, group2_adjs, car_left, car_right, cars, step_nb,
                             species1_id, species2_id, species3_id, file_handler):
    """ Find the conservation status of all retained adjacencies, and write them
    into the output file

    Parameters
    ----------
    group1_adjs : list
        List of car_adjacencies fully conserved
    group2_adjs : list
        List of car_adjacencies partly conserved
    car_left : list
        For each car, left neighbor
    car_right : list
        For each car, right neighbor
    cars : list
        For each car id, ordered list of signed blocks in the car
    step_nb : int
        Current step of the ProCars method
    species1_id : list
        IDs of group1 species
    species2_id : list
        IDs of group2 species
    species3_id : list
        IDs of group3 species
    file_handler : fileObject
        Open file in which writing the given adjacencies and their information
    """
    for car_adjacency in group1_adjs:
        labels = adj_func.find_adj_info(car_adjacency, car_left, car_right,
                                        species1_id, species2_id, species3_id)
        utils_IO.write_adjacency(file_handler, cars, car_adjacency, 0, step_nb, labels)
    for car_adjacency in group2_adjs:
        labels = adj_func.find_adj_info(car_adjacency, car_left, car_right,
                                        species1_id, species2_id, species3_id)
        utils_IO.write_adjacency(file_handler, cars, car_adjacency, 1, step_nb, labels)


[docs]def find_adjacency_sets(nb_cars, nb_species, blocks, cars, blocks_car, block_position_in_car,
                        car_neigh_bin, species1_id, species2_id, species3_id):
    """ Finds all sets of adjacencies, according to their status : (non)conflicting,
    fully/partly conserved, etc.

    Parameters
    ----------
    nb_cars : int
        Total number of cars
    nb_species : int
        Total number of species
    blocks : list
        All blocks, for each species: array of arrays (blocks for each species)

        blocks = [[*list of blocks for spe_id=0*], [*list of blocks for spe_id=1*], ...]

        with *list of blocks for spe_id=1* =
        [[*list of blocks for spe_id=1, chromosome 1*],
        [*list of blocks for spe_id=1, chromosome 2*],
        ...]

        with *list of blocks for spe_id=1, chromosome 1* =
        [[start, end, num_bloc, orient],
        [start, end, num_bloc, orient],
        ... for the n blocks]
    cars : list
        For each car id, ordered list of signed blocks in the car
    blocks_car : list
        [[*list of block_cars for spe_id = 0*], [*list of block_cars for spe_id = 1*], ...]

        with *list of block_cars for spe_id = 1* =
        [[*list of blocks' cars for spe_id=1, chromosome 1*],
        [*list of blocks' cars for spe_id=1, chromosome 2*],
        ...]
    block_position_in_car : list
        Integer array such that block_position_in_car[block_id] = position of block in car
        to which it belongs
    car_neigh_bin : string
        Name of the binary file where car neighbors information is stored
    species1_id : list
        IDs of group1 species
    species2_id : list
        IDs of group2 species
    species3_id : list
        IDs of group3 species

    Returns
    -------
    tuple
        *car_left:* left neighbors of each car, such that left[spe_id][car_id] =
        [list of left neighbors of car_id in species spe_id]

        *car_right:* same as car_left, for right neighbors

        *fs_nc_adj:* fully conserved non-conflicting adjacencies

        *ps_nc2_adj:* partly conserved non-conflicting adjacencies

        *fs_c_adj:* fully conserved conflicting adjacencies

        *ps_nc_c_adj:* partly conserved adjacencies in conflict with fully conserved ones

        *ps_c_adj:* cnflicting partly conserved adjacencies
    """
    # COMPUTING CARS NEIGHBORS IN SPECIES
    [car_left, car_right] = compute_cars_neighbors(nb_cars, nb_species, blocks, cars, blocks_car,
                                                   block_position_in_car, car_neigh_bin)
    # COMPUTING CONSERVED CARS ADJACENCIES IN ANCESTOR
    res_adjs = compute_conserved_adjacencies(car_left, car_right, nb_cars,
                                             species1_id, species2_id, species3_id)
    conserved_left_D, conserved_right_D, conserved_left, conserved_right = res_adjs
    # Adjacency Matrices
    # ## Abbreviation:
    # fs : Fully conserved
    # ps : Partly Conserved
    # nc : Non-conflicting
    # c : Conflicting
    # Adj : Adjacences
    fs_matrix = compute_adjacencymatrix_from_neighbor_arrays(conserved_left_D,
                                                             conserved_right_D, nb_cars)
    [fs_nc_matrix, fs_c_matrix] = adj_func.split_adjacencymatrix(fs_matrix, nb_cars)
    ps_matrix = compute_adjacencymatrix_from_neighbor_arrays(conserved_left, conserved_right,
                                                             nb_cars)
    [ps_r_matrix, _] = diff_nc_matrix_m(fs_nc_matrix, ps_matrix, nb_cars)
    [ps_nc_matrix, ps_c_matrix] = adj_func.split_adjacencymatrix(ps_r_matrix, nb_cars)
    # COMPUTING CARS ADJACENCIES IN ANCESTOR
    [ps_nc2_matrix, ps_nc_c_matrix] = diff_nc_matrix_m(fs_c_matrix, ps_nc_matrix, nb_cars)

    fs_nc_pair = adj_func.compute_extremity_pair(fs_nc_matrix)
    fs_nc_adj = adj_func.compute_adjacencies(fs_nc_pair, nb_cars)
    nb_fs_nc_adj = len(fs_nc_adj)

    ps_nc2_pair = adj_func.compute_extremity_pair(ps_nc2_matrix)
    ps_nc2_adj = adj_func.compute_adjacencies(ps_nc2_pair, nb_cars)
    nb_ps_nc2_adj = len(ps_nc2_adj)

    fs_c_pair = adj_func.compute_extremity_pair(fs_c_matrix)
    fs_c_adj = adj_func.compute_adjacencies(fs_c_pair, nb_cars)
    ps_nc_c_pair = adj_func.compute_extremity_pair(ps_nc_c_matrix)
    ps_nc_c_adj = adj_func.compute_adjacencies(ps_nc_c_pair, nb_cars)
    ps_c_pair = adj_func.compute_extremity_pair(ps_c_matrix)
    ps_c_adj = adj_func.compute_adjacencies(ps_c_pair, nb_cars)
    print("... " + str(nb_fs_nc_adj) + " fully conserved adjacencies")
    print("... " + str(nb_ps_nc2_adj) + " partly conserved adjacencies")
    return car_left, car_right, fs_nc_adj, ps_nc2_adj, fs_c_adj, ps_nc_c_adj, ps_c_adj


[docs]def main(car_file_name, adjacency_file_name_prefix, step_nb,
         tree_bin, blocks_bin, car_bin, car_neigh_bin):
    """ Main method

    From a CAR file, it computes a set of conserved adjacencies between CARs
    for the ancestor node, and it splits this set into a set of non-conflicting
    adjacencies and a set of conflicting adjacencies.

    Parameters
    ----------
    car_file_name : string
        Prefix of the PQtree files
    adjacency_file_name_prefix : string
        Prefix of the Adjacencies files
    step_nb : int
        Current step of the ProCARs method
    tree_bin : string
        Name of the binary file where information about the species tree is stored
    blocks_bin : string
        Name of the binary file where information about blocks is stored
    car_bin : string
        Name of the binary file where information about current cars is stored
    car_neigh_bin : string
        Name of the binary file where information about car neighbors is stored

    Returns
    -------
    tuple
        *nb_final_adj:* total number of adjacencies added

        *nb_discarded:* total number of discarded adjacencies

        *fs_nc_adj:* fully conserved non-conflicting adjacencies

        *ps_nc2_adj:* partly conserved non-conflicting adjacencies

        *fs_c_adj:* conflicting fully conserved adjacencies

        *ps_nc_c_adj:* partly conserved adjacencies in conflict with fully conserved ones

        *ps_c_adj:* conflicting partly conserved adjacencies
    """
    # Read information from blocks and tree
    (_, _, _, _, _, nb_species, species1_id,
     species2_id, species3_id, _) = utils_IO.read_binary_file(tree_bin)
    (blocks, nb_blocks, _, _) = utils_IO.read_binary_file(blocks_bin)
    # Get CARs information
    res_car = commons_read_cars(nb_blocks, blocks, nb_species, car_file_name, step_nb, car_bin)
    [cars, nb_cars, block_position_in_car, blocks_car] = res_car
    # Split adjacencies into groups according to conservation and conflicts
    res_adjs = find_adjacency_sets(nb_cars, nb_species, blocks, cars, blocks_car,
                                   block_position_in_car, car_neigh_bin, species1_id,
                                   species2_id, species3_id)
    car_left, car_right, fs_nc_adj, ps_nc2_adj, fs_c_adj, ps_nc_c_adj, ps_c_adj = res_adjs
    # Count adjacencies
    nb_final_adj = len(fs_nc_adj) + len(ps_nc2_adj)
    nb_discarded = len(fs_c_adj) + len(ps_nc_c_adj) + len(ps_c_adj)
    # Writting found car adjacencies for ancestor
    last_adjs = adjacency_file_name_prefix + "_" + str(step_nb - 1) + ".txt"
    new_adjs = adjacency_file_name_prefix + "_" + str(step_nb) + ".txt"
    write_retained_adjs(last_adjs, new_adjs, fs_nc_adj, ps_nc2_adj, cars, step_nb,
                        car_left, car_right, species1_id, species2_id, species3_id)
    # Write discarded adjacencies
    filename = new_adjs + "_discarded.txt"
    write_discarded_adjs(filename, fs_c_adj, ps_nc_c_adj + ps_c_adj, car_left, car_right,
                         species1_id, species2_id, species3_id, cars, step_nb)
    # print some information
    print("... " + str(nb_discarded) + " discarded adjacencies\n")
    return nb_final_adj, nb_discarded, fs_nc_adj, ps_nc2_adj, fs_c_adj, ps_nc_c_adj, ps_c_adj
Navigation

Source code for procars.step_modules.compute_conserved_adjacencies

Quick search

Navigation