Source code for procars.utils.util_adjacency_functions

# -*- coding: utf-8 -*-:
"""
Copyright © Bonsai - LIFL (Université Lille 1, CNRS UMR 8022) and Inria-Lille Nord Europe

contact: aida.ouangraoua@inria.fr, amandine.perrin@inria.fr

This software is a computer program whose purpose is to progressively reconstruct ancestral
gene orders.

This software is governed by the CeCILL-B license under French law and
abiding by the rules of distribution of free software. You can use,
modify and/or redistribute the software under the terms of the CeCILL-B
license as circulated by CEA, CNRS and Inria at the following URL
http://www.cecill.info, or in the LICENCE file at the root directory of this program.

The fact that you are presently reading this means that you have had
knowledge of the CeCILL-B license and that you accept its terms.

---------------------------------------------------

``util_adjacency_functions`` **module description**:

Module containing methods used by other modules like ``compute_resolved_conflicts`` and
``compute_conserved_adjacencies`` in order to find required adjacencies.

It contains for exemple methods in order to:

    - split a set of adjacencies in a set of conflicting adjacencies and a set of
      non-conflicting ones.
    - Find the block numbers involved in a given adjacency according to the adjacency ID
      in the matrix

.. moduleauthor:: Aïda Ouangraoua, Amandine PERRIN

February 2014

"""
from procars.utils.util_classes import SparseMatrix


[docs]def split_adjacencymatrix(adjacencymatrix, nb_blocks): """ Function splitting an adjacency matrix into two matrices: the non-conflicting and the conflicting adjacency matrices Parameters ---------- adjacencymatrix : SparseMatrix 0-1 Matrix, the adjacency matrix (1 for each possible adjacency, 0 otherwise) nb_blocks : int Total number of blocks Returns ------- tuple of SparseMatrix *non_conflicting:* 0-1 Matrix containing non-conflicting adjs *conflicting:* 0-1 Matrix containing conflicting adjs. """ # Matrix containing only non conflicting adjacencies non_conflicting = SparseMatrix(2 * nb_blocks + 1, 2 * nb_blocks + 1) # Matrix containing only conflicting adjacencies conflicting = SparseMatrix(2 * nb_blocks + 1, 2 * nb_blocks + 1) # for each block adjacency in adjacencymatrix, check if it is conflicting or not for extremity_id1, y_ids in adjacencymatrix.sparse_matrix.iteritems(): for extremity_id2 in y_ids: if sum(adjacencymatrix[extremity_id1]) + sum(adjacencymatrix[extremity_id2]) == 2: non_conflicting[extremity_id1, extremity_id2] = 1 non_conflicting[extremity_id2, extremity_id1] = 1 else: conflicting[extremity_id1, extremity_id2] = 1 conflicting[extremity_id2, extremity_id1] = 1 return non_conflicting, conflicting
[docs]def compute_extremity_pair(adjacencymatrix): """ Function computing the extremity pairs stored into an adjacency matrix: it finds all couples of indexes for which an adjacency exists (=indexes for which adjacencymatrix[x, y] != 0) Parameters ---------- adjacencymatrix : SparseMatrix Sparse matrix, with 1 at indices of possible adjacencies, 0 otherwise Returns ------- list *extremity_pairs:* array containing pairs of integers (extremities) """ extremity_pairs = adjacencymatrix.pairs() extremity_pairs = [elem for elem in extremity_pairs if elem[0] < elem[1]] return extremity_pairs
[docs]def compute_adjacencies(extremity_pairs, nb_blocks): """ Function computing the CAR adjacencies (car1, car2) stored into an extremity pairs array Parameters ---------- extremity_pairs : list Array containing pairs of extremities (corresponding to indexes of SparseMatrix) nb_blocks : int Total number of blocks Returns ------- list *adjacencies:* array containing pairs of signed integers (adjacencies) """ adjacencies = [] # Array containing adjacencies (pairs of signed blocks) for pair in extremity_pairs: [extremity_id1, extremity_id2] = pair adj = [] if(extremity_id1 <= nb_blocks): adj.append(extremity_id1) else: adj.append(-(extremity_id1 - nb_blocks)) if(extremity_id2 <= nb_blocks): adj.append(-extremity_id2) else: adj.append(extremity_id2 - nb_blocks) adjacencies.append(adj) return adjacencies
[docs]def car_to_block(signed_car, side, cars): """ From a signed car, returns the signed block at the ``side`` extremity Parameters ---------- signed_car : int A given CAR ID side : int ``0`` for left end of the car, ``1`` for right end cars : list list of all cars Returns ------- int *signed block*, which is at the given end of the given car """ if(signed_car > 0): if(side == 1): signed_block = cars[signed_car][-1] elif(side == 0): signed_block = cars[signed_car][0] else: if(side == 1): signed_block = -cars[-signed_car][0] elif(side == 0): signed_block = -cars[-signed_car][-1] return signed_block
[docs]def find_adj_info(car_adjacency, car_left, car_right, species1_id, species2_id, species3_id): """ Find information on a car_adjacency: in which genomes it is present Parameters ---------- car_adjacency : tuple The given car adjacency (two car numbers) car_left : list For each car, left neighbor car_right : list For each car, right neighbor species1_id : list Ids of species in group1 in tree partition from ancestor species2_id : list Ids of species in group2 in tree partition from ancestor species3_id : list Ids of species in group3 in tree partition from ancestor Returns ------- list *labels:* a list of integers indicating if the adjacency is present (1) or absent (0) in each species """ supporting_species_labels = conservation_status(car_adjacency, car_left, car_right, species1_id, species2_id, species3_id)[1] supporting_species_labels = [str(lab) for lab in supporting_species_labels] labels = " " + " ".join(supporting_species_labels) return labels
[docs]def conservation_status(adjacency, left, right, species1_id, species2_id, species3_id): """ Function computing the conservation status of an adjacency *nbgroup* = number of group in which adjacency is present. If *nbgroup = 3*, the adjacency is 'fully'. If *nbgroup = 2*, the adjacency is partly. Otherwise, the adjacency is not conserved. Parameters ---------- adjacency : list Pair of signed integers left : list Array such that *left[block_id]* is an array containing all left neighbors of block *block_id* right : list Array such that *right[block_id]* is an array containing all right neighbors of *block_id* species1_id: list Ids of group1 species species2_id: list Ids of group2 species species3_id: list Ids of group3 species Returns ------- tuple *nbgroup:* integer, number of group(s) in which adjacency is present *species_supporting:* 0-1 array indicating species in which adjacency is present (1) or absent (0) """ nbgroup = 0 # number of groups in the species partition (input1, input2, output) # in wich the adjacency appears species_supporting = [] # array such that species_supporting[spe_id] = 1 if species contains # adjacency, and species_supporting[spe_id] = 0 otherwise signed_block1, signed_block2 = adjacency block_id = abs(signed_block1) side_block = 0 side_map = {} if(signed_block1 > 0): side_map = right side_block = signed_block2 else: side_map = left side_block = -signed_block2 output = 0 input1 = 0 input2 = 0 for spe in species1_id: if(side_block in side_map[spe][block_id]): output = 1 species_supporting.append(1) else: species_supporting.append(0) for spe in species2_id: if(side_block in side_map[spe][block_id]): input1 = 1 species_supporting.append(1) else: species_supporting.append(0) for spe in species3_id: if(side_block in side_map[spe][block_id]): input2 = 1 species_supporting.append(1) else: species_supporting.append(0) nbgroup = output + input1 + input2 return nbgroup, species_supporting