Source code for procars.utils.utils_IO

#!/usr/bin/env python
# -*- coding: utf-8 -*-:

"""
Copyright © Bonsai - LIFL (Université Lille 1, CNRS UMR 8022) and Inria-Lille Nord Europe

contact: aida.ouangraoua@inria.fr, amandine.perrin@inria.fr

This software is a computer program whose purpose is to progressively reconstruct ancestral
gene orders.

This software is governed by the CeCILL-B license under French law and
abiding by the rules of distribution of free software. You can use,
modify and/or redistribute the software under the terms of the CeCILL-B
license as circulated by CEA, CNRS and Inria at the following URL
http://www.cecill.info, or in the LICENCE file at the root directory of this program.

The fact that you are presently reading this means that you have had
knowledge of the CeCILL-B license and that you accept its terms.

---------------------------------------------------

``utils_IO`` **module description**:

This module contains some methods used by the other modules when they need to read/write into files.

.. moduleauthor:: Aïda Ouangraoua, Amandine PERRIN

May 2014

"""

try:
    import cPickle as pickle
except ImportError:
    import pickle
from procars.utils import util_adjacency_functions


def save_binary_information(bin_filename, information):
    """ Save information into a binary file

    The object is serialised with ``pickle`` using the default protocol of the
    running interpreter. An existing file with the same name is overwritten.

    Parameters
    ----------
    bin_filename : String
        Name of the binary file in which saving the CARs information
    information : list
        Information we need to save into a binary file (any picklable object)
    """
    with open(bin_filename, "wb") as binary_file:
        pickle.dump(information, binary_file)


def read_binary_file(bin_filename):
    """ Read information stored into a binary (pickle) file

    Only the first pickled object of the file is loaded and returned.

    Parameters
    ----------
    bin_filename : string
        Name of the file in which reading information

    Returns
    -------
    tuple
        information stored in the file (whatever object was pickled into it)
    """
    # SECURITY NOTE: unpickling can execute arbitrary code. Only use this on
    # files produced by the program itself, never on untrusted input.
    with open(bin_filename, "rb") as binfile:
        return pickle.load(binfile)


def read_adjacency_file(adjacency_file_name, nb_blocks, discarded=False):
    """ Read an adjacency file (or a discarded adjacency file), and find all blocks
    and their left and right neighbors

    used in ``compute_pqtree`` and ``resolve_conflict``

    Only the first two whitespace-separated columns of each line (the two signed
    blocks of the adjacency) are used; any further column is ignored. Empty lines
    are skipped.

    For an adjacency ``b1 b2``:

    - if ``b1 > 0``, ``b2`` is a right neighbor of ``b1``; otherwise ``-b2`` is a
      left neighbor of ``-b1``
    - if ``b2 > 0``, ``b1`` is a left neighbor of ``b2``; otherwise ``-b1`` is a
      right neighbor of ``-b2``

    Parameters
    ----------
    adjacency_file_name : string
        Name of the file containing the adjacencies to parse
    nb_blocks : int
        Total number of blocks
    discarded : boolean
        True if we are reading a file of discarded adjacencies
        (and hence there can be multiple left/right neighbors), False for a file of
        added adjacencies (one left/right neighbor per block_end)

    Returns
    -------
    tuple
        *left:* dict with:
            - if not discarded: for each block number, its left neighbor: left[bloc2] = bloc1
              (first one found in the file, 0 if the block has no left neighbor)
            - if discarded: for each block number, an array containing its potential
              left neighbors: left[bloc1] = [bloc2, -bloc3,..]

        *right:* dict with:
            - if not discarded: for each block number, its right neighbor: right[bloc1] = bloc2
              (first one found in the file, 0 if the block has no right neighbor)
            - if discarded: for each block number, an array containing its potential
              right neighbors: right[bloc1] = [-bloc2, bloc3,..]

    Raises
    ------
    KeyError
        If a block of the file is 0 or has an absolute value greater than nb_blocks
    """
    left = {block_id: [] for block_id in range(1, nb_blocks + 1)}
    right = {block_id: [] for block_id in range(1, nb_blocks + 1)}
    with open(adjacency_file_name, "r") as adjacency_lines:
        for line in adjacency_lines:
            fields = line.split()
            if not fields:
                # blank line (e.g. trailing newline at end of file): nothing to parse
                continue
            signed_block1 = int(fields[0])
            signed_block2 = int(fields[1])
            if signed_block1 > 0:
                right[signed_block1].append(signed_block2)
            else:
                left[-signed_block1].append(-signed_block2)
            if signed_block2 > 0:
                left[signed_block2].append(signed_block1)
            else:
                right[-signed_block2].append(-signed_block1)
    if not discarded:
        # added adjacencies: keep a single neighbor per block end, 0 meaning "none".
        # items() (rather than iteritems()) works with both Python 2 and Python 3.
        left = {key: (value[0] if value else 0) for key, value in left.items()}
        right = {key: (value[0] if value else 0) for key, value in right.items()}
    return left, right


def read_car_file(car_file_name, nb_blocks):
    """ Function reading a CAR file

    Lines whose first non-blank character is ``#`` are comments and are skipped,
    as are empty lines. Every other line must contain a CAR written as
    ``_Q block1 block2 ... Q_``; CARs are numbered from 1 in their order of
    appearance in the file.

    Parameters
    ----------
    car_file_name : string
        Name of the file containing the current PQtree
    nb_blocks : int
        Total number of blocks

    Returns
    -------
    tuple
        *cars:* array of arrays (cars) of signed blocks. cars[0] is an empty
        placeholder so that cars[car_id] is the CAR number car_id

        *block_to_car:* integer array such that block_to_car[block_id] = car_id to which
        block_id belongs (0 if the block is in no CAR)

        *block_position_in_car:* integer array such that block_position_in_car[block_id] =
        position (starting from 0) of block in car to which it belongs

    Raises
    ------
    IndexError
        If a CAR line has no ``_Q`` marker, or if a block has an absolute value
        greater than nb_blocks
    """
    cars = [[]]  # array of cars ordered by increasing car_id
    # array : block_id -> car_id to which block belongs
    block_to_car = [0] * (nb_blocks + 1)
    # array : block_id -> position of block in its car
    block_position_in_car = [0] * (nb_blocks + 1)
    with open(car_file_name, "r") as carlines:
        for line in carlines:
            stripped = line.strip()
            if not stripped or stripped.startswith("#"):
                # blank line or "#CARn" header: no CAR content
                continue
            # keep what lies between the "_Q" and "Q_" markers
            car_string = stripped.split("_Q")[1].split("Q_")[0].split()
            car_int = [int(block) for block in car_string]  # array of signed integers
            cars.append(car_int)
            current_car = len(cars) - 1
            for position, signed_block in enumerate(car_int):
                block_to_car[abs(signed_block)] = current_car
                block_position_in_car[abs(signed_block)] = position
    return cars, block_to_car, block_position_in_car


def read_conflict_adj_file(adj_file, nb_species, leaves, tree, spe_ids):
    """ Read file containing previously discarded adjacencies, and yield them with their information

    Each non-empty line is expected to hold, separated by whitespace: the two
    signed blocks of the adjacency, then further columns (car adjacency, type,
    step), and finally one label per species in the last nb_species columns.
    Empty lines are skipped (they still count in the line numbering used for
    adjacency IDs).

    Parameters
    ----------
    adj_file : string
        File in which discarded adjacencies are written
    nb_species : int
        Total number of species
    leaves : list
        List of IDs of tree leaves (= genomes)
    tree : dict
        A tree structure. tree[leaf_id][0] must be the species name of the leaf
    spe_ids : dict
        Species as keys, and their corresponding ID as value. The ID is the
        position of the species among the nb_species label columns

    Returns
    -------
    tuple
        tuple yield for each adjacency = each line of the file:

        *labels:* dict with leaf IDs as keys, and as value a one-element list
        holding the label read in the file plus one: [2] if the adjacency is
        present in the species of this leaf, [1] if it is absent.

        *adj_id:* int, ID of current adjacency (index of its line in the file,
        starting from 0)

        *adjacency:* tuple, current adjacency (num_bloc1, num_bloc2)

        *step_car_adj:* list of strings, all columns following the two blocks:
        num_car1, num_car2, type, step, and then the species labels

        .. warning:: These Python objects are yield and not returned. The
           leaf -> species ID mapping is computed once, when iteration starts.
    """
    with open(adj_file, "r") as adjacency_lines:
        # leaf_ids = {leaf_num: (ID in spe_ids of the species of this leaf), ...}
        # Does not depend on the line read, hence computed once before the loop.
        leaf_ids = {leaf_num: spe_ids[tree[leaf_num][0]] for leaf_num in leaves}
        for adj_id, line in enumerate(adjacency_lines):
            words = line.split()
            if not words:
                # blank line: no adjacency to report
                continue
            adjacency = (int(words[0]), int(words[1]))   # pair of signed blocks
            # car adjacency, type of adj, step at which this adjacency was added
            # (followed by the species labels):
            step_car_adj = words[2:]
            # list of labels (given adjacency present=1/absent=0) for each species
            labels_list = words[-nb_species:]   # nb_species last words are the labels
            # labels = {leaf_num: [label] (1=absent/2=present)}
            labels = {leaf_num: [int(labels_list[leaf_id]) + 1]
                      for leaf_num, leaf_id in leaf_ids.items()}
            yield labels, adj_id, adjacency, step_car_adj


def write_retained_conflict_adjs(filename, adj_infos, maximum_set, adj_ids):
    """ Write retained adjacencies after a conflict resolution into a txt file
    -> next adjacency file

    One line is written per retained adjacency, in the order of maximum_set,
    with space-separated columns: the blocks of the adjacency, the first three
    values of its information, the step number, and the species labels. An
    existing file with the same name is overwritten.

    Parameters
    ----------
    filename : string
        Name of the file in which retained adjacencies are stored
    adj_infos : dict
        Dictionary with adjacency IDs as keys, and values are a list with the car adjacency
        (car1, car2), the type of adjacency, the step at which it was found and the presence
        of this adjacency in each species
    maximum_set : list
        List of retained adjacency ids
    adj_ids : dict
        Keys are adjacency IDs, and values are the adjacency corresponding to the ID
    """
    with open(filename, "w") as output_file:
        for adj_id in maximum_set:
            infos = adj_infos[adj_id]
            # block adjacency, then car adjacency and type of adjacency as stored
            words = list(adj_ids[adj_id]) + list(infos[:3])
            # step number and labels are normalised to integers before writing
            words.append(int(infos[3]))
            words.extend(int(lab) for lab in infos[4:])
            output_file.write(" ".join(str(word) for word in words) + "\n")


def write_adjacency(output_file, cars, car_adjacency, adj_type, step_nb, labels):
    """ Writes all adjacency information into a file handler

    A single line is written, with space-separated columns: the two signed
    blocks obtained from ``util_adjacency_functions.car_to_block``, the two
    elements of car_adjacency, the adjacency type and the step number,
    immediately followed by labels.

    Parameters
    ----------
    output_file : FileHandler
        Open file or StringIO in which writting status of adjacencies
    cars : list
        List of current cars
    car_adjacency : list
        The two elements of a given adjacency. Each one is converted to a signed
        block with ``car_to_block`` and is also written as is.
        NOTE(review): these look like signed CAR numbers rather than blocks --
        to confirm against ``car_to_block``
    adj_type : int
        Type of adjacency : 0 if fully, 1 if partly
    step_nb : int
        Current step of the ProCars Method
    labels : string
        More information (presence/absence of the adjacency in each species).
        It is appended right after the step number without any separator, so it
        must start with its own whitespace if it is not empty
    """
    # NOTE(review): the second argument (1, then 0) presumably selects which
    # extremity of the CAR is wanted -- to confirm in util_adjacency_functions
    signed_block1 = util_adjacency_functions.car_to_block(car_adjacency[0], 1, cars)
    signed_block2 = util_adjacency_functions.car_to_block(car_adjacency[1], 0, cars)
    columns = [signed_block1, signed_block2, car_adjacency[0], car_adjacency[1],
               adj_type, step_nb]
    output_file.write(" ".join(str(column) for column in columns) + labels + "\n")


def write_car_file(car_filename, all_cars):
    """ Writes all CARs into the given file

    Each CAR takes two lines: a header ``#CARn`` and the line
    ``_Q bloc1 bloc2 ... Q_``. Numbering starts at 1 for the first element of
    all_cars (all_cars[0] is written as ``#CAR1``). An empty CAR is written as
    ``_Q  Q_``. An existing file with the same name is overwritten.

    Parameters
    ----------
    car_filename : String
        Name of the file in which all CARs are stored (= PQtree file)
    all_cars : list
        List of lists, each one containing [bloc1, bloc2, ...] = all ordered signed
        blocks of one CAR
    """
    with open(car_filename, "w") as carfile:
        for car_number, car in enumerate(all_cars, 1):
            blocks = " ".join(str(signed_block) for signed_block in car)
            carfile.write("#CAR" + str(car_number) + "\n")
            carfile.write("_Q " + blocks + " Q_\n")
Navigation

Source code for procars.utils.utils_IO

Quick search

Navigation