Source code for bdgenomics.mango.pileup.sources

#
# Licensed to Big Data Genomics (BDG) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The BDG licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

r"""
=======
Sources
=======
.. currentmodule:: bdgenomics.mango.pileup.sources

Sources specify where the genomic data comes from. Sources can come from a url, a GA4GHDatasource, or a JSON string of GA4GH formatted data.

.. autosummary::
    :toctree: _generate/

    BamDataSource
    VcfDataSource
    TwoBitDataSource
    BigBedDataSource
    GA4GHAlignmentJson
    GA4GHVariantJson
    GA4GHFeatureJson
    GA4GHAlignmentSource
    GA4GHVariantSource
    GA4GHFeatureSource
"""

# generic data source for pileup.js
class Source:
    #: dictionary containing source elements (viz, source, sourceOptions, label)
    dict_ = {}
    #: name that pileup.js uses to identify sources
    name = None


# Generic data sources
class GA4GHSource(Source):
    def __init__(self, endpoint, readGroupId, callSetIds = None):
        """ Initializes GA4GHSource.

        Args:
            :param str: url endpoint
            :param str: read group id
            :param str: optional call set ID for variants
        
        """

        #: dictionary containing source elements (viz, source, sourceOptions, label)
        self.dict_ = {
            'endpoint': endpoint,
            'readGroupId': readGroupId
        }


# For data stored as JSON strings
class jsonString(Source):
    def __init__(self, json):   
        """ Initializes GA4GH JSON.

        Args:
            :param str: json in GA4GH format
        
        """

        #: dictionary containing source elements (viz, source, sourceOptions, label)
        self.dict_ = json

class DataFrameSource(Source):
    
    def __init__(self, dataframe):
        """Initializes dataframe. Converts it to raw json. 

        Args:
            :param dataframe: dataframe
        """
        feature_transformed_json = dataframe._mango_to_json
        self.dict_ = feature_transformed_json
        
        #: name that pileup.js uses to identify sources --> temporary
        self.name = dataframe._pileup_visualization
    
    
       

# can be used for TwoBit, vcf, BigBedDataSource, or BamDataSource
class FileSource(Source):  
    def __init__(self, url, indexUrl = None):
        """ Initializes file sources.

        Args:
            :param str: url to file
            :param str: indexUrl to index file
        
        """

        #: dictionary containing source elements (viz, source, sourceOptions, label)
        self.dict_ = {
            'url': url,
            'indexUrl': indexUrl
        }

##### Specific data sources build from generic data sources #####
# file sources
[docs]class BamDataSource(FileSource): """ Initializes file source from bam file endpoint. Args: :param str: url to file :param str: indexUrl to index file """ #: name that pileup.js uses to identify sources name = 'bam'
[docs]class VcfDataSource(FileSource): """ Initializes file source from vcf file endpoint. Args: :param str: url to file :param str: indexUrl to index file """ #: name that pileup.js uses to identify sources name = 'vcf'
[docs]class TwoBitDataSource(FileSource): """ Initializes file source from twoBit file endpoint. Args: :param str: url to file """ name = 'twoBit'
[docs]class BigBedDataSource(FileSource): """ Initializes file source from big bed (.bb) file endpoint. Args: :param str: url to file """ #: name that pileup.js uses to identify sources name = 'bigBed'
# json built sources
[docs]class GA4GHAlignmentJson(jsonString): """ Initializes GA4GH Alignment JSON. Args: :param str: json in GA4GH format """ #: name that pileup.js uses to identify sources name = 'alignmentJson'
[docs]class GA4GHVariantJson(jsonString): """ Initializes GA4GH variant JSON. Args: :param str: json in GA4GH format """ #: name that pileup.js uses to identify sources name = 'variantJson'
[docs]class GA4GHFeatureJson(jsonString): """ Initializes GA4GH feature JSON. Args: :param str: json in GA4GH format """ #: name that pileup.js uses to identify sources name = 'featureJson'
# GA4GH Sources
[docs]class GA4GHAlignmentSource(GA4GHSource): """ Initializes GA4GHAlignmentSource. Args: :param str: url endpoint :param str: read group id """ #: name that pileup.js uses to identify sources name = 'GAReadAlignment'
[docs]class GA4GHVariantSource(GA4GHSource): """ Initializes GA4GHSource. Args: :param str: url endpoint :param str: call set ID :param str: optional call set ID for variants """ #: name that pileup.js uses to identify sources name = 'GAVariant'
[docs]class GA4GHFeatureSource(GA4GHSource): """ Initializes GA4GHFeatureSource. Args: :param str: url endpoint """ #: name that pileup.js uses to identify sources name = 'GAFeature'
# dictionary of visualizations and corresponding data sources vizNames = { 'coverage': [BamDataSource.name, GA4GHFeatureJson.name], 'pileup': [BamDataSource.name, GA4GHAlignmentJson.name, GA4GHAlignmentSource.name], 'features': [BigBedDataSource.name, GA4GHFeatureJson.name, GA4GHFeatureSource.name], 'variants': [VcfDataSource.name, GA4GHVariantJson.name, GA4GHVariantSource.name], 'genome':[TwoBitDataSource.name], 'genes': [BigBedDataSource.name], 'scale':[], 'location': [], 'genotypes': [VcfDataSource.name, GA4GHVariantJson.name, GA4GHVariantSource.name] } # dictionary of source ids accepted by pileup and corresponding source classes sourceNames = { BamDataSource.name: BamDataSource, VcfDataSource.name: VcfDataSource, TwoBitDataSource.name: TwoBitDataSource, BigBedDataSource.name: BigBedDataSource, GA4GHAlignmentJson.name: GA4GHAlignmentJson, GA4GHFeatureJson.name: GA4GHFeatureJson, GA4GHVariantJson.name: GA4GHVariantJson, GA4GHAlignmentSource.name: GA4GHAlignmentSource, GA4GHVariantSource.name: GA4GHVariantSource, GA4GHFeatureSource.name: GA4GHFeatureSource, DataFrameSource.name: DataFrameSource }