# Source code for transcriptic.analysis.kinetics

try:
    import pandas as pd
    import plotly as py
    import plotly.graph_objs as go
except ImportError:
    raise ImportError(
        "Please run `pip install transcriptic[analysis] if you "
        "would like to use the Transcriptic analysis module."
    )


class _Kinetics:
    """
    Base class that folds a time series of datasets into one readings table.

    Parameters
    ----------
    datasets: List[dataset]
        Datasets to combine. Each one must expose a ``data`` DataFrame and an
        ``attributes`` mapping containing ``attributes["warp"]["completed_at"]``.

    Attributes
    ----------
    datasets: List[dataset]
        The list passed to the constructor, stored as-is.
    readings: DataFrame
        The ``data`` tables of all datasets stacked in the order given, labelled
        by completion time and then transposed, so that the original data columns
        become rows and the completion times become columns.
    """

    def __init__(self, datasets):
        self.datasets = datasets

        # Stack the per-dataset tables row-wise, keeping the caller's ordering.
        frames = [dataset.data for dataset in datasets]
        self.readings = pd.concat(frames)

        # One timestamp per dataset: the stacked table must therefore have
        # exactly as many rows as there are datasets for this assignment to fit.
        completed_at = [
            dataset.attributes["warp"]["completed_at"] for dataset in datasets
        ]
        self.readings.index = pd.to_datetime(completed_at)

        # Flip the table so time runs along the columns.
        self.readings = self.readings.T


class Spectrophotometry(_Kinetics):
    """
    A Spectrophotometry object is used to analyze a kinetic series of PlateRead datasets

    Attributes
    ----------
    properties: DataFrame
        DataFrame of aliquot properties for each well, useful for groupby operations during plots.
        Indexed by humanized well label, with the columns ``name``, ``row`` and ``column``
    readings: DataFrame
        DataFrame of readings for each well at different time points
    operation: str
        Operation used for generating these growth curves (e.g. Absorbance)

    """

    def __init__(self, datasets):
        """
        Parameters
        ----------
        datasets: List[dataset]
            List of Datasets objects. Currently restricted to those generated by 'absorbance', 'fluorescence'
            and 'luminescence' operations

        Raises
        ------
        RuntimeError
            If no datasets are given, if the datasets do not all share the same operation, or if
            that operation is not one of the supported ones
        """
        operation_set = {ds.operation for ds in datasets}
        if not operation_set:
            # Without this guard the failure would be an opaque KeyError from set.pop()
            raise RuntimeError("At least one Dataset is required.")
        if len(operation_set) > 1:
            raise RuntimeError("Input Datasets must all be of the same type.")
        self.operation = operation_set.pop()
        if self.operation not in ["absorbance", "fluorescence", "luminescence"]:
            raise RuntimeError(
                f"{self.operation} has to be of type absorbance, "
                f"fluorescence or luminescence"
            )
        super().__init__(datasets)
        # Assume that well names are consistent across all runs
        ref_dataset = datasets[0]
        ref_container = ref_dataset.container
        container_type = ref_container.container_type
        # Check if well_map is defined
        if len(ref_container.well_map) != 0:
            # NOTE(review): assumes well_map maps integer well indices to a single
            # name value per well - confirm against the container implementation
            self.properties = pd.DataFrame.from_dict(
                ref_container.well_map, orient="index"
            )
        else:
            # Fall back to the data column labels, skipping the "GAIN" column
            self.properties = pd.DataFrame.from_dict(
                {
                    container_type.robotize(x): x
                    for x in ref_dataset.data.columns
                    if x not in ["GAIN"]
                },
                orient="index",
            )
        self.properties.columns = ["name"]
        # The index still holds robotized well indices here, so row and column
        # positions follow from integer division and remainder by the column count
        self.properties.insert(
            1,
            "column",
            (self.properties.index % container_type.col_count),
        )
        self.properties.insert(
            1, "row", (self.properties.index // container_type.col_count)
        )
        # NOTE(review): only 26 row letters are available; a plate with more rows
        # would raise IndexError here
        self.properties["row"] = self.properties["row"].apply(
            lambda x: "ABCDEFGHIJKLMNOPQRSTUVWXYZ"[x]
        )
        self.properties.index = [
            container_type.humanize(int(x)) for x in self.properties.index
        ]

    def plot(
        self,
        wells="*",
        groupby=None,
        title=None,
        xlabel=None,
        ylabel=None,
        max_legend_len=20,
    ):
        """
        This generates a plot of the kinetics curve. Note that this function is meant for use under a Jupyter notebook
        environment

        Example Usage:

        .. code-block:: python

            from transcriptic.analysis.kinetics import Spectrophotometry
            growth_curve = Spectrophotometry(myRun.data.Datasets)
            growth_curve.plot(wells=["A1", "A2", "B1", "B2"])
            growth_curve.plot(wells=["A1", "A2", "B1", "B2"], groupby="row", title="Row Groups")
            growth_curve.plot(wells=["A1", "A2", "B1", "B2"], groupby="name", ylabel="Absorbance Units")
            growth_curve.plot(groupby="name", max_legend_len=40)

        Parameters
        ----------
        wells: Optional[list or str]
            If not specified (``"*"`` or ``None``), this plots all the wells associated with the Datasets given.
            Otherwise, specify an iterable of well indices (["A1", "B1"]) or a specific well ("A1")
        groupby: Optional[str]
            When specified, this groups the wells with the same property value together. On the plot, each group will
            be represented by a single curve with the mean values and error bars of 1 std. dev. away from the mean
        title: Optional[str]
            Plot title. Default: "Kinetics Curve (`run-id`)"
        xlabel: Optional[str]
            Plot x-axis label. Default: "Time"
        ylabel: Optional[str]
            Plot y-axis label. Default: "RAU (`wavelength`)" for absorbance, "RFU (`excitation`/`emission`)"
            for fluorescence and "Luminescence" for luminescence
        max_legend_len
            Maximum number of characters for the legend labels of grouped curves before truncating. Default: 20

        Returns
        -------
        IPlot
            The result of ``plotly.offline.iplot``; the figure is rendered in the Jupyter notebook instance

        Raises
        ------
        ValueError
            If `groupby` is not a column of the properties table, or if none of the selected wells
            belongs to any group
        """
        # TODO: Shift init_notebook_mode() to start of notebook instance
        py.offline.init_notebook_mode()

        # Normalise `wells` to a list of labels so every later step can rely on it
        if wells is None or (isinstance(wells, str) and wells == "*"):
            wells = list(self.properties.index)
        elif isinstance(wells, str):
            wells = [wells]
        else:
            wells = list(wells)
        well_readings = self.readings.loc[wells]

        timepoints = self.readings.columns
        if not groupby:
            traces = [
                go.Scatter(
                    x=timepoints,
                    y=well_readings.loc[well],
                    name=self.properties["name"].loc[well],
                )
                for well in wells
            ]
        else:
            if groupby not in self.properties.columns:
                raise ValueError(
                    f"'{groupby}' not found in the properties table. "
                    f"Please specify a column which exists"
                )
            selected = set(well_readings.index)
            grouped_readings = []
            for group_value, members in self.properties.groupby(groupby):
                # Use an ordered list of labels: pandas rejects set indexers and a
                # set would make the row order arbitrary
                common = [well for well in members.index if well in selected]
                if common:
                    grouped_readings.append((group_value, well_readings.loc[common]))
            if not grouped_readings:
                raise ValueError(
                    f"No common groups found for specified groupby: {groupby}"
                )
            traces = [
                go.Scatter(
                    x=timepoints,
                    # Mean and standard deviation across the group's wells per time point
                    y=reading.mean(),
                    name=self._truncate_name(group_value, max_legend_len),
                    error_y=dict(type="data", array=reading.std(), visible=True),
                )
                for group_value, reading in grouped_readings
            ]

        # Assume all data is generated from the same run-id for now.
        # Dataset attributes are only read for labels the caller did not supply.
        if not title:
            run_id = self.datasets[0].attributes["instruction"]["run"]["id"]
            title = f"Kinetics Curve ({run_id})"
        if not xlabel:
            xlabel = "Time"
        if not ylabel:
            if self.operation == "absorbance":
                operation = self.datasets[0].attributes["instruction"]["operation"]
                ylabel = f"RAU ({operation['wavelength']})"
            elif self.operation == "fluorescence":
                operation = self.datasets[0].attributes["instruction"]["operation"]
                ylabel = f"RFU ({operation['excitation']}/{operation['emission']})"
            elif self.operation == "luminescence":
                ylabel = "Luminescence"

        # NOTE(review): `titlefont` is reported as deprecated in newer plotly releases in
        # favour of `title=dict(text=..., font=...)` - confirm against the pinned version
        axis_title_font = dict(
            family="Courier New, monospace", size=18, color="#7f7f7f"
        )
        layout = go.Layout(
            title=title,
            xaxis=dict(title=xlabel, titlefont=dict(axis_title_font)),
            yaxis=dict(title=ylabel, titlefont=dict(axis_title_font)),
            # NOTE(review): x=100 is far outside the plot area in normalised
            # coordinates - confirm this legend position is intended
            legend=dict(x=100, y=1),
        )

        fig = go.Figure(data=traces, layout=layout)
        return py.offline.iplot(fig)

    @staticmethod
    def _truncate_name(string, max_len=20):
        """
        Truncates string to max_len number of characters, ending in an ellipsis if it is too long

        Non-string values (e.g. integer column numbers used as group labels) are converted
        with ``str`` first. When `max_len` is too small to fit an ellipsis, the text is
        simply cut to `max_len` characters.
        """
        string = str(string)
        if len(string) <= max_len:
            return string
        if max_len < 3:
            # No room for "..."; a negative slice here would return too much text
            return string[: max(max_len, 0)]
        return string[: (max_len - 3)] + "..."