New seqFISH Decoding Method by nickeener · Pull Request #1960 · spacetx/starfish · GitHub

New seqFISH Decoding Method #1960

Closed · nickeener wants to merge 17 commits

Changes from all commits

21 changes: 8 additions & 13 deletions starfish/core/intensity_table/decoded_intensity_table.py
@@ -17,22 +17,18 @@ class DecodedIntensityTable(IntensityTable):
"""
DecodedIntensityTable is a container for spot or pixel features extracted from image data
that have been decoded. It is the primary output from starfish :py:class:`Decode` methods.

An IntensityTable records the numeric intensity of a set of features in each
:code:`(round, channel)` tile in which the feature is identified.
The :py:class:`IntensityTable` has shape
:code:`(n_feature, n_channel, n_round)`.

Some :py:class:`SpotFinder` methods identify a position and search for Gaussian blobs in a
small radius, only recording intensities if they are found in a given tile. Other
:py:class:`SpotFinder` approaches find blobs in a max-projection and measure them everywhere.
As a result, some IntensityTables will be dense, and others will contain :code:`np.nan`
entries where no feature was detected.

Examples
--------
Create an IntensityTable using the ``synthetic_intensities`` method::

>>> from starfish.core.test.factories import SyntheticData
>>> sd = SyntheticData(n_ch=3, n_round=4, n_codes=2)
>>> codes = sd.codebook()
@@ -41,7 +37,6 @@ class DecodedIntensityTable(IntensityTable):
array([[[ 0., 0., 0., 0.],
[ 0., 0., 8022., 12412.],
[11160., 9546., 0., 0.]],

[[ 0., 0., 0., 0.],
[ 0., 0., 10506., 10830.],
[11172., 12331., 0., 0.]]])
@@ -54,7 +49,6 @@ class DecodedIntensityTable(IntensityTable):
* c (c) int64 0 1 2
* h (h) int64 0 1 2 3
target (features) object 08b1a822-a1b4-4e06-81ea-8a4bd2b004a9 ...

"""

__slots__ = ()
@@ -65,7 +59,9 @@ def from_intensity_table(
intensities: IntensityTable,
targets: Tuple[str, np.ndarray],
distances: Optional[Tuple[str, np.ndarray]] = None,
passes_threshold: Optional[Tuple[str, np.ndarray]] = None):
passes_threshold: Optional[Tuple[str, np.ndarray]] = None,
rounds_used: Optional[Tuple[str, np.ndarray]] = None):

"""
Assign target values to intensities.

@@ -80,6 +76,9 @@ def from_intensity_table(
passes_threshold : Optional[Tuple[str, np.ndarray]]
Corresponding array of boolean values indicating if each intensity passed
given thresholds.
rounds_used : Optional[Tuple[str, np.ndarray]]
Corresponding array of integers indicating the number of rounds this
decoded intensity was found in.

Returns
-------
@@ -92,6 +91,8 @@ def from_intensity_table(
intensities[Features.DISTANCE] = distances
if passes_threshold:
intensities[Features.PASSES_THRESHOLDS] = passes_threshold
if rounds_used:
intensities['rounds_used'] = rounds_used
return intensities

def to_decoded_dataframe(self) -> DecodedSpots:
@@ -108,19 +109,15 @@ def to_mermaid(self, filename: str) -> pd.DataFrame:
"""
Writes a .csv.gz file in columnar format that is readable by MERMAID visualization
software.

To run MERMAID, follow the installation instructions for that repository and simply
replace the data.csv.gz file with the output of this function.

Parameters
----------
filename : str
Name for the gzip-compressed MERMAID data file. Should end in '.csv.gz'.

Notes
-----
See also https://github.com/JEFworks/MERmaid
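
Examples
--------
A minimal usage sketch (``decoded`` is assumed to be an existing
:py:class:`DecodedIntensityTable`; the filename follows the note above)::

    >>> decoded.to_mermaid('data.csv.gz')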

"""
# construct the MERMAID dataframe. As MERMAID adds support for non-categorical variables,
# additional columns can be added here
@@ -139,9 +136,7 @@ def to_mermaid(self, filename: str) -> pd.DataFrame:
def to_expression_matrix(self) -> ExpressionMatrix:
"""
Generates a cell x gene count matrix where each cell is annotated with spatial metadata.

Requires that spots in the IntensityTable have been assigned to cells.

Returns
-------
ExpressionMatrix :
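A minimal sketch of how the new `rounds_used` argument can be passed (hypothetical snippet: `intensities` is assumed to be an existing `IntensityTable`, and the target and round values are illustrative only):

```python
import numpy as np

from starfish.core.intensity_table.decoded_intensity_table import DecodedIntensityTable
from starfish.types import Features

# `intensities` is assumed to be an existing IntensityTable.
n = intensities.sizes[Features.AXIS]  # number of features in the table
decoded = DecodedIntensityTable.from_intensity_table(
    intensities,
    targets=(Features.AXIS, np.array(['GENE_A'] * n)),
    distances=(Features.AXIS, np.zeros(n)),
    passes_threshold=(Features.AXIS, np.full(n, True)),
    # New in this PR: number of rounds each decoded intensity was found in.
    rounds_used=(Features.AXIS, np.full(n, 4)),
)
```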
1 change: 1 addition & 0 deletions starfish/core/spots/DecodeSpots/__init__.py
@@ -1,4 +1,5 @@
from ._base import DecodeSpotsAlgorithm
from .check_all_decoder import CheckAll
from .metric_decoder import MetricDistance
from .per_round_max_channel_decoder import PerRoundMaxChannel
from .simple_lookup_decoder import SimpleLookupDecoder
206 changes: 206 additions & 0 deletions starfish/core/spots/DecodeSpots/check_all_decoder.py
@@ -0,0 +1,206 @@
from copy import deepcopy
from typing import Any, Hashable, Mapping, Tuple

import numpy as np
import pandas as pd

from starfish.core.codebook.codebook import Codebook
from starfish.core.intensity_table.decoded_intensity_table import DecodedIntensityTable
from starfish.core.intensity_table.intensity_table import IntensityTable
from starfish.core.intensity_table.intensity_table_coordinates import \
transfer_physical_coords_to_intensity_table
from starfish.core.types import SpotFindingResults
from starfish.types import Axes, Features
from ._base import DecodeSpotsAlgorithm
from .check_all_funcs import buildBarcodes, cleanup, createRefDicts, decoder, distanceFilter, \
removeUsedSpots
from .util import _merge_spots_by_round

class CheckAll(DecodeSpotsAlgorithm):
"""
Decode spots by generating all possible combinations of spots that lie within a given
search radius of each other and could therefore form a barcode. Then choose the best
set of nonoverlapping spot combinations: those whose spot coordinates have the least
spatial variance and that are also found to be "best" by multiple of the spots that
make up the barcode (see algorithm below). Allows for error correction rounds.

Algorithm (input parameters are described below):
1. For each spot in each round, find all neighbors in other rounds that are within the search
radius
2. For each spot in each round, build all possible full length barcodes based on the channel
labels of the spot's neighbors and itself
3. Drop barcodes that don't have a matching target in the codebook
4. Choose the "best" barcode of each spot's possible target matching barcodes by calculating
the sum of variances for each of the spatial coordinates of the spots that make up each barcode
and choosing the minimum distance barcode (if there is a tie, they are all dropped as
ambiguous). Each spot is assigned a "best" barcode in this way.
5. Only keep barcodes/targets that were found as "best" using at least 2 of the spots that make
each up
6. Find maximum independent set (approximation) of the spot combinations so no two barcodes use
8000 the same spot
7. Remove all spots used in decoded targets that passed the previous filtering steps from the
original set of spots
8. Rerun steps 2-5 for barcodes that use less than the full set of rounds for codebook
matching (how many rounds can be dropped determined by error_rounds parameter)

Parameters
----------
codebook : Codebook
Contains codes to decode IntensityTable
search_radius : float
Number of pixels over which to search for spots in other rounds and channels.
error_rounds : int
Maximum Hamming distance a barcode can be from its target in the codebook and still be
uniquely identified (i.e., the number of error-correction rounds in the experiment)
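
Examples
--------
A minimal usage sketch (``codebook`` and ``spots`` are assumed to already exist;
the parameter values are illustrative only)::

    >>> decoder = CheckAll(codebook=codebook, search_radius=3, error_rounds=1)
    >>> decoded = decoder.run(spots=spots, n_processes=4)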
"""

def __init__(
self,
codebook: Codebook,
search_radius: float = 3,
error_rounds: int = 0):
self.codebook = codebook
self.searchRadius = search_radius
self.errorRounds = error_rounds

def run(self,
spots: SpotFindingResults,
n_processes: int = 1,
*args) -> DecodedIntensityTable:
"""
Decode spots by finding the set of nonoverlapping barcodes that have the minimum spatial
variance within each barcode

Parameters
----------
spots: SpotFindingResults
A Dict of tile indices and their corresponding measured spots

n_processes: int
Number of processes to use when running the decoder in parallel

Returns
-------
DecodedIntensityTable :
IntensityTable decoded and appended with Features.TARGET and Features.QUALITY values.

"""

# Rename n_processes (trying to stay consistent between starFISH's _ variables and my
# camel case ones)
numJobs = n_processes

# If using a search radius exactly equal to a possible distance between two pixels
# (ex: 1), some distances will be calculated as slightly less than their exact
# distance (due to rounding or precision errors), so the search radius needs to be
# slightly increased to ensure this doesn't happen
self.searchRadius += 0.001

# Create dictionary where keys are round labels and the values are pandas dataframes
# containing information on the spots found in that round
spotTables = _merge_spots_by_round(spots)

# Add one to channel labels (prevents collisions between hashes of barcodes later)
for r in spots.round_labels:
spotTables[r]['c'] += 1

# Set list of round omission numbers to loop through
roundOmits = range(self.errorRounds + 1)

# Decode for each round omission number, store results in allCodes table
allCodes = pd.DataFrame()
for currentRoundOmitNum in roundOmits:

# Create necessary reference dictionaries
neighborDict, channelDict, spotCoords = createRefDicts(spotTables, self.searchRadius)

# Chooses the best barcode for all spots in each round sequentially (the possible
# barcode space can become quite large, which can increase memory needs, so I do it
# this way so we only need to store the potential barcodes that originate from one
# round at a time)
decodedTables = {}
for r in range(len(spotTables)):
roundData = deepcopy(spotTables[r])
roundData = roundData.drop(['intensity', 'z', 'y', 'x', 'radius', 'c'], axis=1)
roundData.index += 1

# Create dictionary of dataframes (based on spotTables data) that contains
# additional columns for each spot containing all the possible barcodes that
# could be constructed from the neighbors of that spot
roundData = buildBarcodes(roundData, neighborDict, currentRoundOmitNum,
channelDict, r, numJobs)

# Match possible barcodes to codebook and add new columns with info about barcodes
# that had a codebook match
roundData = decoder(roundData, self.codebook, currentRoundOmitNum, r, numJobs)

# Choose the most likely barcode for each spot in each round by finding the
# decodable barcode with the least spatial variance between the spots that
# made up the barcode
roundData = distanceFilter(roundData, spotCoords, r, numJobs)

# Assign to DecodedTables dictionary
decodedTables[r] = roundData

# Only do the following if barcodes were found
totalSpots = sum([len(decodedTables[table]) for table in decodedTables])
if totalSpots:

# Turn spot table dictionary into single table, filter barcodes by round frequency,
# add additional information, and choose between barcodes that have overlapping
# spots
finalCodes = cleanup(decodedTables, spotCoords, channelDict)

# If this is not the last round omission number to run, remove spots that have just
# been found to be in passing barcodes from spotTables so they are not used for the
# next round omission number
if currentRoundOmitNum != roundOmits[-1]:
spotTables = removeUsedSpots(finalCodes, spotTables)

# Append found codes to allCodes table
allCodes = allCodes.append(finalCodes).reset_index(drop=True)

# Create and fill in intensity table
channels = spots.ch_labels
rounds = spots.round_labels

# create empty IntensityTable filled with np.nan
data = np.full((len(allCodes), len(rounds), len(channels)), fill_value=np.nan)
dims = (Features.AXIS, Axes.ROUND.value, Axes.CH.value)
centers = allCodes['center']
coords: Mapping[Hashable, Tuple[str, Any]] = {
Features.SPOT_RADIUS: (Features.AXIS, np.full(len(allCodes), 1)),
Axes.ZPLANE.value: (Features.AXIS, np.asarray([round(c[2]) for c in centers])),
Axes.Y.value: (Features.AXIS, np.asarray([round(c[1]) for c in centers])),
Axes.X.value: (Features.AXIS, np.asarray([round(c[0]) for c in centers])),
Features.SPOT_ID: (Features.AXIS, np.arange(len(allCodes))),
Features.AXIS: (Features.AXIS, np.arange(len(allCodes))),
Axes.ROUND.value: (Axes.ROUND.value, rounds),
Axes.CH.value: (Axes.CH.value, channels)
}
int_table = IntensityTable(data=data, dims=dims, coords=coords)

# Fill in data values
table_codes = []
for i in range(len(allCodes)):
code = []
for ch in allCodes.loc[i, 'best_barcodes']:
# If a round is not used, row will be all zeros
code.append(np.asarray([0 if j != ch else 1 for j in range(len(channels))]))
table_codes.append(np.asarray(code))
int_table.values = np.asarray(table_codes)
int_table = transfer_physical_coords_to_intensity_table(intensity_table=int_table,
spots=spots)

# Validate results are correct shape
self.codebook._validate_decode_intensity_input_matches_codebook_shape(int_table)

# Create DecodedIntensityTable
result = DecodedIntensityTable.from_intensity_table(
int_table,
targets=(Features.AXIS, allCodes['best_targets'].astype('U')),
distances=(Features.AXIS, allCodes["best_distances"]),
passes_threshold=(Features.AXIS, np.full(len(allCodes), True)),
rounds_used=(Features.AXIS, allCodes['rounds_used']))

return result
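
For context, a sketch of how `CheckAll` and the new `rounds_used` coordinate might fit into a pipeline (hypothetical snippet: `codebook` and `spots` are assumed to come from earlier pipeline steps):

```python
from starfish.core.spots.DecodeSpots import CheckAll
from starfish.types import Axes

# Allow one error-correction round; pair spots within 2 px across rounds.
decoder = CheckAll(codebook=codebook, search_radius=2, error_rounds=1)
decoded = decoder.run(spots=spots, n_processes=8)

# Separate barcodes decoded using every round from error-corrected ones.
n_rounds = decoded.sizes[Axes.ROUND.value]
full = decoded.where(decoded['rounds_used'] == n_rounds, drop=True)
corrected = decoded.where(decoded['rounds_used'] < n_rounds, drop=True)
```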