# Source code for tile_processing.TileProcessor

# Copyright (c) Fraunhofer MEVIS, Germany. All rights reserved.
# **InsertLicense** code

# Exempt this module from Python module reloading, which frequently breaks
# exception handling, isinstance checks and enum value comparisons.
# Only the existence of the variable matters, see the note on the assignment.
_mlab_do_not_reload = True # value is currently ignored, but it must be defined

from typing import Optional, Sequence, Dict
from abc import ABC, abstractmethod
import numpy

from parameter_info.parameter_info import ParameterInfo
from parameter_info.utils import to_ParameterInfo

from tile_processing.properties import TileProcessorProperties
from tile_processing import utils as tp_utils

# Dictionary of input or output names and the numpy array holding the batch for that input/output
TileMap = Dict[ str, numpy.ndarray ]

# noinspection PyMethodMayBeStatic
# pylint: disable=unnecessary-pass
class TileProcessor( ABC ):
  """
  Base class for python based tile processors e.g. for machine learning model inference.

  Callers use the public methods (prepareProcessBatch, getProposedProperties, processBatch,
  processBatchForPreAllocatedOutputs, getParameterInfo, finalize). Derived classes customize the behavior by
  overriding the protected, underscore-prefixed methods; _getProposedProperties() and _processBatch() are
  abstract and must be implemented.
  """

  # -------------------- Public API, do not override ----------------------------

  class Error( RuntimeError, ABC ):
    """Common base class of the exceptions declared by TileProcessor."""

  class ParameterError( Error ):
    """Raised for invalid parameters, e.g. invalid requested output names."""

  class ProcessingError( Error ):
    """Raised for issues that occur while processing a batch."""

  # Keys of the entries written by getParameterInfo()
  PARAMETER_INFO_KEY__OBJECT_TYPE = "__type"
  PARAMETER_INFO_KEY__INPUTS = "__inputs"
  PARAMETER_INFO_KEY__OUTPUTS = "__outputs"
  PARAMETER_INFO_KEY__PROPOSED_PROPERTIES = "__proposedProperties"

  def __init__( self, creatorName:str="" ):
    """
    :param creatorName: Name of the TileProcessor's creator to be added to the parameter info
    """
    self.creatorName : str = creatorName
    self.__finalized : bool = False
    self._usedProperties : Optional[ TileProcessorProperties ] = None

  def __del__( self ):
    """
    Make sure finalize() is called at least here.
    """
    try:
      alreadyFinalized = self.__finalized
    except AttributeError:
      # __init__ of this base class never ran (e.g. a derived __init__ raised before calling it),
      # so there is nothing that could be finalized.
      return
    if not alreadyFinalized:
      self.finalize()

  def prepareProcessBatch( self, usedProperties:TileProcessorProperties, **processingParameters ) -> None:
    """
    Can be called before the first processBatch call. The call is not enforced, but recommended.
    The provided usedProperties are stored in self._usedProperties before _prepareProcessBatch() is called
    (so derived classes should not reimplement this method, but _prepareProcessBatch instead).

    :param usedProperties: Properties that are to be used within subsequent processBatch calls
    :param processingParameters: kwargs with processing params constant for subsequent processBatch calls
    :returns: None
    :raises: ParameterError if raised by _prepareProcessBatch, indicating that the object is not ready for processBatch()
    """
    self._usedProperties = usedProperties
    self._prepareProcessBatch( **processingParameters )

  def getProposedProperties( self ) -> TileProcessorProperties:
    """
    Returns the proposed tile processor properties by calling custom implementation _getProposedProperties().
    Do not overwrite, overwrite _getProposedProperties() instead.

    :returns: A TileProcessorProperties object declaring at least all inputs and outputs by name, and
      as many properties as possible
    :raises: ParameterError
    """
    return self._getProposedProperties()

  def processBatch( self, requestedOutputNames:Sequence[ str ], inputs:TileMap ) -> TileMap:
    """
    Public API to convert an input batch into an output batch using the backend. Validates the requested
    output names and then wraps _processBatch().

    :param requestedOutputNames: Names of the outputs to be requested from the TileProcessor (tuple or list)
    :param inputs: Dictionary with one input batch per input (keys are the input names)
    :returns: Prediction result for the input batch as a dictionary with output name keys
    :raises: ParameterError for output name validation errors, ProcessingError for processing issues.
    """
    self.__validateRequestedOutputs( requestedOutputNames )
    return self._processBatch( requestedOutputNames, inputs )

  def processBatchForPreAllocatedOutputs( self, outputs:TileMap, inputs:TileMap ) -> None:
    """
    Public API to convert an input batch into an output batch for which the nd-arrays
    are already set up and allocated. Validates the output names and then wraps
    _processBatchForPreAllocatedOutputs().

    :param outputs: Dictionary of output names and nd-arrays already set up and allocated to take in the result.
    :param inputs: Dictionary with one input batch per input (keys are the input names)
    :returns: None
    :raises: ParameterError for output name validation errors, ProcessingError for processing issues.
    """
    self.__validateRequestedOutputs( tuple( outputs.keys() ) )
    self._processBatchForPreAllocatedOutputs( outputs, inputs )

  def getParameterInfo( self ) -> ParameterInfo:
    """
    Information about the prediction parameters.
    Usually best not to overwrite, overwrite _addCustomParameterInfo() instead.

    The result always contains the type name and the proposed properties. Input and output infos are
    only added if _getInputInfo() / _getOutputInfo() return something non-empty.

    :returns: Information about the prediction parameters.
    """
    param_info = ParameterInfo()
    param_info[ self.PARAMETER_INFO_KEY__OBJECT_TYPE ] = self._getTypeName()
    param_info[ self.PARAMETER_INFO_KEY__PROPOSED_PROPERTIES ] = self.getProposedProperties().getParameterInfo()
    input_info = self._getInputInfo()
    if input_info:
      param_info[ self.PARAMETER_INFO_KEY__INPUTS ] = to_ParameterInfo( input_info )
    output_info = self._getOutputInfo()
    if output_info:
      param_info[ self.PARAMETER_INFO_KEY__OUTPUTS ] = to_ParameterInfo( output_info )
    # Custom additions come last so that they see (and may edit) everything added above
    self._addCustomParameterInfo( param_info )
    return param_info

  def finalize( self ) -> None:
    """
    To be called when the object is about to be discarded, so that cleanup/reset can occur.
    This method ensures that _finalize() is called, but only once.

    :returns: None
    """
    if self.__finalized:
      return
    # Set the flag before delegating: if _finalize() raises, it must still not be run a second time
    # (e.g. by the finalize() call in __del__).
    self.__finalized = True
    self._finalize()

  # -------------------- Protected API, used to override functionality ----------------------------

  def _getInputInfo( self ) -> Dict[ str, dict ]:
    """
    Allows to ask the TileProcessor for its input signature. Of the returned dictionaries, the input info part is not (yet)
    standardized, it will be whatever the TileProcessor (and inference framework) provide, so in this base class, we must only
    use the names!

    :return: Dictionary of the form { "input_name_1": InputInfo1, "input_name_2": InputInfo2 }, empty by default
    """
    return {}

  def _getOutputInfo( self ) -> Dict[ str, dict ]:
    """
    Allows to ask the TileProcessor for its output signature. Of the returned dictionaries, the output info part is not (yet)
    standardized, it will be whatever the TileProcessor (and inference framework) provide, so in this base class, we must only
    use the names!

    :return: Dictionary of the form { "output_name_1": OutputInfo1, "output_name_2": OutputInfo2 }, empty by default
    """
    return {}

  def _prepareProcessBatch( self, **processingParameters ) -> None:
    """
    To be implemented for custom setup that needs to run once for all tiles. The used properties
    are available (if provided at all) at self._usedProperties already.

    :param processingParameters: kwargs with processing params constant for subsequent processBatch calls
    :raises: May raise a ParameterError to indicate that the object is not ready for processBatch()
    """
    pass

  @abstractmethod
  def _getProposedProperties( self ) -> TileProcessorProperties:
    """
    Returns the processor's properties. Must be reimplemented.

    :returns: A TileProcessorProperties object declaring at least all inputs and outputs by name, and
      as many properties as possible
    :raises: NotImplementedError, ParameterError
    """
    raise NotImplementedError()

  @abstractmethod
  def _processBatch( self, requestedOutputs:Sequence[ str ], inputs: TileMap ) -> TileMap:
    """
    Convert an input batch into an output batch, only computing the requested outputs. Must be reimplemented.

    :param requestedOutputs: Names of the outputs to be requested from the TileProcessor
    :param inputs: Dictionary with one input batch per input (keys are the input names)
    :returns: Prediction result for the input batch as dictionary of output names and numpy arrays
    """
    raise NotImplementedError()

  def _processBatchForPreAllocatedOutputs( self, outputs:TileMap, inputs:TileMap ) -> None:
    """
    Convert an input batch into an output batch for which the ndarrays are already set up and allocated.
    By default, will call _processBatch to do the job and copy the result (passed through
    tp_utils.getReshapedTile with the shape of the pre-allocated array) into the outputs.

    :param outputs: Dictionary of output names and ndarrays already set up and allocated to take in the result.
    :param inputs: Dictionary with one input batch per input (keys are the input names)
    :returns: None
    :raises: ProcessingError if _processBatch() did not return all of the requested outputs
    """
    resultTiles = self._processBatch( tuple( outputs.keys() ), inputs )
    # Check completeness up front so that no output array is modified if the result is unusable
    missingNames = [ name for name in outputs if name not in resultTiles ]
    if missingNames:
      raise self.ProcessingError( f"_processBatch() did not return the requested output(s): {missingNames}" )
    for name, output in outputs.items():
      # Slice assignment writes into the caller's array instead of rebinding the dictionary entry
      output[ : ] = tp_utils.getReshapedTile( resultTiles[ name ], output.shape )

  def _addCustomParameterInfo( self, currentParameterInfo:ParameterInfo ) -> None:
    """
    Called by getParameterInfo to add custom information for this class.
    To be overwritten/extended by concrete implementation to provide additional information.
    (technically, editing the current info is also possible, but use with care).

    :param currentParameterInfo: Current parameter info to be extended (overwritten) in place
    :returns: None
    """
    pass

  def _finalize( self ) -> None:
    """
    Derived classes should override to implement custom cleanup/reset behavior. Will only be called once, no custom
    guarding necessary.

    :returns: None
    """
    pass

  def _getTypeName( self ) -> str:
    """
    In some cases, one may want to overwrite this default implementation to show
    a different type name in the parameter infos.

    By default this is the class name, prefixed with "<creatorName>__" if a creator name is set
    that does not already contain the class name.

    :return: Type name to be included in the parameter infos
    """
    name = self.__class__.__name__
    if self.creatorName and name not in self.creatorName:
      name = self.creatorName + "__" + name
    return name

  # -------------------- Private helper functions ----------------------------

  def __validateRequestedOutputs( self, requestedOutputs:Sequence[str] ) -> None:
    """
    Validates requestedOutputNames parameter in general and for the current TileProcessor.

    The names are checked against the outputs of the properties given to prepareProcessBatch(). As that
    call is optional, the proposed properties are used if it was not made.

    :param requestedOutputs: Output names to validate, must be a non-empty tuple or list
    :raises: ParameterError if the requested outputs are invalid in any way
    """
    if not isinstance( requestedOutputs, ( tuple, list ) ):
      raise self.ParameterError( f"'requestedOutputNames' must be a sequence of strings, but was '{requestedOutputs.__class__.__name__}'" )
    if not requestedOutputs:
      raise self.ParameterError( "Empty list provided for 'requestedOutputNames', must contain at least one output name" )
    properties = self._usedProperties
    if properties is None:
      # prepareProcessBatch() was not called, fall back to what the processor proposes
      properties = self.getProposedProperties()
    knownOutputs = properties.outputs.keys()
    for name in requestedOutputs:
      if name not in knownOutputs:
        raise self.ParameterError( f"TileProcessor has no such output: '{name}'" )